From: Ben Skeggs Date: Sat, 6 Jun 2020 23:51:53 +0000 (+1000) Subject: nvir: introduce OP_SHF X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e1e4d1d373aa3090ed3cd186fe3158cee38d1e31;p=mesa.git nvir: introduce OP_SHF We already use a hack from NVC0LegalizeSSA::handleShift() on GK110 and newer which encodes SHF into the existing SHL/SHR opcodes, but there are a couple of problems with it: - LO/HI are swapped in one of the directions, which is very confusing. - The initial SM70 code will emit this from NIR->NVIR, and using the existing encodings will confuse the optimisation passes. As I want to limit the impact on other GPUs from the initial bring-up of Volta/Turing, let's add an explicit representation of SHF in the IR. Signed-off-by: Ben Skeggs Reviewed-by: Karol Herbst Part-of: --- diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index e4bbc8edafb..b03cab82a03 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -70,6 +70,7 @@ enum operation OP_LOP3_LUT, OP_SHL, OP_SHR, + OP_SHF, OP_MAX, OP_MIN, OP_SAT, // CLAMP(f32, 0.0, 1.0) @@ -271,6 +272,13 @@ enum operation #define NV50_IR_SUBOP_MINMAX_MED 2 #define NV50_IR_SUBOP_MINMAX_HIGH 3 +#define NV50_IR_SUBOP_SHF_L (0 << 0) +#define NV50_IR_SUBOP_SHF_R (1 << 0) +#define NV50_IR_SUBOP_SHF_LO (0 << 1) +#define NV50_IR_SUBOP_SHF_HI (1 << 1) +#define NV50_IR_SUBOP_SHF_C (0 << 2) +#define NV50_IR_SUBOP_SHF_W (1 << 2) + // xmad(src0, src1, 0) << 16 + src2 #define NV50_IR_SUBOP_XMAD_PSL (1 << 0) // (xmad(src0, src1, src2) & 0xffff) | (src1 << 16) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index e3bac648761..c6550ca187f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -96,6 +96,7 @@ const char *operationStr[OP_LAST + 1] = "lop3 lut", "shl", 
"shr", + "shf", "max", "min", "sat", diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 77edf41e124..888dc7fb876 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -33,7 +33,7 @@ const uint8_t Target::operationSrcNr[] = 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD 3, 3, // SHLADD, XMAD 1, 1, 1, // ABS, NEG, NOT - 2, 2, 2, 3, 2, 2, // AND, OR, XOR, LOP3_LUT, SHL, SHR + 2, 2, 2, 3, 2, 2, 3, // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF 2, 2, 1, // MAX, MIN, SAT 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT @@ -76,10 +76,10 @@ const OpClass Target::operationClass[] = OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, - // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR + // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, - OPCLASS_SHIFT, OPCLASS_SHIFT, + OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT, // MAX, MIN OPCLASS_COMPARE, OPCLASS_COMPARE, // SAT, CEIL, FLOOR, TRUNC; CVT