From: Ben Skeggs Date: Sat, 6 Jun 2020 23:51:45 +0000 (+1000) Subject: nvir: introduce OP_LOP3_LUT X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4b9b7e4dd33b06f696534327a3878de89659f2a6;p=mesa.git nvir: introduce OP_LOP3_LUT Will be required to support SM70, but is also available on earlier GPUs. v2: - add convenience macro suggested by Karol Signed-off-by: Ben Skeggs Reviewed-by: Karol Herbst Part-of: --- diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 2718975b98a..a4ade030b00 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -67,6 +67,7 @@ enum operation OP_AND, OP_OR, OP_XOR, + OP_LOP3_LUT, OP_SHL, OP_SHR, OP_MAX, @@ -254,6 +255,15 @@ enum operation #define NV50_IR_SUBOP_VOTE_ALL 0 #define NV50_IR_SUBOP_VOTE_ANY 1 #define NV50_IR_SUBOP_VOTE_UNI 2 +#define NV50_IR_SUBOP_LOP3_LUT_SRC0 0xf0 +#define NV50_IR_SUBOP_LOP3_LUT_SRC1 0xcc +#define NV50_IR_SUBOP_LOP3_LUT_SRC2 0xaa +#define NV50_IR_SUBOP_LOP3_LUT(exp) ({ \ + uint8_t a = NV50_IR_SUBOP_LOP3_LUT_SRC0; \ + uint8_t b = NV50_IR_SUBOP_LOP3_LUT_SRC1; \ + uint8_t c = NV50_IR_SUBOP_LOP3_LUT_SRC2; \ + (uint8_t)(exp); \ +}) #define NV50_IR_SUBOP_MINMAX_LOW 1 #define NV50_IR_SUBOP_MINMAX_MED 2 diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index e24e74a6463..5c3d15968cf 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -780,6 +780,14 @@ ConstantFolding::expr(Instruction *i, memset(&res.data, 0, sizeof(res.data)); switch (i->op) { + case OP_LOP3_LUT: + for (int n = 0; n < 32; n++) { + uint8_t lut = ((a->data.u32 >> n) & 1) << 2 | + ((b->data.u32 >> n) & 1) << 1 | + ((c->data.u32 >> n) & 1); + res.data.u32 |= !!(i->subOp & (1 << lut)) << n; + } + break; case OP_INSBF: { int offset = b->data.u32 & 0xff; int width = (b->data.u32 >> 8) & 0xff; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 5dcbf3c3e0c..74738221080 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -93,6 +93,7 @@ const char *operationStr[OP_LAST + 1] = "and", "or", "xor", + "lop3 lut", "shl", "shr", "max", diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 5c6d0570ae2..92ac7101cfc 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -33,7 +33,7 @@ const uint8_t Target::operationSrcNr[] = 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD 3, 3, // SHLADD, XMAD 1, 1, 1, // ABS, NEG, NOT - 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR + 2, 2, 2, 3, 2, 2, // AND, OR, XOR, LOP3_LUT, SHL, SHR 2, 2, 1, // MAX, MIN, SAT 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT @@ -75,9 +75,9 @@ const OpClass Target::operationClass[] = OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, - // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR + // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR OPCLASS_CONVERT, OPCLASS_CONVERT, - OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, + OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_SHIFT, OPCLASS_SHIFT, // MAX, MIN OPCLASS_COMPARE, OPCLASS_COMPARE,