From 6905698fc21710c18722295dedceb96ef5d5923b Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 8 Oct 2013 15:11:02 -0400 Subject: [PATCH] gallium: Add support for 32x32 muls with 64 bit results MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The code introduces two new 32bit integer multiplication opcodes which can be used to produce correct 64 bit results. GLSL, OpenCL and D3D10+ require them. We use two separate opcodes, because they match the behavior of GLSL and OpenCL, are a lot easier to add than a single opcode with multiple destinations and because there's not much (any) difference wrt code-generation. Signed-off-by: Zack Rusin Reviewed-by: José Fonseca Reviewed-by: Roland Scheidegger Reviewed-by: Brian Paul --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 34 +++++++++++++++++++ src/gallium/auxiliary/tgsi/tgsi_info.c | 6 ++++ src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 3 ++ src/gallium/auxiliary/tgsi/tgsi_util.c | 2 ++ src/gallium/docs/source/tgsi.rst | 30 ++++++++++++++++ src/gallium/include/pipe/p_shader_tokens.h | 5 ++- .../tests/graw/vertex-shader/vert-imul_hi.sh | 13 +++++++ .../tests/graw/vertex-shader/vert-umul_hi.sh | 11 ++++++ 8 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 src/gallium/tests/graw/vertex-shader/vert-imul_hi.sh create mode 100644 src/gallium/tests/graw/vertex-shader/vert-umul_hi.sh diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 0750a502f16..6db1238a60d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3477,6 +3477,32 @@ micro_umul(union tgsi_exec_channel *dst, dst->u[3] = src0->u[3] * src1->u[3]; } +static void +micro_imul_hi(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ +#define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32) + dst->i[0] = I64M(src0->i[0], src1->i[0]); + dst->i[1] = I64M(src0->i[1], 
src1->i[1]); + dst->i[2] = I64M(src0->i[2], src1->i[2]); + dst->i[3] = I64M(src0->i[3], src1->i[3]); +#undef I64M +} + +static void +micro_umul_hi(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ +#define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32) + dst->u[0] = U64M(src0->u[0], src1->u[0]); + dst->u[1] = U64M(src0->u[1], src1->u[1]); + dst->u[2] = U64M(src0->u[2], src1->u[2]); + dst->u[3] = U64M(src0->u[3], src1->u[3]); +#undef U64M +} + static void micro_useq(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, @@ -4277,6 +4303,14 @@ exec_instruction( exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; + case TGSI_OPCODE_IMUL_HI: + exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_UMUL_HI: + exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + case TGSI_OPCODE_USEQ: exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 7a5d18f59c5..0beef44454d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -219,6 +219,8 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 3, 1, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 }, { 1, 3, 1, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 }, { 1, 3, 1, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 }, + { 1, 2, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI }, + { 1, 2, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI }, }; const struct tgsi_opcode_info * @@ -297,6 +299,7 @@ tgsi_opcode_infer_type( uint opcode ) case TGSI_OPCODE_USLT: case TGSI_OPCODE_USNE: case TGSI_OPCODE_SVIEWINFO: + case TGSI_OPCODE_UMUL_HI: return TGSI_TYPE_UNSIGNED; case TGSI_OPCODE_ARL: case TGSI_OPCODE_ARR: @@ -317,6 +320,7 @@ 
tgsi_opcode_infer_type( uint opcode ) case TGSI_OPCODE_UARL: case TGSI_OPCODE_IABS: case TGSI_OPCODE_ISSG: + case TGSI_OPCODE_IMUL_HI: return TGSI_TYPE_SIGNED; default: return TGSI_TYPE_FLOAT; @@ -339,7 +343,9 @@ tgsi_opcode_infer_src_type( uint opcode ) case TGSI_OPCODE_CASE: case TGSI_OPCODE_SAMPLE_I: case TGSI_OPCODE_SAMPLE_I_MS: + case TGSI_OPCODE_UMUL_HI: return TGSI_TYPE_UNSIGNED; + case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_I2F: return TGSI_TYPE_SIGNED; case TGSI_OPCODE_ARL: diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index b8144a8916b..1ef78ddcc8f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -204,6 +204,9 @@ OP12(SAMPLE_INFO) OP13(UCMP) +OP12(IMUL_HI) +OP12(UMUL_HI) + #undef OP00 #undef OP01 #undef OP10 diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index b3bc8f283a4..73a0667fd06 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -243,6 +243,8 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, case TGSI_OPCODE_USHR: case TGSI_OPCODE_USLT: case TGSI_OPCODE_USNE: + case TGSI_OPCODE_IMUL_HI: + case TGSI_OPCODE_UMUL_HI: /* Channel-wise operations */ read_mask = write_mask; break; diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 41f2798d704..f80c08d3175 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -1103,6 +1103,36 @@ Support for these opcodes indicated by PIPE_SHADER_CAP_INTEGERS (all of them?) dst.w = src0.w \times src1.w +.. opcode:: IMUL_HI - Signed Integer Multiply High Bits + + The high 32bits of the multiplication of 2 signed integers are returned. + +.. math:: + + dst.x = (src0.x \times src1.x) >> 32 + + dst.y = (src0.y \times src1.y) >> 32 + + dst.z = (src0.z \times src1.z) >> 32 + + dst.w = (src0.w \times src1.w) >> 32 + + +.. 
opcode:: UMUL_HI - Unsigned Integer Multiply High Bits + + The high 32bits of the multiplication of 2 unsigned integers are returned. + +.. math:: + + dst.x = (src0.x \times src1.x) >> 32 + + dst.y = (src0.y \times src1.y) >> 32 + + dst.z = (src0.z \times src1.z) >> 32 + + dst.w = (src0.w \times src1.w) >> 32 + + .. opcode:: IDIV - Signed Integer Division TBD: behavior for division by zero. diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 1beec054c1b..801090204fa 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -450,7 +450,10 @@ struct tgsi_property_data { #define TGSI_OPCODE_TXB2 178 #define TGSI_OPCODE_TXL2 179 -#define TGSI_OPCODE_LAST 180 +#define TGSI_OPCODE_IMUL_HI 180 +#define TGSI_OPCODE_UMUL_HI 181 + +#define TGSI_OPCODE_LAST 182 #define TGSI_SAT_NONE 0 /* do not saturate */ #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ diff --git a/src/gallium/tests/graw/vertex-shader/vert-imul_hi.sh b/src/gallium/tests/graw/vertex-shader/vert-imul_hi.sh new file mode 100644 index 00000000000..60e2d80dd7f --- /dev/null +++ b/src/gallium/tests/graw/vertex-shader/vert-imul_hi.sh @@ -0,0 +1,13 @@ +VERT +DCL IN[0] +DCL IN[1] +DCL OUT[0], POSITION +DCL OUT[1], COLOR +DCL TEMP[0] +DCL TEMP[1] +IMM[0] INT32 {-2147483648, 2, 0, -1} +MOV OUT[0], IN[0] +IMUL_HI TEMP[0], IMM[0].xzzx, IMM[0].yzzy +UMUL TEMP[0], TEMP[0], IMM[0].wwww +I2F OUT[1], TEMP[0] +END diff --git a/src/gallium/tests/graw/vertex-shader/vert-umul_hi.sh b/src/gallium/tests/graw/vertex-shader/vert-umul_hi.sh new file mode 100644 index 00000000000..4aa79fe8647 --- /dev/null +++ b/src/gallium/tests/graw/vertex-shader/vert-umul_hi.sh @@ -0,0 +1,11 @@ +VERT +DCL IN[0] +DCL IN[1] +DCL OUT[0], POSITION +DCL OUT[1], COLOR +DCL TEMP[0] +IMM[0] INT32 {4, 1073741824, 0, 1} +MOV OUT[0], IN[0] +UMUL_HI TEMP[0], IMM[0].xzzx, IMM[0].yzzy +I2F OUT[1], TEMP[0] +END -- 2.30.2