From: Dave Airlie
Date: Thu, 9 Jun 2016 00:18:42 +0000 (+1000)
Subject: tgsi/softpipe: prepare ARB_gpu_shader_int64 support. (v3)
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6b26039da3deb3950d9798150d5431cb942f5637;p=mesa.git

tgsi/softpipe: prepare ARB_gpu_shader_int64 support. (v3)

This adds all the opcodes to tgsi_exec for softpipe to use.

v2: add conversion opcodes.
v3:
- no PIPE_CAP_INT64 yet
- change TGSI_OPCODE_I2U64 to TGSI_OPCODE_U2I64

Reviewed-by: Roland Scheidegger
Reviewed-by: Edward O'Callaghan
Signed-off-by: Dave Airlie
Signed-off-by: Nicolai Hähnle
---

Review notes for this copy of the patch. Every fix below is an in-place
edit of an added line, so the hunk line counts are unchanged.

- micro_i64abs: compare against the integer constant 0 instead of the
  double constant 0.0, so the sign test stays in 64-bit integer
  arithmetic.
- micro_u64div, micro_i64div, micro_u64mod, micro_i64mod: test the
  divisor before dividing. Division and remainder by zero are undefined
  in C. A zero divisor now yields all ones for U64DIV, U64MOD and
  I64MOD, and 0 for I64DIV.
- TGSI_OPCODE_D2U: store with TGSI_EXEC_DATA_UINT, as the exec_d2u()
  helper removed by this patch did.
- TGSI_OPCODE_U2I64: fetch the source as TGSI_EXEC_DATA_UINT; micro_u2i64
  reads src->u[], and TGSI_OPCODE_U2D already uses the same datatype.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy, assuming the 3-space indentation style; check
the context lines against the tree before applying. The "index" line
still names the original post-image blob.

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 37f3fc7bcae..7b5c56d9d02 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -694,11 +694,251 @@ micro_u2d(union tgsi_double_channel *dst,
    dst->d[3] = (double)src->u[3];
 }
 
+static void
+micro_i64abs(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src->i64[0] >= 0 ? src->i64[0] : -src->i64[0];
+   dst->i64[1] = src->i64[1] >= 0 ? src->i64[1] : -src->i64[1];
+   dst->i64[2] = src->i64[2] >= 0 ? src->i64[2] : -src->i64[2];
+   dst->i64[3] = src->i64[3] >= 0 ? src->i64[3] : -src->i64[3];
+}
+
+static void
+micro_i64sgn(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
+   dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
+   dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
+   dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
+}
+
+static void
+micro_i64neg(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->i64[0] = -src->i64[0];
+   dst->i64[1] = -src->i64[1];
+   dst->i64[2] = -src->i64[2];
+   dst->i64[3] = -src->i64[3];
+}
+
+static void
+micro_u64seq(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64sne(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_i64slt(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64slt(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_i64sge(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64sge(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64max(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
+   dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
+   dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
+   dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
+}
+
+static void
+micro_i64max(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
+   dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
+   dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
+   dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
+}
+
+static void
+micro_u64min(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
+   dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
+   dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
+   dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
+}
+
+static void
+micro_i64min(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
+   dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
+   dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
+   dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
+}
+
+static void
+micro_u64add(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u64[0] = src[0].u64[0] + src[1].u64[0];
+   dst->u64[1] = src[0].u64[1] + src[1].u64[1];
+   dst->u64[2] = src[0].u64[2] + src[1].u64[2];
+   dst->u64[3] = src[0].u64[3] + src[1].u64[3];
+}
+
+static void
+micro_u64mul(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u64[0] = src[0].u64[0] * src[1].u64[0];
+   dst->u64[1] = src[0].u64[1] * src[1].u64[1];
+   dst->u64[2] = src[0].u64[2] * src[1].u64[2];
+   dst->u64[3] = src[0].u64[3] * src[1].u64[3];
+}
+
+static void
+micro_u64div(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull;
+   dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull;
+   dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull;
+   dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull;
+}
+
+static void
+micro_i64div(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0;
+   dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0;
+   dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0;
+   dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0;
+}
+
+static void
+micro_u64mod(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull;
+   dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull;
+   dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull;
+   dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull;
+}
+
+static void
+micro_i64mod(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll;
+   dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll;
+   dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll;
+   dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll;
+}
+
+static void
+micro_u64shl(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src0,
+             union tgsi_exec_channel *src1)
+{
+   unsigned masked_count;
+   masked_count = src1->u[0] & 0x3f;
+   dst->u64[0] = src0->u64[0] << masked_count;
+   masked_count = src1->u[1] & 0x3f;
+   dst->u64[1] = src0->u64[1] << masked_count;
+   masked_count = src1->u[2] & 0x3f;
+   dst->u64[2] = src0->u64[2] << masked_count;
+   masked_count = src1->u[3] & 0x3f;
+   dst->u64[3] = src0->u64[3] << masked_count;
+}
+
+static void
+micro_i64shr(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src0,
+             union tgsi_exec_channel *src1)
+{
+   unsigned masked_count;
+   masked_count = src1->u[0] & 0x3f;
+   dst->i64[0] = src0->i64[0] >> masked_count;
+   masked_count = src1->u[1] & 0x3f;
+   dst->i64[1] = src0->i64[1] >> masked_count;
+   masked_count = src1->u[2] & 0x3f;
+   dst->i64[2] = src0->i64[2] >> masked_count;
+   masked_count = src1->u[3] & 0x3f;
+   dst->i64[3] = src0->i64[3] >> masked_count;
+}
+
+static void
+micro_u64shr(union tgsi_double_channel *dst,
+             const union tgsi_double_channel *src0,
+             union tgsi_exec_channel *src1)
+{
+   unsigned masked_count;
+   masked_count = src1->u[0] & 0x3f;
+   dst->u64[0] = src0->u64[0] >> masked_count;
+   masked_count = src1->u[1] & 0x3f;
+   dst->u64[1] = src0->u64[1] >> masked_count;
+   masked_count = src1->u[2] & 0x3f;
+   dst->u64[2] = src0->u64[2] >> masked_count;
+   masked_count = src1->u[3] & 0x3f;
+   dst->u64[3] = src0->u64[3] >> masked_count;
+}
+
 enum tgsi_exec_datatype {
    TGSI_EXEC_DATA_FLOAT,
    TGSI_EXEC_DATA_INT,
    TGSI_EXEC_DATA_UINT,
-   TGSI_EXEC_DATA_DOUBLE
+   TGSI_EXEC_DATA_DOUBLE,
+   TGSI_EXEC_DATA_INT64,
+   TGSI_EXEC_DATA_UINT64,
 };
 
 /*
@@ -3398,6 +3638,16 @@ exec_endswitch(struct tgsi_exec_machine *mach)
 typedef void (* micro_dop)(union tgsi_double_channel *dst,
                            const union tgsi_double_channel *src);
 
+typedef void (* micro_dop_sop)(union tgsi_double_channel *dst,
+                               const union tgsi_double_channel *src0,
+                               union tgsi_exec_channel *src1);
+
+typedef void (* micro_dop_s)(union tgsi_double_channel *dst,
+                             const union tgsi_exec_channel *src);
+
+typedef void (* micro_sop_d)(union tgsi_exec_channel *dst,
+                             const union tgsi_double_channel *src);
+
 static void
 fetch_double_channel(struct tgsi_exec_machine *mach,
                      union tgsi_double_channel *chan,
@@ -3549,131 +3799,6 @@ exec_double_trinary(struct tgsi_exec_machine *mach,
    }
 }
 
-static void
-exec_f2d(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   union tgsi_exec_channel src;
-   union tgsi_double_channel dst;
-
-   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
-      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-      micro_f2d(&dst, &src);
-      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
-   }
-   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
-      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-      micro_f2d(&dst, &src);
-      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
-   }
-}
-
-static void
-exec_d2f(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   union tgsi_double_channel src;
-   union tgsi_exec_channel dst;
-   int wm = inst->Dst[0].Register.WriteMask;
-   int i;
-   int bit;
-   for (i = 0; i < 2; i++) {
-      bit = ffs(wm);
-      if (bit) {
-         wm &= ~(1 << (bit - 1));
-         if (i == 0)
-            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
-         else
-            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
-         micro_d2f(&dst, &src);
-         store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_FLOAT);
-      }
-   }
-}
-
-static void
-exec_i2d(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   union tgsi_exec_channel src;
-   union tgsi_double_channel dst;
-
-   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
-      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
-      micro_i2d(&dst, &src);
-      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
-   }
-   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
-      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_INT);
-      micro_i2d(&dst, &src);
-      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
-   }
-}
-
-static void
-exec_d2i(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   union tgsi_double_channel src;
-   union tgsi_exec_channel dst;
-   int wm = inst->Dst[0].Register.WriteMask;
-   int i;
-   int bit;
-   for (i = 0; i < 2; i++) {
-      bit = ffs(wm);
-      if (bit) {
-         wm &= ~(1 << (bit - 1));
-         if (i == 0)
-            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
-         else
-            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
-         micro_d2i(&dst, &src);
-         store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_INT);
-      }
-   }
-}
-static void
-exec_u2d(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   union tgsi_exec_channel src;
-   union tgsi_double_channel dst;
-
-   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
-      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
-      micro_u2d(&dst, &src);
-      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
-   }
-   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
-      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_UINT);
-      micro_u2d(&dst, &src);
-      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
-   }
-}
-
-static void
-exec_d2u(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   union tgsi_double_channel src;
-   union tgsi_exec_channel dst;
-   int wm = inst->Dst[0].Register.WriteMask;
-   int i;
-   int bit;
-   for (i = 0; i < 2; i++) {
-      bit = ffs(wm);
-      if (bit) {
-         wm &= ~(1 << (bit - 1));
-         if (i == 0)
-            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
-         else
-            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
-         micro_d2u(&dst, &src);
-         store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, TGSI_EXEC_DATA_UINT);
-      }
-   }
-}
-
 static void
 exec_dldexp(struct tgsi_exec_machine *mach,
             const struct tgsi_full_instruction *inst)
@@ -3721,6 +3846,32 @@ exec_dfracexp(struct tgsi_exec_machine *mach,
    }
 }
 
+static void
+exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
+                     const struct tgsi_full_instruction *inst,
+                     micro_dop_sop op)
+{
+   union tgsi_double_channel src0;
+   union tgsi_exec_channel src1;
+   union tgsi_double_channel dst;
+   int wmask;
+
+   wmask = inst->Dst[0].Register.WriteMask;
+   if (wmask & TGSI_WRITEMASK_XY) {
+      fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
+      fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
+      op(&dst, &src0, &src1);
+      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
+   }
+
+   if (wmask & TGSI_WRITEMASK_ZW) {
+      fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
+      fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
+      op(&dst, &src0, &src1);
+      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
+   }
+}
+
 static int
 get_image_coord_dim(unsigned tgsi_tex)
 {
@@ -4303,6 +4454,152 @@ exec_resq(struct tgsi_exec_machine *mach,
       exec_resq_buf(mach, inst);
 }
 
+static void
+micro_f2u64(union tgsi_double_channel *dst,
+            const union tgsi_exec_channel *src)
+{
+   dst->u64[0] = (uint64_t)src->f[0];
+   dst->u64[1] = (uint64_t)src->f[1];
+   dst->u64[2] = (uint64_t)src->f[2];
+   dst->u64[3] = (uint64_t)src->f[3];
+}
+
+static void
+micro_f2i64(union tgsi_double_channel *dst,
+            const union tgsi_exec_channel *src)
+{
+   dst->i64[0] = (int64_t)src->f[0];
+   dst->i64[1] = (int64_t)src->f[1];
+   dst->i64[2] = (int64_t)src->f[2];
+   dst->i64[3] = (int64_t)src->f[3];
+}
+
+static void
+micro_u2i64(union tgsi_double_channel *dst,
+            const union tgsi_exec_channel *src)
+{
+   dst->u64[0] = (uint64_t)src->u[0];
+   dst->u64[1] = (uint64_t)src->u[1];
+   dst->u64[2] = (uint64_t)src->u[2];
+   dst->u64[3] = (uint64_t)src->u[3];
+}
+
+static void
+micro_i2i64(union tgsi_double_channel *dst,
+            const union tgsi_exec_channel *src)
+{
+   dst->i64[0] = (int64_t)src->i[0];
+   dst->i64[1] = (int64_t)src->i[1];
+   dst->i64[2] = (int64_t)src->i[2];
+   dst->i64[3] = (int64_t)src->i[3];
+}
+
+static void
+micro_d2u64(union tgsi_double_channel *dst,
+            const union tgsi_double_channel *src)
+{
+   dst->u64[0] = (uint64_t)src->d[0];
+   dst->u64[1] = (uint64_t)src->d[1];
+   dst->u64[2] = (uint64_t)src->d[2];
+   dst->u64[3] = (uint64_t)src->d[3];
+}
+
+static void
+micro_d2i64(union tgsi_double_channel *dst,
+            const union tgsi_double_channel *src)
+{
+   dst->i64[0] = (int64_t)src->d[0];
+   dst->i64[1] = (int64_t)src->d[1];
+   dst->i64[2] = (int64_t)src->d[2];
+   dst->i64[3] = (int64_t)src->d[3];
+}
+
+static void
+micro_u642d(union tgsi_double_channel *dst,
+            const union tgsi_double_channel *src)
+{
+   dst->d[0] = (double)src->u64[0];
+   dst->d[1] = (double)src->u64[1];
+   dst->d[2] = (double)src->u64[2];
+   dst->d[3] = (double)src->u64[3];
+}
+
+static void
+micro_i642d(union tgsi_double_channel *dst,
+            const union tgsi_double_channel *src)
+{
+   dst->d[0] = (double)src->i64[0];
+   dst->d[1] = (double)src->i64[1];
+   dst->d[2] = (double)src->i64[2];
+   dst->d[3] = (double)src->i64[3];
+}
+
+static void
+micro_u642f(union tgsi_exec_channel *dst,
+            const union tgsi_double_channel *src)
+{
+   dst->f[0] = (float)src->u64[0];
+   dst->f[1] = (float)src->u64[1];
+   dst->f[2] = (float)src->u64[2];
+   dst->f[3] = (float)src->u64[3];
+}
+
+static void
+micro_i642f(union tgsi_exec_channel *dst,
+            const union tgsi_double_channel *src)
+{
+   dst->f[0] = (float)src->i64[0];
+   dst->f[1] = (float)src->i64[1];
+   dst->f[2] = (float)src->i64[2];
+   dst->f[3] = (float)src->i64[3];
+}
+
+static void
+exec_t_2_64(struct tgsi_exec_machine *mach,
+            const struct tgsi_full_instruction *inst,
+            micro_dop_s op,
+            enum tgsi_exec_datatype src_datatype)
+{
+   union tgsi_exec_channel src;
+   union tgsi_double_channel dst;
+
+   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
+      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
+      op(&dst, &src);
+      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
+   }
+   if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
+      fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);
+      op(&dst, &src);
+      store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
+   }
+}
+
+static void
+exec_64_2_t(struct tgsi_exec_machine *mach,
+            const struct tgsi_full_instruction *inst,
+            micro_sop_d op,
+            enum tgsi_exec_datatype dst_datatype)
+{
+   union tgsi_double_channel src;
+   union tgsi_exec_channel dst;
+   int wm = inst->Dst[0].Register.WriteMask;
+   int i;
+   int bit;
+   for (i = 0; i < 2; i++) {
+      bit = ffs(wm);
+      if (bit) {
+         wm &= ~(1 << (bit - 1));
+         if (i == 0)
+            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
+         else
+            fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
+         op(&dst, &src);
+         store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype);
+      }
+   }
+}
+
 static void
 micro_i2f(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src)
@@ -5687,11 +5984,11 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_F2D:
-      exec_f2d(mach, inst);
+      exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_D2F:
-      exec_d2f(mach, inst);
+      exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_DABS:
@@ -5763,19 +6060,19 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_I2D:
-      exec_i2d(mach, inst);
+      exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT);
       break;
 
    case TGSI_OPCODE_D2I:
-      exec_d2i(mach, inst);
+      exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT);
       break;
 
    case TGSI_OPCODE_U2D:
-      exec_u2d(mach, inst);
+      exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT);
       break;
 
    case TGSI_OPCODE_D2U:
-      exec_d2u(mach, inst);
+      exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_UINT);
       break;
 
    case TGSI_OPCODE_LOAD:
@@ -5806,6 +6103,118 @@ exec_instruction(
    case TGSI_OPCODE_MEMBAR:
       return TRUE;
       break;
+
+   case TGSI_OPCODE_I64ABS:
+      exec_double_unary(mach, inst, micro_i64abs);
+      break;
+
+   case TGSI_OPCODE_I64SSG:
+      exec_double_unary(mach, inst, micro_i64sgn);
+      break;
+
+   case TGSI_OPCODE_I64NEG:
+      exec_double_unary(mach, inst, micro_i64neg);
+      break;
+
+   case TGSI_OPCODE_U64SEQ:
+      exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_U64SNE:
+      exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_I64SLT:
+      exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
+      break;
+   case TGSI_OPCODE_U64SLT:
+      exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_I64SGE:
+      exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
+      break;
+   case TGSI_OPCODE_U64SGE:
+      exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
+      break;
+
+   case TGSI_OPCODE_I64MIN:
+      exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
+      break;
+   case TGSI_OPCODE_U64MIN:
+      exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
+      break;
+   case TGSI_OPCODE_I64MAX:
+      exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
+      break;
+   case TGSI_OPCODE_U64MAX:
+      exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
+      break;
+   case TGSI_OPCODE_U64ADD:
+      exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
+      break;
+   case TGSI_OPCODE_U64MUL:
+      exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
+      break;
+   case TGSI_OPCODE_U64SHL:
+      exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
+      break;
+   case TGSI_OPCODE_I64SHR:
+      exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
+      break;
+   case TGSI_OPCODE_U64SHR:
+      exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
+      break;
+   case TGSI_OPCODE_U64DIV:
+      exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
+      break;
+   case TGSI_OPCODE_I64DIV:
+      exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
+      break;
+   case TGSI_OPCODE_U64MOD:
+      exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
+      break;
+   case TGSI_OPCODE_I64MOD:
+      exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
+      break;
+
+   case TGSI_OPCODE_F2U64:
+      exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);
+      break;
+
+   case TGSI_OPCODE_F2I64:
+      exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);
+      break;
+
+   case TGSI_OPCODE_U2I64:
+      exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_UINT);
+      break;
+   case TGSI_OPCODE_I2I64:
+      exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);
+      break;
+
+   case TGSI_OPCODE_D2U64:
+      exec_double_unary(mach, inst, micro_d2u64);
+      break;
+
+   case TGSI_OPCODE_D2I64:
+      exec_double_unary(mach, inst, micro_d2i64);
+      break;
+
+   case TGSI_OPCODE_U642F:
+      exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT);
+      break;
+   case TGSI_OPCODE_I642F:
+      exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT);
+      break;
+
+   case TGSI_OPCODE_U642D:
+      exec_double_unary(mach, inst, micro_u642d);
+      break;
+   case TGSI_OPCODE_I642D:
+      exec_double_unary(mach, inst, micro_i642d);
+      break;
+
    default:
       assert( 0 );
    }