X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Ftgsi%2Ftgsi_exec.c;h=41dd0f0466a28285e78321004aefe9f0a6b6b26e;hb=b89708f95fafc458cc79bc210407b723a0f0f78c;hp=a8fc0089f5ab0c084e88c4068e2954af4fed31da;hpb=3cd1338534374c8ed13651548dcbf3949857dbcd;p=mesa.git diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index a8fc0089f5a..41dd0f0466a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -58,6 +58,7 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" +#include "util/u_half.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -735,7 +736,7 @@ static const union tgsi_exec_channel M128Vec = { * not lead to crashes, etc. But when debugging, it's helpful to catch * them. */ -static INLINE void +static inline void check_inf_or_nan(const union tgsi_exec_channel *chan) { assert(!util_is_inf_or_nan((chan)->f[0])); @@ -852,7 +853,9 @@ void tgsi_exec_machine_bind_shader( struct tgsi_exec_machine *mach, const struct tgsi_token *tokens, - struct tgsi_sampler *sampler) + struct tgsi_sampler *sampler, + struct tgsi_image *image, + struct tgsi_buffer *buffer) { uint k; struct tgsi_parse_context parse; @@ -870,6 +873,8 @@ tgsi_exec_machine_bind_shader( mach->Tokens = tokens; mach->Sampler = sampler; + mach->Image = image; + mach->Buffer = buffer; if (!tokens) { /* unbind and free all */ @@ -1515,7 +1520,7 @@ store_dest_dstret(struct tgsi_exec_machine *mach, enum tgsi_exec_datatype dst_datatype) { uint i; - union tgsi_exec_channel null; + static union tgsi_exec_channel null; union tgsi_exec_channel *dst; union tgsi_exec_channel index2D; uint execmask = mach->ExecMask; @@ -1765,14 +1770,12 @@ store_dest(struct tgsi_exec_machine *mach, if (!dst) return; - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: + if (!inst->Instruction.Saturate) { for (i = 0; i < TGSI_QUAD_SIZE; i++) if (execmask & (1 << i)) dst->i[i] = chan->i[i]; - break; - - case TGSI_SAT_ZERO_ONE: + } + else { for (i = 0; i < TGSI_QUAD_SIZE; i++) if (execmask & (1 << i)) { if (chan->f[i] < 0.0f) @@ -1782,22 +1785,6 @@ store_dest(struct tgsi_exec_machine *mach, else dst->i[i] = chan->i[i]; } - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - for (i = 0; i < TGSI_QUAD_SIZE; i++) - if (execmask & (1 << i)) { - if (chan->f[i] < -1.0f) - dst->f[i] = -1.0f; - else if (chan->f[i] > 1.0f) - dst->f[i] = 1.0f; - else - dst->i[i] = chan->i[i]; - } - break; - - default: - assert( 0 ); } } @@ -1952,7 +1939,7 @@ fetch_texel( struct tgsi_sampler *sampler, #define TEX_MODIFIER_LOD_BIAS 2 #define TEX_MODIFIER_EXPLICIT_LOD 3 #define TEX_MODIFIER_LEVEL_ZERO 4 - +#define TEX_MODIFIER_GATHER 5 /* * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array. @@ -2006,11 +1993,46 @@ fetch_assign_deriv_channel(struct tgsi_exec_machine *mach, derivs[1][3] = d.f[3]; } +static uint +fetch_sampler_unit(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + uint sampler) +{ + uint unit = 0; + int i; + if (inst->Src[sampler].Register.Indirect) { + const struct tgsi_full_src_register *reg = &inst->Src[sampler]; + union tgsi_exec_channel indir_index, index2; + const uint execmask = mach->ExecMask; + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->Indirect.Index; + + fetch_src_file_channel(mach, + 0, + reg->Indirect.File, + reg->Indirect.Swizzle, + &index2, + &ZeroVec, + &indir_index); + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + if (execmask & (1 << i)) { + unit = inst->Src[sampler].Register.Index + indir_index.i[i]; + break; + } + } + + } else { + unit = inst->Src[sampler].Register.Index; + } + return unit; +} /* * execute a texture instruction. * - * modifier is used to control the channel routing for the\ + * modifier is used to control the channel routing for the * instruction variants like proj, lod, and texture with lod bias. * sampler indicates which src register the sampler is contained in. */ @@ -2019,21 +2041,23 @@ exec_tex(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, uint modifier, uint sampler) { - const uint unit = inst->Src[sampler].Register.Index; const union tgsi_exec_channel *args[5], *proj = NULL; union tgsi_exec_channel r[5]; - enum tgsi_sampler_control control = tgsi_sampler_lod_none; + enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; uint chan; + uint unit; int8_t offsets[3]; int dim, shadow_ref, i; + unit = fetch_sampler_unit(mach, inst, sampler); /* always fetch all 3 offsets, overkill but keeps code simple */ fetch_texel_offsets(mach, inst, offsets); assert(modifier != TEX_MODIFIER_LEVEL_ZERO); assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER); - dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, &shadow_ref); + dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); + shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture); assert(dim <= 4); if (shadow_ref >= 0) @@ -2066,9 +2090,11 @@ exec_tex(struct tgsi_exec_machine *mach, args[i] = &ZeroVec; if (modifier == TEX_MODIFIER_EXPLICIT_LOD) - control = tgsi_sampler_lod_explicit; + control = TGSI_SAMPLER_LOD_EXPLICIT; else if (modifier == TEX_MODIFIER_LOD_BIAS) - control = tgsi_sampler_lod_bias; + control = TGSI_SAMPLER_LOD_BIAS; + else if (modifier == TEX_MODIFIER_GATHER) + control = TGSI_SAMPLER_GATHER; } else { for (i = dim; i < Elements(args); i++) @@ -2118,17 +2144,58 @@ exec_tex(struct tgsi_exec_machine *mach, } } +static void +exec_lodq(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint unit; + int dim; + int i; + union tgsi_exec_channel coords[4]; + const union tgsi_exec_channel *args[Elements(coords)]; + union tgsi_exec_channel r[2]; + + unit = fetch_sampler_unit(mach, inst, 1); + dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); + assert(dim <= Elements(coords)); + /* fetch coordinates */ + for (i = 0; i < dim; i++) { + FETCH(&coords[i], 0, TGSI_CHAN_X + i); + args[i] = &coords[i]; + } + for (i = dim; i < Elements(coords); i++) { + args[i] = &ZeroVec; + } + mach->Sampler->query_lod(mach->Sampler, unit, unit, + args[0]->f, + args[1]->f, + args[2]->f, + args[3]->f, + TGSI_SAMPLER_LOD_NONE, + r[0].f, + r[1].f); + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, + TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, + TGSI_EXEC_DATA_FLOAT); + } +} static void exec_txd(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[3].Register.Index; union tgsi_exec_channel r[4]; float derivs[3][2][TGSI_QUAD_SIZE]; uint chan; + uint unit; int8_t offsets[3]; + unit = fetch_sampler_unit(mach, inst, 3); /* always fetch all 3 offsets, overkill but keeps code simple */ fetch_texel_offsets(mach, inst, offsets); @@ -2140,7 +2207,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; @@ -2156,7 +2223,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; @@ -2170,7 +2237,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; @@ -2190,7 +2257,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; @@ -2210,7 +2277,7 @@ exec_txd(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, unit, unit, &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; @@ -2230,20 +2297,22 @@ static void exec_txf(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[1].Register.Index; union tgsi_exec_channel r[4]; uint chan; + uint unit; float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; int j; int8_t offsets[3]; unsigned target; + unit = fetch_sampler_unit(mach, inst, 1); /* always fetch all 3 offsets, overkill but keeps code simple */ fetch_texel_offsets(mach, inst, offsets); IFETCH(&r[3], 0, TGSI_CHAN_W); - if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) { + if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || + inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { target = mach->SamplerViews[unit].Resource; } else { @@ -2285,7 +2354,8 @@ exec_txf(struct tgsi_exec_machine *mach, r[3].f[j] = rgba[3][j]; } - if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) { + if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || + inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { unsigned char swizzles[4]; swizzles[0] = inst->Src[1].Register.SwizzleX; swizzles[1] = inst->Src[1].Register.SwizzleY; @@ -2312,12 +2382,14 @@ static void exec_txq(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[1].Register.Index; int result[4]; union tgsi_exec_channel r[4], src; uint chan; + uint unit; int i,j; + unit = fetch_sampler_unit(mach, inst, 1); + fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); /* XXX: This interface can't return per-pixel values */ @@ -2344,9 +2416,9 @@ exec_sample(struct tgsi_exec_machine *mach, { const uint resource_unit = inst->Src[1].Register.Index; const uint sampler_unit = inst->Src[2].Register.Index; - union tgsi_exec_channel r[4], c1; + union tgsi_exec_channel r[5], c1; const union tgsi_exec_channel *lod = &ZeroVec; - enum tgsi_sampler_control control = tgsi_sampler_lod_none; + enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; uint chan; unsigned char swizzles[4]; int8_t offsets[3]; @@ -2360,16 +2432,16 @@ exec_sample(struct tgsi_exec_machine *mach, if (modifier == TEX_MODIFIER_LOD_BIAS) { FETCH(&c1, 3, TGSI_CHAN_X); lod = &c1; - control = tgsi_sampler_lod_bias; + control = TGSI_SAMPLER_LOD_BIAS; } else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { FETCH(&c1, 3, TGSI_CHAN_X); lod = &c1; - control = tgsi_sampler_lod_explicit; + control = TGSI_SAMPLER_LOD_EXPLICIT; } else { assert(modifier == TEX_MODIFIER_LEVEL_ZERO); - control = tgsi_sampler_lod_zero; + control = TGSI_SAMPLER_LOD_ZERO; } } @@ -2495,7 +2567,7 @@ exec_sample_d(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, resource_unit, sampler_unit, &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; @@ -2511,7 +2583,7 @@ exec_sample_d(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, resource_unit, sampler_unit, &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */ - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; @@ -2529,7 +2601,7 @@ exec_sample_d(struct tgsi_exec_machine *mach, fetch_texel(mach->Sampler, resource_unit, sampler_unit, &r[0], &r[1], &r[2], &r[3], &ZeroVec, - derivs, offsets, tgsi_sampler_derivs_explicit, + derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, &r[0], &r[1], &r[2], &r[3]); break; @@ -2999,6 +3071,45 @@ exec_dp2(struct tgsi_exec_machine *mach, } } +static void +exec_pk2h(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned chan; + union tgsi_exec_channel arg[2], dst; + + fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); + for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { + dst.u[chan] = util_float_to_half(arg[0].f[chan]) | + (util_float_to_half(arg[1].f[chan]) << 16); + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT); + } + } +} + +static void +exec_up2h(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned chan; + union tgsi_exec_channel arg, dst[2]; + + fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); + for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { + dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff); + dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16); + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + static void exec_scs(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) @@ -3315,16 +3426,14 @@ store_double_channel(struct tgsi_exec_machine *mach, union tgsi_double_channel temp; const uint execmask = mach->ExecMask; - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: + if (!inst->Instruction.Saturate) { for (i = 0; i < TGSI_QUAD_SIZE; i++) if (execmask & (1 << i)) { dst[0].u[i] = chan->u[i][0]; dst[1].u[i] = chan->u[i][1]; } - break; - - case TGSI_SAT_ZERO_ONE: + } + else { for (i = 0; i < TGSI_QUAD_SIZE; i++) if (execmask & (1 << i)) { if (chan->d[i] < 0.0) @@ -3337,25 +3446,6 @@ store_double_channel(struct tgsi_exec_machine *mach, dst[0].u[i] = temp.u[i][0]; dst[1].u[i] = temp.u[i][1]; } - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - for (i = 0; i < TGSI_QUAD_SIZE; i++) - if (execmask & (1 << i)) { - if (chan->d[i] < -1.0) - temp.d[i] = -1.0; - else if (chan->d[i] > 1.0) - temp.d[i] = 1.0; - else - temp.d[i] = chan->d[i]; - - dst[0].u[i] = temp.u[i][0]; - dst[1].u[i] = temp.u[i][1]; - } - break; - - default: - assert( 0 ); } store_dest_double(mach, &dst[0], reg, inst, chan_0, TGSI_EXEC_DATA_UINT); @@ -3621,6 +3711,444 @@ exec_dfracexp(struct tgsi_exec_machine *mach, } } +static int +get_image_coord_dim(unsigned tgsi_tex) +{ + int dim; + switch (tgsi_tex) { + case TGSI_TEXTURE_BUFFER: + case TGSI_TEXTURE_1D: + dim = 1; + break; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_1D_ARRAY: + case TGSI_TEXTURE_2D_MSAA: + dim = 2; + break; + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + case TGSI_TEXTURE_2D_ARRAY: + case TGSI_TEXTURE_2D_ARRAY_MSAA: + case TGSI_TEXTURE_CUBE_ARRAY: + dim = 3; + break; + default: + assert(!"unknown texture target"); + dim = 0; + break; + } + + return dim; +} + +static int +get_image_coord_sample(unsigned tgsi_tex) +{ + int sample = 0; + switch (tgsi_tex) { + case TGSI_TEXTURE_2D_MSAA: + sample = 3; + break; + case TGSI_TEXTURE_2D_ARRAY_MSAA: + sample = 4; + break; + default: + break; + } + return sample; +} + +static void +exec_load_img(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[4], sample_r; + uint unit; + int sample; + int i, j; + int dim; + uint chan; + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + struct tgsi_image_params params; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + + unit = fetch_sampler_unit(mach, inst, 0); + dim = get_image_coord_dim(inst->Memory.Texture); + sample = get_image_coord_sample(inst->Memory.Texture); + assert(dim <= 3); + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + params.tgsi_tex_instr = inst->Memory.Texture; + params.format = inst->Memory.Format; + + for (i = 0; i < dim; i++) { + IFETCH(&r[i], 1, TGSI_CHAN_X + i); + } + + if (sample) + IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); + + mach->Image->load(mach->Image, ¶ms, + r[0].i, r[1].i, r[2].i, sample_r.i, + rgba); + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + r[0].f[j] = rgba[0][j]; + r[1].f[j] = rgba[1][j]; + r[2].f[j] = rgba[2][j]; + r[3].f[j] = rgba[3][j]; + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_load_buf(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[4]; + uint unit; + int j; + uint chan; + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + struct tgsi_buffer_params params; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + + unit = fetch_sampler_unit(mach, inst, 0); + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + IFETCH(&r[0], 1, TGSI_CHAN_X); + + mach->Buffer->load(mach->Buffer, ¶ms, + r[0].i, rgba); + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + r[0].f[j] = rgba[0][j]; + r[1].f[j] = rgba[1][j]; + r[2].f[j] = rgba[2][j]; + r[3].f[j] = rgba[3][j]; + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_load(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) + exec_load_img(mach, inst); + else + exec_load_buf(mach, inst); +} + +static void +exec_store_img(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[3], sample_r; + union tgsi_exec_channel value[4]; + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + struct tgsi_image_params params; + int dim; + int sample; + int i, j; + uint unit; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + unit = inst->Dst[0].Register.Index; + dim = get_image_coord_dim(inst->Memory.Texture); + sample = get_image_coord_sample(inst->Memory.Texture); + assert(dim <= 3); + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + params.tgsi_tex_instr = inst->Memory.Texture; + params.format = inst->Memory.Format; + + for (i = 0; i < dim; i++) { + IFETCH(&r[i], 0, TGSI_CHAN_X + i); + } + + for (i = 0; i < 4; i++) { + FETCH(&value[i], 1, TGSI_CHAN_X + i); + } + if (sample) + IFETCH(&sample_r, 0, TGSI_CHAN_X + sample); + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + rgba[0][j] = value[0].f[j]; + rgba[1][j] = value[1].f[j]; + rgba[2][j] = value[2].f[j]; + rgba[3][j] = value[3].f[j]; + } + + mach->Image->store(mach->Image, ¶ms, + r[0].i, r[1].i, r[2].i, sample_r.i, + rgba); +} + +static void +exec_store_buf(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[3]; + union tgsi_exec_channel value[4]; + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + struct tgsi_buffer_params params; + int i, j; + uint unit; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + + unit = inst->Dst[0].Register.Index; + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + params.writemask = inst->Dst[0].Register.WriteMask; + + IFETCH(&r[0], 0, TGSI_CHAN_X); + for (i = 0; i < 4; i++) { + FETCH(&value[i], 1, TGSI_CHAN_X + i); + } + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + rgba[0][j] = value[0].f[j]; + rgba[1][j] = value[1].f[j]; + rgba[2][j] = value[2].f[j]; + rgba[3][j] = value[3].f[j]; + } + + mach->Buffer->store(mach->Buffer, ¶ms, + r[0].i, + rgba); +} + +static void +exec_store(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) + exec_store_img(mach, inst); + else + exec_store_buf(mach, inst); +} + +static void +exec_atomop_img(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[4], sample_r; + union tgsi_exec_channel value[4], value2[4]; + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + struct tgsi_image_params params; + int dim; + int sample; + int i, j; + uint unit, chan; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + unit = fetch_sampler_unit(mach, inst, 0); + dim = get_image_coord_dim(inst->Memory.Texture); + sample = get_image_coord_sample(inst->Memory.Texture); + assert(dim <= 3); + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + params.tgsi_tex_instr = inst->Memory.Texture; + params.format = inst->Memory.Format; + + for (i = 0; i < dim; i++) { + IFETCH(&r[i], 1, TGSI_CHAN_X + i); + } + + for (i = 0; i < 4; i++) { + FETCH(&value[i], 2, TGSI_CHAN_X + i); + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) + FETCH(&value2[i], 3, TGSI_CHAN_X + i); + } + if (sample) + IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + rgba[0][j] = value[0].f[j]; + rgba[1][j] = value[1].f[j]; + rgba[2][j] = value[2].f[j]; + rgba[3][j] = value[3].f[j]; + } + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + rgba2[0][j] = value2[0].f[j]; + rgba2[1][j] = value2[1].f[j]; + rgba2[2][j] = value2[2].f[j]; + rgba2[3][j] = value2[3].f[j]; + } + } + + mach->Image->op(mach->Image, ¶ms, inst->Instruction.Opcode, + r[0].i, r[1].i, r[2].i, sample_r.i, + rgba, rgba2); + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + r[0].f[j] = rgba[0][j]; + r[1].f[j] = rgba[1][j]; + r[2].f[j] = rgba[2][j]; + r[3].f[j] = rgba[3][j]; + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_atomop_buf(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[4]; + union tgsi_exec_channel value[4], value2[4]; + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; + struct tgsi_buffer_params params; + int i, j; + uint unit, chan; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + + unit = fetch_sampler_unit(mach, inst, 0); + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + params.writemask = inst->Dst[0].Register.WriteMask; + + IFETCH(&r[0], 1, TGSI_CHAN_X); + + for (i = 0; i < 4; i++) { + FETCH(&value[i], 2, TGSI_CHAN_X + i); + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) + FETCH(&value2[i], 3, TGSI_CHAN_X + i); + } + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + rgba[0][j] = value[0].f[j]; + rgba[1][j] = value[1].f[j]; + rgba[2][j] = value[2].f[j]; + rgba[3][j] = value[3].f[j]; + } + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + rgba2[0][j] = value2[0].f[j]; + rgba2[1][j] = value2[1].f[j]; + rgba2[2][j] = value2[2].f[j]; + rgba2[3][j] = value2[3].f[j]; + } + } + + mach->Buffer->op(mach->Buffer, ¶ms, inst->Instruction.Opcode, + r[0].i, + rgba, rgba2); + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + r[0].f[j] = rgba[0][j]; + r[1].f[j] = rgba[1][j]; + r[2].f[j] = rgba[2][j]; + r[3].f[j] = rgba[3][j]; + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_atomop(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) + exec_atomop_img(mach, inst); + else + exec_atomop_buf(mach, inst); +} + +static void +exec_resq_img(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + int result[4]; + union tgsi_exec_channel r[4]; + uint unit; + int i, chan, j; + struct tgsi_image_params params; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + + unit = fetch_sampler_unit(mach, inst, 0); + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + params.tgsi_tex_instr = inst->Memory.Texture; + params.format = inst->Memory.Format; + + mach->Image->get_dims(mach->Image, ¶ms, result); + + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + for (j = 0; j < 4; j++) { + r[j].i[i] = result[j]; + } + } + + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, + TGSI_EXEC_DATA_INT); + } + } +} + +static void +exec_resq_buf(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + int result; + union tgsi_exec_channel r[4]; + uint unit; + int i, chan; + struct tgsi_buffer_params params; + int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; + + unit = fetch_sampler_unit(mach, inst, 0); + + params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; + params.unit = unit; + + mach->Buffer->get_dims(mach->Buffer, ¶ms, &result); + + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + r[0].i[i] = result; + } + + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, + TGSI_EXEC_DATA_INT); + } + } +} + +static void +exec_resq(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) + exec_resq_img(mach, inst); + else + exec_resq_buf(mach, inst); +} static void micro_i2f(union tgsi_exec_channel *dst, @@ -4302,7 +4830,7 @@ exec_instruction( break; case TGSI_OPCODE_PK2H: - assert (0); + exec_pk2h(mach, inst); break; case TGSI_OPCODE_PK2US: @@ -4374,8 +4902,21 @@ exec_instruction( exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1); break; + case TGSI_OPCODE_TG4: + /* src[0] = texcoord */ + /* src[1] = component */ + /* src[2] = sampler unit */ + exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2); + break; + + case TGSI_OPCODE_LODQ: + /* src[0] = texcoord */ + /* src[1] = sampler unit */ + exec_lodq(mach, inst); + break; + case TGSI_OPCODE_UP2H: - assert (0); + exec_up2h(mach, inst); break; case TGSI_OPCODE_UP2US: @@ -4431,8 +4972,12 @@ exec_instruction( mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; - /* Finally, jump to the subroutine */ + /* Finally, jump to the subroutine. The label is a pointer + * (an instruction number) to the BGNSUB instruction. + */ *pc = inst->Label.Label; + assert(mach->Instructions[*pc].Instruction.Opcode + == TGSI_OPCODE_BGNSUB); } break; @@ -4873,7 +5418,7 @@ exec_instruction( break; case TGSI_OPCODE_SAMPLE_I_MS: - assert(0); + exec_txf(mach, inst); break; case TGSI_OPCODE_SAMPLE: @@ -5070,6 +5615,34 @@ exec_instruction( case TGSI_OPCODE_D2U: exec_d2u(mach, inst); break; + + case TGSI_OPCODE_LOAD: + exec_load(mach, inst); + break; + + case TGSI_OPCODE_STORE: + exec_store(mach, inst); + break; + + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: + case TGSI_OPCODE_ATOMCAS: + case TGSI_OPCODE_ATOMAND: + case TGSI_OPCODE_ATOMOR: + case TGSI_OPCODE_ATOMXOR: + case TGSI_OPCODE_ATOMUMIN: + case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_ATOMIMIN: + case TGSI_OPCODE_ATOMIMAX: + exec_atomop(mach, inst); + break; + + case TGSI_OPCODE_RESQ: + exec_resq(mach, inst); + break; + case TGSI_OPCODE_BARRIER: + case TGSI_OPCODE_MEMBAR: + break; default: assert( 0 ); } @@ -5097,6 +5670,8 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) default_mask = 0x1; } + if (mach->NonHelperMask == 0) + mach->NonHelperMask = default_mask; mach->CondMask = default_mask; mach->LoopMask = default_mask; mach->ContMask = default_mask;