#include <stdio.h>
#include <errno.h>
-/* CAYMAN notes
+/* CAYMAN notes
Why CAYMAN got loops for lots of instructions is explained here.
-These 8xx t-slot only ops are implemented in all vector slots.
MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
-These 8xx t-slot only opcodes become vector ops, with all four
-slots expecting the arguments on sources a and b. Result is
+These 8xx t-slot only opcodes become vector ops, with all four
+slots expecting the arguments on sources a and b. Result is
broadcast to all channels.
MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64
-These 8xx t-slot only opcodes become vector ops in the z, y, and
+These 8xx t-slot only opcodes become vector ops in the z, y, and
x slots.
EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
SQRT_IEEE/_64
SIN/COS
-The w slot may have an independent co-issued operation, or if the
-result is required to be in the w slot, the opcode above may be
+The w slot may have an independent co-issued operation, or if the
+result is required to be in the w slot, the opcode above may be
issued in the w slot as well.
The compiler must issue the source argument to slots z, y, and x
*/
/* disable SB for shaders using doubles */
use_sb &= !shader->shader.uses_doubles;
+ use_sb &= !shader->shader.uses_atomics;
+
/* Check if the bytecode has already been built. */
if (!shader->shader.bc.bytecode) {
r = r600_bytecode_build(&shader->shader.bc);
R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
return -EINVAL;
}
- if (i->Instruction.Predicate) {
- R600_ERR("predicate unsupported\n");
- return -EINVAL;
- }
#if 0
if (i->Instruction.Label) {
R600_ERR("label unsupported\n");
if (i->Src[j].Register.Dimension) {
switch (i->Src[j].Register.File) {
case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_HW_ATOMIC:
break;
case TGSI_FILE_INPUT:
if (ctx->type == PIPE_SHADER_GEOMETRY ||
case TGSI_FILE_ADDRESS:
break;
+ case TGSI_FILE_HW_ATOMIC:
+ i = ctx->shader->nhwatomic_ranges;
+ ctx->shader->atomics[i].start = d->Range.First;
+ ctx->shader->atomics[i].end = d->Range.Last;
+ ctx->shader->atomics[i].hw_idx = ctx->shader->atomic_base + ctx->shader->nhwatomic;
+ ctx->shader->atomics[i].array_id = d->Array.ArrayID;
+ ctx->shader->atomics[i].buffer_id = d->Dim.Index2D;
+ ctx->shader->nhwatomic_ranges++;
+ ctx->shader->nhwatomic += count;
+ break;
+
case TGSI_FILE_SYSTEM_VALUE:
if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
k = eg_get_interpolator_index(interpolate, location);
- ctx->eg_interpolators[k].enabled = true;
+ if (k >= 0)
+ ctx->eg_interpolators[k].enabled = true;
}
} else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;
if (enabled) {
int gpr = gpr_offset + num_regs++;
+ ctx->shader->nsys_inputs++;
// add to inputs, allocate a gpr
- k = ctx->shader->ninput ++;
+ k = ctx->shader->ninput++;
ctx->shader->input[k].name = name;
ctx->shader->input[k].sid = 0;
ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT;
interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
k = eg_get_interpolator_index(interpolate, location);
- ctx->eg_interpolators[k].enabled = true;
+ if (k >= 0)
+ ctx->eg_interpolators[k].enabled = true;
}
}
}
int out_idx = i >= outer_comps ? tessinner_idx : tessouter_idx;
int out_comp = i >= outer_comps ? i - outer_comps : i;
+ if (ctx->shader->tcs_prim_mode == PIPE_PRIM_LINES) {
+ if (out_comp == 1)
+ out_comp = 0;
+ else if (out_comp == 0)
+ out_comp = 1;
+ }
+
r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
treg[i / 2], (2 * (i % 2)),
temp_reg, 0,
struct pipe_stream_output_info so = pipeshader->selector->so;
struct tgsi_full_immediate *immediate;
struct r600_shader_ctx ctx;
- struct r600_bytecode_output output[32];
+ struct r600_bytecode_output output[ARRAY_SIZE(shader->output)];
unsigned output_done, noutput;
unsigned opcode;
int i, j, k, r = 0;
shader->indirect_files = ctx.info.indirect_files;
shader->uses_doubles = ctx.info.uses_doubles;
+ shader->uses_atomics = ctx.info.file_mask[TGSI_FILE_HW_ATOMIC];
+ shader->nsys_inputs = 0;
indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER));
tgsi_parse_init(&ctx.parse, tokens);
shader->vs_as_gs_a = key.vs.as_gs_a;
shader->vs_as_es = key.vs.as_es;
shader->vs_as_ls = key.vs.as_ls;
+ shader->atomic_base = key.vs.first_atomic_counter;
if (shader->vs_as_es)
ring_outputs = true;
if (shader->vs_as_ls)
break;
case PIPE_SHADER_GEOMETRY:
ring_outputs = true;
+ shader->atomic_base = key.gs.first_atomic_counter;
break;
case PIPE_SHADER_TESS_CTRL:
shader->tcs_prim_mode = key.tcs.prim_mode;
+ shader->atomic_base = key.tcs.first_atomic_counter;
lds_outputs = true;
lds_inputs = true;
break;
case PIPE_SHADER_TESS_EVAL:
shader->tes_as_es = key.tes.as_es;
+ shader->atomic_base = key.tes.first_atomic_counter;
lds_inputs = true;
if (shader->tes_as_es)
ring_outputs = true;
break;
case PIPE_SHADER_FRAGMENT:
shader->two_side = key.ps.color_two_side;
+ shader->atomic_base = key.ps.first_atomic_counter;
break;
default:
break;
ctx.file_offset[i] = 0;
}
- if (ctx.type == PIPE_SHADER_VERTEX) {
+ if (ctx.type == PIPE_SHADER_VERTEX) {
+
ctx.file_offset[TGSI_FILE_INPUT] = 1;
- r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
+ if (ctx.info.num_inputs)
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
}
if (ctx.type == PIPE_SHADER_FRAGMENT) {
if (ctx.bc->chip_class >= EVERGREEN)
goto out_err;
}
}
-
+
shader->ring_item_sizes[0] = ctx.next_ring_offset;
shader->ring_item_sizes[1] = 0;
shader->ring_item_sizes[2] = 0;
int i, j, r, lasti = tgsi_last_instruction(write_mask);
/* use temp register if trans_only and more than one dst component */
int use_tmp = trans_only && (write_mask ^ (1 << lasti));
+ unsigned op = ctx->inst_info->op;
+
+ if (op == ALU_OP2_MUL_IEEE &&
+ ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
+ op = ALU_OP2_MUL;
for (i = 0; i <= lasti; i++) {
if (!(write_mask & (1 << i)))
} else
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- alu.op = ctx->inst_info->op;
+ alu.op = op;
if (!swap) {
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
struct r600_bytecode_alu alu;
unsigned write_mask = inst->Dst[0].Register.WriteMask;
int i, j, r;
- int firsti = write_mask == 0xc ? 2 : 0;
for (i = 0; i <= 3; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
return r;
}
- /* MOV first two channels to writemask dst0 */
- for (i = 0; i <= 1; i++) {
+ /* Replicate significand result across channels. */
+ for (i = 0; i <= 3; i++) {
+ if (!(write_mask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
- alu.src[0].chan = i + 2;
+ alu.src[0].chan = (i & 1) + 2;
alu.src[0].sel = ctx->temp_reg;
- tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst);
- alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.write = 1;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
int i, j, r;
struct r600_bytecode_alu alu;
int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
-
+
for (i = 0 ; i < last_slot; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ctx->inst_info->op;
return 0;
}
-static int tgsi_scs(struct r600_shader_ctx *ctx)
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bytecode_alu alu;
- int i, r;
-
- /* We'll only need the trig stuff if we are going to write to the
- * X or Y components of the destination vector.
- */
- if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
- r = tgsi_setup_trig(ctx);
- if (r)
- return r;
- }
-
- /* dst.x = COS */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
- if (ctx->bc->chip_class == CAYMAN) {
- for (i = 0 ; i < 3; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_COS;
- tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
-
- if (i == 0)
- alu.dst.write = 1;
- else
- alu.dst.write = 0;
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 0;
- if (i == 2)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- } else {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_COS;
- tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
-
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 0;
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- }
-
- /* dst.y = SIN */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
- if (ctx->bc->chip_class == CAYMAN) {
- for (i = 0 ; i < 3; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_SIN;
- tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (i == 1)
- alu.dst.write = 1;
- else
- alu.dst.write = 0;
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 0;
- if (i == 2)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- } else {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_SIN;
- tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
-
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 0;
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- }
-
- /* dst.z = 0.0; */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-
- alu.op = ALU_OP1_MOV;
-
- tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
-
- alu.src[0].sel = V_SQ_ALU_SRC_0;
- alu.src[0].chan = 0;
-
- alu.last = 1;
-
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
-
- /* dst.w = 1.0; */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-
- alu.op = ALU_OP1_MOV;
-
- tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
-
- alu.src[0].sel = V_SQ_ALU_SRC_1;
- alu.src[0].chan = 0;
-
- alu.last = 1;
-
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
-
- return 0;
-}
-
static int tgsi_kill(struct r600_shader_ctx *ctx)
{
const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
alu.last = 1;
} else
alu.dst.write = 0;
-
+
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_FLT_TO_UINT;
-
+
alu.dst.sel = tmp0;
alu.dst.chan = 0;
alu.dst.write = 1;
} else {
r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
}
-
+
alu.last = 1;
if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
return r;
} else {
r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
}
-
+
alu.src[1].sel = tmp0;
alu.src[1].chan = 2;
struct r600_bytecode_alu alu;
int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti;
unsigned location;
- int input;
+ const int input = inst->Src[0].Register.Index + ctx->shader->nsys_inputs;
assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
- input = inst->Src[0].Register.Index;
-
/* Interpolators have been marked for use already by allocate_system_value_inputs */
if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
int i, j, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
int temp_regs[4];
+ unsigned op = ctx->inst_info->op;
+
+ if (op == ALU_OP3_MULADD_IEEE &&
+ ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
+ op = ALU_OP3_MULADD;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
temp_regs[j] = 0;
continue;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ctx->inst_info->op;
+ alu.op = op;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]);
if (r)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int i, j, r;
+ unsigned op = ctx->inst_info->op;
+ if (op == ALU_OP2_DOT4_IEEE &&
+ ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
+ op = ALU_OP2_DOT4;
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ctx->inst_info->op;
+ alu.op = op;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
}
alu.src[0].chan = alu.src[1].chan = 0;
}
break;
- case TGSI_OPCODE_DPH:
- if (i == 3) {
- alu.src[0].sel = V_SQ_ALU_SRC_1;
- alu.src[0].chan = 0;
- alu.src[0].neg = 0;
- }
- break;
default:
break;
}
/* Texture fetch instructions can only use gprs as source.
* Also they cannot negate the source or take the absolute value */
- const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQS &&
+ const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQS &&
tgsi_tex_src_requires_loading(ctx, 0)) ||
read_compressed_msaa || txf_add_offsets;
boolean src_loaded = FALSE;
- unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
+ unsigned sampler_src_reg = 1;
int8_t offset_x = 0, offset_y = 0, offset_z = 0;
boolean has_txq_cube_array_z = false;
unsigned sampler_index_mode;
inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
static const unsigned src0_swizzle[] = {2, 2, 0, 1};
static const unsigned src1_swizzle[] = {1, 0, 2, 2};
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
- /* write initial compare value into Z component
+ /* write initial compare value into Z component
- W src 0 for shadow cube
- X src 1 for shadow cube array */
if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
-
+
r = r600_bytecode_add_tex(ctx->bc, &tex);
if (r)
return r;
/* does this shader want a num layers from TXQ for a cube array? */
if (has_txq_cube_array_z) {
int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
-
+
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
}
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ||
- inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
tex.src_sel_x = 4;
tex.src_sel_y = 4;
tex.src_sel_z = 4;
return 0;
}
+static int find_hw_atomic_counter(struct r600_shader_ctx *ctx,
+ struct tgsi_full_src_register *src)
+{
+ int i;
+
+ if (src->Register.Indirect) {
+ for (i = 0; i < ctx->shader->nhwatomic_ranges; i++) {
+ if (src->Indirect.ArrayID == ctx->shader->atomics[i].array_id)
+ return ctx->shader->atomics[i].hw_idx;
+ }
+ } else {
+ uint32_t index = src->Register.Index;
+ for (i = 0; i < ctx->shader->nhwatomic_ranges; i++) {
+ if (ctx->shader->atomics[i].buffer_id != src->Dimension.Index)
+ continue;
+ if (index > ctx->shader->atomics[i].end)
+ continue;
+ if (index < ctx->shader->atomics[i].start)
+ continue;
+ uint32_t offset = (index - ctx->shader->atomics[i].start);
+ return ctx->shader->atomics[i].hw_idx + offset;
+ }
+ }
+ assert(0);
+ return -1;
+}
+
+
+static int tgsi_load_gds(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ int r;
+ struct r600_bytecode_gds gds;
+ int uav_id = 0;
+ int uav_index_mode = 0;
+
+ uav_id = find_hw_atomic_counter(ctx, &inst->Src[0]);
+
+ if (inst->Src[0].Register.Indirect)
+ uav_index_mode = 2;
+
+ memset(&gds, 0, sizeof(struct r600_bytecode_gds));
+ gds.op = FETCH_OP_GDS_READ_RET;
+ gds.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+ gds.uav_id = uav_id;
+ gds.uav_index_mode = uav_index_mode;
+ gds.src_gpr = ctx->temp_reg;
+ gds.src_sel_x = 4;
+ gds.src_sel_y = 4;
+ gds.src_sel_z = 4;
+ gds.dst_sel_x = 0;
+ gds.dst_sel_y = 7;
+ gds.dst_sel_z = 7;
+ gds.dst_sel_w = 7;
+ gds.src_gpr2 = ctx->temp_reg;
+ gds.alloc_consume = 1;
+ r = r600_bytecode_add_gds(ctx->bc, &gds);
+ if (r)
+ return r;
+
+ ctx->bc->cf_last->vpm = 1;
+ return 0;
+}
+
+static int tgsi_load(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
+ return tgsi_load_gds(ctx);
+ return 0;
+}
+
+static int get_gds_op(int opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_ATOMUADD:
+ return FETCH_OP_GDS_ADD_RET;
+ case TGSI_OPCODE_ATOMAND:
+ return FETCH_OP_GDS_AND_RET;
+ case TGSI_OPCODE_ATOMOR:
+ return FETCH_OP_GDS_OR_RET;
+ case TGSI_OPCODE_ATOMXOR:
+ return FETCH_OP_GDS_XOR_RET;
+ case TGSI_OPCODE_ATOMUMIN:
+ return FETCH_OP_GDS_MIN_UINT_RET;
+ case TGSI_OPCODE_ATOMUMAX:
+ return FETCH_OP_GDS_MAX_UINT_RET;
+ case TGSI_OPCODE_ATOMXCHG:
+ return FETCH_OP_GDS_XCHG_RET;
+ case TGSI_OPCODE_ATOMCAS:
+ return FETCH_OP_GDS_CMP_XCHG_RET;
+ default:
+ return -1;
+ }
+}
+
+static int tgsi_atomic_op_gds(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_gds gds;
+ struct r600_bytecode_alu alu;
+ int gds_op = get_gds_op(inst->Instruction.Opcode);
+ int r;
+ int uav_id = 0;
+ int uav_index_mode = 0;
+
+ if (gds_op == -1) {
+ fprintf(stderr, "unknown GDS op for opcode %d\n", inst->Instruction.Opcode);
+ return -1;
+ }
+
+ uav_id = find_hw_atomic_counter(ctx, &inst->Src[0]);
+
+ if (inst->Src[0].Register.Indirect)
+ uav_index_mode = 2;
+
+ if (inst->Src[2].Register.File == TGSI_FILE_IMMEDIATE) {
+ int value = (ctx->literals[4 * inst->Src[2].Register.Index + inst->Src[2].Register.SwizzleX]);
+ int abs_value = abs(value);
+ if (abs_value != value && gds_op == FETCH_OP_GDS_ADD_RET)
+ gds_op = FETCH_OP_GDS_SUB_RET;
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 0;
+ alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[0].value = abs_value;
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 0;
+ r600_bytecode_src(&alu.src[0], &ctx->src[2], 0);
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ memset(&gds, 0, sizeof(struct r600_bytecode_gds));
+ gds.op = gds_op;
+ gds.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+ gds.uav_id = uav_id;
+ gds.uav_index_mode = uav_index_mode;
+ gds.src_gpr = ctx->temp_reg;
+ gds.src_gpr2 = ctx->temp_reg;
+ gds.src_sel_x = 4;
+ gds.src_sel_y = 0;
+ gds.src_sel_z = 4;
+ gds.dst_sel_x = 0;
+ gds.dst_sel_y = 7;
+ gds.dst_sel_z = 7;
+ gds.dst_sel_w = 7;
+ gds.alloc_consume = 1;
+ r = r600_bytecode_add_gds(ctx->bc, &gds);
+ if (r)
+ return r;
+ ctx->bc->cf_last->vpm = 1;
+ return 0;
+}
+
+static int tgsi_atomic_op(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
+ return tgsi_atomic_op_gds(ctx);
+ return 0;
+}
+
static int tgsi_lrp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
return 0;
}
-static int tgsi_xpd(struct r600_shader_ctx *ctx)
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- static const unsigned int src0_swizzle[] = {2, 0, 1};
- static const unsigned int src1_swizzle[] = {1, 2, 0};
- struct r600_bytecode_alu alu;
- uint32_t use_temp = 0;
- int i, r;
-
- if (inst->Dst[0].Register.WriteMask != 0xf)
- use_temp = 1;
-
- for (i = 0; i < 4; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MUL;
- if (i < 3) {
- r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
- r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
- } else {
- alu.src[0].sel = V_SQ_ALU_SRC_0;
- alu.src[0].chan = i;
- alu.src[1].sel = V_SQ_ALU_SRC_0;
- alu.src[1].chan = i;
- }
-
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = i;
- alu.dst.write = 1;
-
- if (i == 3)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
-
- for (i = 0; i < 4; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP3_MULADD;
-
- if (i < 3) {
- r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
- r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
- } else {
- alu.src[0].sel = V_SQ_ALU_SRC_0;
- alu.src[0].chan = i;
- alu.src[1].sel = V_SQ_ALU_SRC_0;
- alu.src[1].chan = i;
- }
-
- alu.src[2].sel = ctx->temp_reg;
- alu.src[2].neg = 1;
- alu.src[2].chan = i;
-
- if (use_temp)
- alu.dst.sel = ctx->temp_reg;
- else
- tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- alu.dst.chan = i;
- alu.dst.write = 1;
- alu.is_op3 = 1;
- if (i == 3)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- if (use_temp)
- return tgsi_helper_copy(ctx, inst);
- return 0;
-}
-
static int tgsi_exp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
alu.op = ALU_OP1_LOG_IEEE;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
r600_bytecode_src_set_abs(&alu.src[0]);
-
+
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- if (i == 0)
+ if (i == 0)
alu.dst.write = 1;
if (i == 2)
alu.last = 1;
alu.op = ALU_OP1_LOG_IEEE;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
r600_bytecode_src_set_abs(&alu.src[0]);
-
+
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
alu.dst.write = 1;
alu.dst.write = 1;
if (i == 2)
alu.last = 1;
-
+
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
- return r;
+ return r;
}
} else {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.dst.write = 1;
if (i == 2)
alu.last = 1;
-
+
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
{
- ctx->bc->fc_sp++;
+ assert(ctx->bc->fc_sp < ARRAY_SIZE(ctx->bc->fc_stack));
ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+ ctx->bc->fc_sp++;
}
static void fc_poplevel(struct r600_shader_ctx *ctx)
{
- struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
+ struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp - 1];
free(sp->mid);
sp->mid = NULL;
sp->num_mid = 0;
r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE);
ctx->bc->cf_last->pop_count = 1;
- fc_set_mid(ctx, ctx->bc->fc_sp);
- ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
+ fc_set_mid(ctx, ctx->bc->fc_sp - 1);
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id;
return 0;
}
static int tgsi_endif(struct r600_shader_ctx *ctx)
{
pops(ctx, 1);
- if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_IF) {
R600_ERR("if/endif unbalanced in shader\n");
return -1;
}
- if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
- ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
- ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid == NULL) {
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2;
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->pop_count = 1;
} else {
- ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
}
fc_poplevel(ctx);
r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END);
- if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_LOOP) {
R600_ERR("loop/endloop in shader code are not paired.\n");
return -EINVAL;
}
LOOP START point to CF after LOOP END
BRK/CONT point to LOOP END CF
*/
- ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
+ ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->id + 2;
- ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2;
- for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
- ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
+ for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp - 1].num_mid; i++) {
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[i]->cf_addr = ctx->bc->cf_last->id;
}
/* XXX add LOOPRET support */
fc_poplevel(ctx);
return 0;
}
-static int tgsi_loop_breakc(struct r600_shader_ctx *ctx)
-{
- int r;
- unsigned int fscp;
-
- for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
- {
- if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
- break;
- }
- if (fscp == 0) {
- R600_ERR("BREAKC not inside loop/endloop pair\n");
- return -EINVAL;
- }
-
- if (ctx->bc->chip_class == EVERGREEN &&
- ctx->bc->family != CHIP_CYPRESS &&
- ctx->bc->family != CHIP_JUNIPER) {
- /* HW bug: ALU_BREAK does not save the active mask correctly */
- r = tgsi_uif(ctx);
- if (r)
- return r;
-
- r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK);
- if (r)
- return r;
- fc_set_mid(ctx, fscp);
-
- return tgsi_endif(ctx);
- } else {
- r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK);
- if (r)
- return r;
- fc_set_mid(ctx, fscp);
- }
-
- return 0;
-}
-
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
{
unsigned int fscp;
for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
{
- if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+ if (FC_LOOP == ctx->bc->fc_stack[fscp - 1].type)
break;
}
r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
- fc_set_mid(ctx, fscp);
+ fc_set_mid(ctx, fscp - 1);
return 0;
}
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
-
+
r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
if (i == lasti) {
alu.last = 1;
[TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq},
[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
- [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2},
[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
- [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
- [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
- [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
- [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
+ [21] = { ALU_OP0_NOP, tgsi_unsupported},
[22] = { ALU_OP0_NOP, tgsi_unsupported},
[23] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
- [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [25] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
[TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
- [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
+ [31] = { ALU_OP0_NOP, tgsi_unsupported},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [35] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
[TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg},
[TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp},
- [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs},
+ [67] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
[TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
[TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
[TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif},
[TGSI_OPCODE_DDX_FINE] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DDY_FINE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [81] = { ALU_OP0_NOP, tgsi_unsupported},
+ [82] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2},
[TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
[TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2},
[TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2},
[TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod},
[TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2},
- [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [93] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex},
[TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
[TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
[TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop},
[TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
+ [103] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
[TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
[TGSI_OPCODE_RESQ] = { ALU_OP0_NOP, tgsi_unsupported},
[106] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported},
+ [113] = { ALU_OP0_NOP, tgsi_unsupported},
[114] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_loop_breakc},
+ [115] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
[TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */
[TGSI_OPCODE_DFMA] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
[TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [163] = { ALU_OP0_NOP, tgsi_unsupported},
+ [164] = { ALU_OP0_NOP, tgsi_unsupported},
+ [165] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
- [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2},
[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
- [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
- [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
- [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
- [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
+ [21] = { ALU_OP0_NOP, tgsi_unsupported},
[22] = { ALU_OP0_NOP, tgsi_unsupported},
[23] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
- [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [25] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
[TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
- [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
+ [31] = { ALU_OP0_NOP, tgsi_unsupported},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [35] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
[TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg},
[TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp},
- [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs},
+ [67] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
[TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
[TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
[TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif},
[TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [82] = { ALU_OP0_NOP, tgsi_unsupported},
+ [83] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2},
[TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
[TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2},
[TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2},
[TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod},
[TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2},
- [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [93] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex},
[TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
[TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
[TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop},
[TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
+ [103] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
[TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
[TGSI_OPCODE_RESQ] = { ALU_OP0_NOP, tgsi_unsupported},
[106] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported},
+ [113] = { ALU_OP0_NOP, tgsi_unsupported},
[114] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported},
+ [115] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
[TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */
/* Refer below for TGSI_OPCODE_DFMA */
[TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp},
[TGSI_OPCODE_IABS] = { 0, tgsi_iabs},
[TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
- [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_load},
[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [163] = { ALU_OP0_NOP, tgsi_unsupported},
+ [164] = { ALU_OP0_NOP, tgsi_unsupported},
+ [165] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_BARRIER] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
- [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMUADD] = { V_RAT_INST_ADD_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMXCHG] = { V_RAT_INST_XCHG_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMCAS] = { V_RAT_INST_CMPXCHG_INT_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMAND] = { V_RAT_INST_AND_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMOR] = { V_RAT_INST_OR_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMXOR] = { V_RAT_INST_XOR_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMUMIN] = { V_RAT_INST_MIN_UINT_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMUMAX] = { V_RAT_INST_MAX_UINT_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMIMIN] = { V_RAT_INST_MIN_INT_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMIMAX] = { V_RAT_INST_MAX_INT_RTN, tgsi_atomic_op},
[TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex},
[TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex},
[TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr},
[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
- [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2},
[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
- [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
- [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
- [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
- [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
+ [21] = { ALU_OP0_NOP, tgsi_unsupported},
[22] = { ALU_OP0_NOP, tgsi_unsupported},
[23] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
- [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [25] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
[TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow},
- [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
+ [31] = { ALU_OP0_NOP, tgsi_unsupported},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [35] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
[TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg},
[TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp},
- [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs},
+ [67] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
[TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
[TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
[TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif},
[TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [82] = { ALU_OP0_NOP, tgsi_unsupported},
+ [83] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2},
[TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2},
[TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2},
[TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2},
[TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod},
[TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2},
- [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [93] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex},
[TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
[TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
[TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop},
[TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
+ [103] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
[TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
[TGSI_OPCODE_RESQ] = { ALU_OP0_NOP, tgsi_unsupported},
[106] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported},
+ [113] = { ALU_OP0_NOP, tgsi_unsupported},
[114] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported},
+ [115] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
[TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */
/* Refer below for TGSI_OPCODE_DFMA */
[TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp},
[TGSI_OPCODE_IABS] = { 0, tgsi_iabs},
[TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
- [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_load},
[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [163] = { ALU_OP0_NOP, tgsi_unsupported},
+ [164] = { ALU_OP0_NOP, tgsi_unsupported},
+ [165] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_BARRIER] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
- [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMUADD] = { V_RAT_INST_ADD_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMXCHG] = { V_RAT_INST_XCHG_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMCAS] = { V_RAT_INST_CMPXCHG_INT_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMAND] = { V_RAT_INST_AND_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMOR] = { V_RAT_INST_OR_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMXOR] = { V_RAT_INST_XOR_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMUMIN] = { V_RAT_INST_MIN_UINT_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMUMAX] = { V_RAT_INST_MAX_UINT_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMIMIN] = { V_RAT_INST_MIN_INT_RTN, tgsi_atomic_op},
+ [TGSI_OPCODE_ATOMIMAX] = { V_RAT_INST_MAX_INT_RTN, tgsi_atomic_op},
[TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex},
[TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex},