These 8xx t-slot only opcodes become vector ops, with all four
slots expecting the arguments on sources a and b. Result is
broadcast to all channels.
-MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
+MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64
These 8xx t-slot only opcodes become vector ops in the z, y, and
x slots.
EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
static int r600_shader_from_tgsi(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
- struct r600_shader_key key);
+ union r600_shader_key key);
static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
for (i = 0; i < so->num_outputs; i++) {
unsigned mask = ((1 << so->output[i].num_components) - 1) <<
so->output[i].start_component;
- fprintf(stderr, " %i: MEM_STREAM0_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
- i, so->output[i].output_buffer,
+ fprintf(stderr, " %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
+ i,
+ so->output[i].stream,
+ so->output[i].output_buffer,
so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
so->output[i].register_index,
mask & 1 ? "x" : "",
int r600_pipe_shader_create(struct pipe_context *ctx,
struct r600_pipe_shader *shader,
- struct r600_shader_key key)
+ union r600_shader_key key)
{
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_pipe_shader_selector *sel = shader->selector;
bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
- unsigned export_shader = key.vs_as_es;
+ unsigned export_shader;
shader->shader.bc.isa = rctx->isa;
goto error;
}
- /* disable SB for geom shaders - it can't handle the CF_EMIT instructions */
- use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
+ /* disable SB for geom shaders on R6xx/R7xx due to some mysterious gs piglit regressions with it enabled. */
+ if (rctx->b.chip_class <= R700) {
+ use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
+ }
/* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array indexing) as it doesn't handle those currently */
use_sb &= !shader->shader.uses_index_registers;
+ /* disable SB for shaders using doubles */
+ use_sb &= !shader->shader.uses_doubles;
/* Check if the bytecode has already been built. When using the llvm
* backend, r600_shader_from_tgsi() will take care of building the
}
break;
case TGSI_PROCESSOR_VERTEX:
+ export_shader = key.vs.as_es;
if (rctx->b.chip_class >= EVERGREEN) {
if (export_shader)
evergreen_update_es_state(ctx, shader);
unsigned type;
unsigned file_offset[TGSI_FILE_COUNT];
unsigned temp_reg;
- struct r600_shader_tgsi_instruction *inst_info;
+ const struct r600_shader_tgsi_instruction *inst_info;
struct r600_bytecode *bc;
struct r600_shader *shader;
struct r600_shader_src src[4];
int gs_out_ring_offset;
int gs_next_vertex;
struct r600_shader *gs_for_vs;
- int gs_export_gpr_treg;
+ int gs_export_gpr_tregs[4];
+ const struct pipe_stream_output_info *gs_stream_output_info;
+ unsigned enabled_stream_buffers_mask;
};
struct r600_shader_tgsi_instruction {
- unsigned tgsi_opcode;
- unsigned is_op3;
unsigned op;
int (*process)(struct r600_shader_ctx *ctx);
};
-static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind);
-static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
+static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
int j;
- if (i->Instruction.NumDstRegs > 1) {
+ if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) {
R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
return -EINVAL;
}
return 0;
}
+static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid)
+{ /* Appends one PRIMID output entry to ctx->shader and tags it with the caller-supplied spi_sid; always returns 0. */
+ int i;
+ i = ctx->shader->noutput++; /* take the next free output slot; NOTE(review): no bounds assert here, unlike the declaration path */
+ ctx->shader->output[i].name = TGSI_SEMANTIC_PRIMID;
+ ctx->shader->output[i].sid = 0;
+ ctx->shader->output[i].gpr = 0;
+ ctx->shader->output[i].interpolate = TGSI_INTERPOLATE_CONSTANT;
+ ctx->shader->output[i].write_mask = 0x4; /* only bit 2 of the mask is set */
+ ctx->shader->output[i].spi_sid = prim_id_sid; /* taken verbatim from the caller instead of being derived via r600_spi_sid() */
+
+ return 0;
+}
+
static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
switch (d->Declaration.File) {
case TGSI_FILE_INPUT:
- i = ctx->shader->ninput;
- assert(i < Elements(ctx->shader->input));
- ctx->shader->ninput += count;
- ctx->shader->input[i].name = d->Semantic.Name;
- ctx->shader->input[i].sid = d->Semantic.Index;
- ctx->shader->input[i].interpolate = d->Interp.Interpolate;
- ctx->shader->input[i].interpolate_location = d->Interp.Location;
- ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
- if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
- switch (ctx->shader->input[i].name) {
- case TGSI_SEMANTIC_FACE:
- if (ctx->face_gpr != -1)
- ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
- else
- ctx->face_gpr = ctx->shader->input[i].gpr;
- break;
- case TGSI_SEMANTIC_COLOR:
- ctx->colors_used++;
- break;
- case TGSI_SEMANTIC_POSITION:
- ctx->fragcoord_input = i;
- break;
- }
- if (ctx->bc->chip_class >= EVERGREEN) {
- if ((r = evergreen_interp_input(ctx, i)))
- return r;
+ for (j = 0; j < count; j++) {
+ i = ctx->shader->ninput + j;
+ assert(i < Elements(ctx->shader->input));
+ ctx->shader->input[i].name = d->Semantic.Name;
+ ctx->shader->input[i].sid = d->Semantic.Index + j;
+ ctx->shader->input[i].interpolate = d->Interp.Interpolate;
+ ctx->shader->input[i].interpolate_location = d->Interp.Location;
+ ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j;
+ if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+ ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
+ switch (ctx->shader->input[i].name) {
+ case TGSI_SEMANTIC_FACE:
+ if (ctx->face_gpr != -1)
+ ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
+ else
+ ctx->face_gpr = ctx->shader->input[i].gpr;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ ctx->colors_used++;
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ ctx->fragcoord_input = i;
+ break;
+ case TGSI_SEMANTIC_PRIMID:
+ /* set this for now */
+ ctx->shader->gs_prim_id_input = true;
+ ctx->shader->ps_prim_id_input = i;
+ break;
+ }
+ if (ctx->bc->chip_class >= EVERGREEN) {
+ if ((r = evergreen_interp_input(ctx, i)))
+ return r;
+ }
+ } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+ /* FIXME probably skip inputs if they aren't passed in the ring */
+ ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
+ ctx->next_ring_offset += 16;
+ if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID)
+ ctx->shader->gs_prim_id_input = true;
}
- } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
- /* FIXME probably skip inputs if they aren't passed in the ring */
- ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
- ctx->next_ring_offset += 16;
- if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID)
- ctx->shader->gs_prim_id_input = true;
- }
- for (j = 1; j < count; ++j) {
- ctx->shader->input[i + j] = ctx->shader->input[i];
- ctx->shader->input[i + j].gpr += j;
}
+ ctx->shader->ninput += count;
break;
case TGSI_FILE_OUTPUT:
- i = ctx->shader->noutput++;
- assert(i < Elements(ctx->shader->output));
- ctx->shader->output[i].name = d->Semantic.Name;
- ctx->shader->output[i].sid = d->Semantic.Index;
- ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
- ctx->shader->output[i].interpolate = d->Interp.Interpolate;
- ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
- if (ctx->type == TGSI_PROCESSOR_VERTEX ||
- ctx->type == TGSI_PROCESSOR_GEOMETRY) {
- ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
- switch (d->Semantic.Name) {
- case TGSI_SEMANTIC_CLIPDIST:
- ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
- break;
- case TGSI_SEMANTIC_PSIZE:
- ctx->shader->vs_out_misc_write = 1;
- ctx->shader->vs_out_point_size = 1;
- break;
- case TGSI_SEMANTIC_EDGEFLAG:
- ctx->shader->vs_out_misc_write = 1;
- ctx->shader->vs_out_edgeflag = 1;
- ctx->edgeflag_output = i;
- break;
- case TGSI_SEMANTIC_VIEWPORT_INDEX:
- ctx->shader->vs_out_misc_write = 1;
- ctx->shader->vs_out_viewport = 1;
- break;
- case TGSI_SEMANTIC_LAYER:
- ctx->shader->vs_out_misc_write = 1;
- ctx->shader->vs_out_layer = 1;
- break;
- case TGSI_SEMANTIC_CLIPVERTEX:
- ctx->clip_vertex_write = TRUE;
- ctx->cv_output = i;
- break;
- }
- if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
- ctx->gs_out_ring_offset += 16;
- }
- } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- switch (d->Semantic.Name) {
- case TGSI_SEMANTIC_COLOR:
- ctx->shader->nr_ps_max_color_exports++;
- break;
+ for (j = 0; j < count; j++) {
+ i = ctx->shader->noutput + j;
+ assert(i < Elements(ctx->shader->output));
+ ctx->shader->output[i].name = d->Semantic.Name;
+ ctx->shader->output[i].sid = d->Semantic.Index + j;
+ ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j;
+ ctx->shader->output[i].interpolate = d->Interp.Interpolate;
+ ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
+ if (ctx->type == TGSI_PROCESSOR_VERTEX ||
+ ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+ ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
+ switch (d->Semantic.Name) {
+ case TGSI_SEMANTIC_CLIPDIST:
+ ctx->shader->clip_dist_write |= d->Declaration.UsageMask <<
+ ((d->Semantic.Index + j) << 2);
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ ctx->shader->vs_out_misc_write = 1;
+ ctx->shader->vs_out_point_size = 1;
+ break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ ctx->shader->vs_out_misc_write = 1;
+ ctx->shader->vs_out_edgeflag = 1;
+ ctx->edgeflag_output = i;
+ break;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ ctx->shader->vs_out_misc_write = 1;
+ ctx->shader->vs_out_viewport = 1;
+ break;
+ case TGSI_SEMANTIC_LAYER:
+ ctx->shader->vs_out_misc_write = 1;
+ ctx->shader->vs_out_layer = 1;
+ break;
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ ctx->clip_vertex_write = TRUE;
+ ctx->cv_output = i;
+ break;
+ }
+ if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+ ctx->gs_out_ring_offset += 16;
+ }
+ } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+ switch (d->Semantic.Name) {
+ case TGSI_SEMANTIC_COLOR:
+ ctx->shader->nr_ps_max_color_exports++;
+ break;
+ }
}
}
+ ctx->shader->noutput += count;
break;
case TGSI_FILE_TEMPORARY:
if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
case TGSI_FILE_CONSTANT:
case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_SAMPLER_VIEW:
case TGSI_FILE_ADDRESS:
break;
memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
vtx.op = FETCH_OP_VFETCH;
vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER;
- vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
+ vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
if (sample_id == NULL) {
vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
vtx.src_sel_x = 3;
memset(&vtx, 0, sizeof(vtx));
vtx.buffer_id = cb_idx;
- vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
+ vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
vtx.src_gpr = ar_reg;
vtx.src_sel_x = ar_chan;
vtx.mega_fetch_count = 16;
for (i = 0; i < 3; i++) {
treg[i] = r600_get_temp(ctx);
}
+ r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F);
+
t2 = r600_get_temp(ctx);
for (i = 0; i < 3; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
memset(&vtx, 0, sizeof(vtx));
vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
- vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
+ vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
vtx.src_gpr = offset_reg;
vtx.src_sel_x = offset_chan;
vtx.offset = index * 16; /*bytes*/
return 0;
}
-static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so)
+static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so,
+ int stream, unsigned *stream_item_size)
{
unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
+ unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
int i, j, r;
/* Sanity checking. */
- if (so->num_outputs > PIPE_MAX_SHADER_OUTPUTS) {
+ if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) {
R600_ERR("Too many stream outputs: %d\n", so->num_outputs);
r = -EINVAL;
goto out_err;
/* Initialize locations where the outputs are stored. */
for (i = 0; i < so->num_outputs; i++) {
- so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
+ so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
+ start_comp[i] = so->output[i].start_component;
/* Lower outputs with dst_offset < start_component.
*
* We can only output 4D vectors with a write mask, e.g. we can
if (r)
return r;
}
- so->output[i].start_component = 0;
+ start_comp[i] = 0;
so_gpr[i] = tmp;
}
}
for (i = 0; i < so->num_outputs; i++) {
struct r600_bytecode_output output;
+ if (stream != -1 && stream != so->output[i].output_buffer)
+ continue;
+
memset(&output, 0, sizeof(struct r600_bytecode_output));
output.gpr = so_gpr[i];
- output.elem_size = so->output[i].num_components;
- output.array_base = so->output[i].dst_offset - so->output[i].start_component;
+ output.elem_size = so->output[i].num_components - 1;
+ if (output.elem_size == 2)
+ output.elem_size = 3; // 3 not supported, write 4 with junk at end
+ output.array_base = so->output[i].dst_offset - start_comp[i];
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
output.burst_count = 1;
/* array_size is an upper limit for the burst_count
* with MEM_STREAM instructions */
output.array_size = 0xFFF;
- output.comp_mask = ((1 << so->output[i].num_components) - 1) << so->output[i].start_component;
+ output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i];
+
if (ctx->bc->chip_class >= EVERGREEN) {
switch (so->output[i].output_buffer) {
case 0:
output.op = CF_OP_MEM_STREAM0_BUF3;
break;
}
+ output.op += so->output[i].stream * 4;
+ assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
+ ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4;
} else {
switch (so->output[i].output_buffer) {
case 0:
output.op = CF_OP_MEM_STREAM3;
break;
}
+ ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer;
}
r = r600_bytecode_add_output(ctx->bc, &output);
if (r)
struct r600_bytecode_output output;
struct r600_bytecode_cf *cf_jump, *cf_pop,
*last_exp_pos = NULL, *last_exp_param = NULL;
- int i, next_clip_pos = 61, next_param = 0;
+ int i, j, next_clip_pos = 61, next_param = 0;
+ int ring;
cshader = calloc(1, sizeof(struct r600_pipe_shader));
if (!cshader)
ctx.bc->isa = rctx->isa;
+ cf_jump = NULL;
+ memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes));
+
/* R0.x = R0.x & 0x3fffffff */
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP2_AND_INT;
alu.last = 1;
r600_bytecode_add_alu(ctx.bc, &alu);
- /* PRED_SETE_INT __, R0.y, 0 */
- memset(&alu, 0, sizeof(alu));
- alu.op = ALU_OP2_PRED_SETE_INT;
- alu.src[0].chan = 1;
- alu.src[1].sel = V_SQ_ALU_SRC_0;
- alu.execute_mask = 1;
- alu.update_pred = 1;
- alu.last = 1;
- r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
-
- r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
- cf_jump = ctx.bc->cf_last;
-
/* fetch vertex data from GSVS ring */
for (i = 0; i < ocnt; ++i) {
struct r600_shader_io *out = &ctx.shader->output[i];
+
out->gpr = i + 1;
out->ring_offset = i * 16;
memset(&vtx, 0, sizeof(vtx));
vtx.op = FETCH_OP_VFETCH;
vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
- vtx.fetch_type = 2;
+ vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
vtx.offset = out->ring_offset;
vtx.dst_gpr = out->gpr;
+ vtx.src_gpr = 0;
vtx.dst_sel_x = 0;
vtx.dst_sel_y = 1;
vtx.dst_sel_z = 2;
r600_bytecode_add_vtx(ctx.bc, &vtx);
}
+ ctx.temp_reg = i + 1;
+ for (ring = 3; ring >= 0; --ring) {
+ bool enabled = false;
+ for (i = 0; i < so->num_outputs; i++) {
+ if (so->output[i].stream == ring) {
+ enabled = true;
+ break;
+ }
+ }
+ if (ring != 0 && !enabled) {
+ cshader->shader.ring_item_sizes[ring] = 0;
+ continue;
+ }
+
+ if (cf_jump) {
+ // Patch up jump label
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
+ cf_pop = ctx.bc->cf_last;
- /* XXX handle clipvertex, streamout? */
- emit_streamout(&ctx, so);
+ cf_jump->cf_addr = cf_pop->id + 2;
+ cf_jump->pop_count = 1;
+ cf_pop->cf_addr = cf_pop->id + 2;
+ cf_pop->pop_count = 1;
+ }
+
+ /* PRED_SETE_INT __, R0.y, ring */
+ memset(&alu, 0, sizeof(alu));
+ alu.op = ALU_OP2_PRED_SETE_INT;
+ alu.src[0].chan = 1;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = ring;
+ alu.execute_mask = 1;
+ alu.update_pred = 1;
+ alu.last = 1;
+ r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
+
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
+ cf_jump = ctx.bc->cf_last;
+
+ if (enabled)
+ emit_streamout(&ctx, so, ring, &cshader->shader.ring_item_sizes[ring]);
+ cshader->shader.ring_item_sizes[ring] = ocnt * 16;
+ }
/* export vertex data */
/* XXX factor out common code with r600_shader_from_tgsi ? */
for (i = 0; i < ocnt; ++i) {
struct r600_shader_io *out = &ctx.shader->output[i];
-
+ bool instream0 = true;
if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
continue;
+ for (j = 0; j < so->num_outputs; j++) {
+ if (so->output[j].register_index == i) {
+ if (so->output[j].stream == 0)
+ break;
+ if (so->output[j].stream > 0)
+ instream0 = false;
+ }
+ }
+ if (!instream0)
+ continue;
memset(&output, 0, sizeof(output));
output.gpr = out->gpr;
output.elem_size = 3;
}
gs->gs_copy_shader = cshader;
+ cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
ctx.bc->nstack = 1;
- cshader->shader.ring_item_size = ocnt * 16;
return r600_bytecode_build(ctx.bc);
}
-static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind)
{
struct r600_bytecode_output output;
int i, k, ring_offset;
+ int effective_stream = stream == -1 ? 0 : stream;
+ int idx = 0;
for (i = 0; i < ctx->shader->noutput; i++) {
if (ctx->gs_for_vs) {
if (ring_offset == -1)
continue;
- } else
- ring_offset = i * 16;
+ } else {
+ ring_offset = idx * 16;
+ idx++;
+ }
+ if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION)
+ continue;
/* next_ring_offset after parsing input decls contains total size of
* single vertex data, gs_next_vertex - current vertex index */
if (!ind)
ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;
- /* get a temp and add the ring offset to the next vertex base in the shader */
memset(&output, 0, sizeof(struct r600_bytecode_output));
output.gpr = ctx->shader->output[i].gpr;
output.elem_size = 3;
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
else
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
- output.op = CF_OP_MEM_RING;
+ switch (stream) {
+ default:
+ case 0:
+ output.op = CF_OP_MEM_RING; break;
+ case 1:
+ output.op = CF_OP_MEM_RING1; break;
+ case 2:
+ output.op = CF_OP_MEM_RING2; break;
+ case 3:
+ output.op = CF_OP_MEM_RING3; break;
+ }
if (ind) {
output.array_base = ring_offset >> 2; /* in dwords */
output.array_size = 0xfff;
- output.index_gpr = ctx->gs_export_gpr_treg;
+ output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream];
} else
output.array_base = ring_offset >> 2; /* in dwords */
r600_bytecode_add_output(ctx->bc, &output);
}
if (ind) {
+ /* get a temp and add the ring offset to the next vertex base in the shader */
struct r600_bytecode_alu alu;
int r;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP2_ADD_INT;
- alu.src[0].sel = ctx->gs_export_gpr_treg;
+ alu.src[0].sel = ctx->gs_export_gpr_tregs[effective_stream];
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].value = ctx->gs_out_ring_offset >> 4;
- alu.dst.sel = ctx->gs_export_gpr_treg;
+ alu.dst.sel = ctx->gs_export_gpr_tregs[effective_stream];
alu.dst.write = 1;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
static int r600_shader_from_tgsi(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
- struct r600_shader_key key)
+ union r600_shader_key key)
{
struct r600_screen *rscreen = rctx->screen;
struct r600_shader *shader = &pipeshader->shader;
struct tgsi_token *tokens = pipeshader->selector->tokens;
struct pipe_stream_output_info so = pipeshader->selector->so;
struct tgsi_full_immediate *immediate;
- struct tgsi_full_property *property;
struct r600_shader_ctx ctx;
struct r600_bytecode_output output[32];
unsigned output_done, noutput;
unsigned opcode;
int i, j, k, r = 0;
int next_param_base = 0, next_clip_base;
- int max_color_exports = MAX2(key.nr_cbufs, 1);
+ int max_color_exports = MAX2(key.ps.nr_cbufs, 1);
/* Declarations used by llvm code */
bool use_llvm = false;
bool indirect_gprs;
ctx.shader = shader;
ctx.native_integers = true;
- shader->vs_as_es = key.vs_as_es;
r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
rscreen->has_compressed_msaa_texturing);
ctx.tokens = tokens;
tgsi_scan_shader(tokens, &ctx.info);
shader->indirect_files = ctx.info.indirect_files;
+
+ shader->uses_doubles = ctx.info.uses_doubles;
+
indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
tgsi_parse_init(&ctx.parse, tokens);
- ctx.type = ctx.parse.FullHeader.Processor.Processor;
+ ctx.type = ctx.info.processor;
shader->processor_type = ctx.type;
ctx.bc->type = shader->processor_type;
- ring_outputs = key.vs_as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
+ if (ctx.type == TGSI_PROCESSOR_VERTEX) {
+ shader->vs_as_gs_a = key.vs.as_gs_a;
+ shader->vs_as_es = key.vs.as_es;
+ }
+
+ ring_outputs = shader->vs_as_es || ctx.type == TGSI_PROCESSOR_GEOMETRY;
- if (key.vs_as_es) {
+ if (shader->vs_as_es) {
ctx.gs_for_vs = &rctx->gs_shader->current->shader;
} else {
ctx.gs_for_vs = NULL;
ctx.next_ring_offset = 0;
ctx.gs_out_ring_offset = 0;
ctx.gs_next_vertex = 0;
+ ctx.gs_stream_output_info = &so;
shader->uses_index_registers = false;
ctx.face_gpr = -1;
shader->nr_ps_color_exports = 0;
shader->nr_ps_max_color_exports = 0;
- shader->two_side = key.color_two_side;
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT)
+ shader->two_side = key.ps.color_two_side;
/* register allocations */
/* Values [0,127] correspond to GPR[0..127].
ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
+ ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1;
+ ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2;
+
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
- ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 1;
- ctx.temp_reg = ctx.bc->ar_reg + 2;
- ctx.bc->index_reg[0] = ctx.bc->ar_reg + 3;
- ctx.bc->index_reg[1] = ctx.bc->ar_reg + 4;
+ ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3;
+ ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4;
+ ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5;
+ ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6;
+ ctx.temp_reg = ctx.bc->ar_reg + 7;
} else {
- ctx.temp_reg = ctx.bc->ar_reg + 1;
- ctx.bc->index_reg[0] = ctx.bc->ar_reg + 2;
- ctx.bc->index_reg[1] = ctx.bc->ar_reg + 3;
+ ctx.temp_reg = ctx.bc->ar_reg + 3;
}
+ shader->max_arrays = 0;
+ shader->num_arrays = 0;
if (indirect_gprs) {
- shader->max_arrays = 0;
- shader->num_arrays = 0;
if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) {
r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT],
ctx.nliterals = 0;
ctx.literals = NULL;
- shader->fs_write_all = FALSE;
+
+ shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
+ shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+
+ if (shader->vs_as_gs_a)
+ vs_add_primid_output(&ctx, key.vs.prim_id_out);
+
while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
tgsi_parse_token(&ctx.parse);
switch (ctx.parse.FullToken.Token.Type) {
goto out_err;
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- break;
case TGSI_TOKEN_TYPE_PROPERTY:
- property = &ctx.parse.FullToken.FullProperty;
- switch (property->Property.PropertyName) {
- case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
- if (property->u[0].Data == 1)
- shader->fs_write_all = TRUE;
- break;
- case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION:
- if (property->u[0].Data == 1)
- shader->vs_position_window_space = TRUE;
- break;
- case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
- /* we don't need this one */
- break;
- case TGSI_PROPERTY_GS_INPUT_PRIM:
- shader->gs_input_prim = property->u[0].Data;
- break;
- case TGSI_PROPERTY_GS_OUTPUT_PRIM:
- shader->gs_output_prim = property->u[0].Data;
- break;
- case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
- shader->gs_max_out_vertices = property->u[0].Data;
- break;
- case TGSI_PROPERTY_GS_INVOCATIONS:
- shader->gs_num_invocations = property->u[0].Data;
- break;
- }
break;
default:
R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
}
}
- shader->ring_item_size = ctx.next_ring_offset;
+ shader->ring_item_sizes[0] = ctx.next_ring_offset;
+ shader->ring_item_sizes[1] = 0;
+ shader->ring_item_sizes[2] = 0;
+ shader->ring_item_sizes[3] = 0;
/* Process two side if needed */
if (shader->two_side && ctx.colors_used) {
radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN);
radeon_llvm_ctx.stream_outputs = &so;
- radeon_llvm_ctx.clip_vertex = ctx.cv_output;
- radeon_llvm_ctx.alpha_to_one = key.alpha_to_one;
+ radeon_llvm_ctx.alpha_to_one = key.ps.alpha_to_one;
radeon_llvm_ctx.has_compressed_msaa_texturing =
ctx.bc->has_compressed_msaa_texturing;
mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
struct r600_bytecode_alu alu;
int r;
-
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_MOV;
- alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
- alu.src[0].value = 0;
- alu.dst.sel = ctx.gs_export_gpr_treg;
- alu.dst.write = 1;
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx.bc, &alu);
- if (r)
- return r;
+ for (j = 0; j < 4; j++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[0].value = 0;
+ alu.dst.sel = ctx.gs_export_gpr_tregs[j];
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx.bc, &alu);
+ if (r)
+ return r;
+ }
}
if (shader->two_side && ctx.colors_used) {
if ((r = process_twoside_color_inputs(&ctx)))
/* Add stream outputs. */
if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX &&
so.num_outputs && !use_llvm)
- emit_streamout(&ctx, &so);
+ emit_streamout(&ctx, &so, -1, NULL);
+ pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
convert_edgeflag_to_int(&ctx);
if (ring_outputs) {
- if (key.vs_as_es)
- emit_gs_ring_writes(&ctx, FALSE);
+ if (shader->vs_as_es) {
+ ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx);
+ ctx.gs_export_gpr_tregs[1] = -1;
+ ctx.gs_export_gpr_tregs[2] = -1;
+ ctx.gs_export_gpr_tregs[3] = -1;
+
+ emit_gs_ring_writes(&ctx, &so, -1, FALSE);
+ }
} else {
/* Export output */
next_clip_base = shader->vs_out_misc_write ? 62 : 61;
output[j].swizzle_z = 4; /* 0 */
output[j].swizzle_w = 5; /* 1 */
break;
+ case TGSI_SEMANTIC_PRIMID:
+ output[j].swizzle_x = 2;
+ output[j].swizzle_y = 4; /* 0 */
+ output[j].swizzle_z = 4; /* 0 */
+ output[j].swizzle_w = 4; /* 0 */
+ break;
}
+
break;
case TGSI_PROCESSOR_FRAGMENT:
if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
j--;
continue;
}
- output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
+ output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3;
output[j].array_base = shader->output[i].sid;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
shader->nr_ps_color_exports++;
output[j].swizzle_x = 0;
output[j].swizzle_y = 1;
output[j].swizzle_z = 2;
- output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
+ output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3;
output[j].burst_count = 1;
output[j].array_base = k;
output[j].op = CF_OP_EXPORT;
output[j].array_base = 0;
output[j].op = CF_OP_EXPORT;
j++;
+ shader->nr_ps_color_exports++;
}
noutput = j;
static int tgsi_unsupported(struct r600_shader_ctx *ctx)
{
+ const unsigned tgsi_opcode =
+ ctx->parse.FullToken.FullInstruction.Instruction.Opcode; /* opcode taken from the parser's current full instruction */
R600_ERR("%s tgsi opcode unsupported\n",
- tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
+ tgsi_get_opcode_name(tgsi_opcode)); /* log the opcode by name; the function then fails with -EINVAL */
return -EINVAL;
}
return lasti;
}
+
+
+static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap)
+{ /* Emits one ALU op per selected channel for a 64-bit instruction; singledest widens a one-channel mask to its pair, swap exchanges sources 0 and 1. Returns 0 or the first r600_bytecode_add_alu() error. */
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ unsigned write_mask = inst->Dst[0].Register.WriteMask;
+ struct r600_bytecode_alu alu;
+ int i, j, r, lasti = tgsi_last_instruction(write_mask); /* NOTE(review): this initial lasti is recomputed below before it is used */
+ int use_tmp = 0; /* 0 = write dst directly; otherwise (temp channel + 1) that is copied to dst afterwards */
+
+ if (singledest) {
+ switch (write_mask) { /* masks other than a single channel are left unchanged */
+ case 0x1:
+ write_mask = 0x3;
+ break;
+ case 0x2:
+ use_tmp = 1; /* result is staged in temp channel 0 and moved to channel 1 later */
+ write_mask = 0x3;
+ break;
+ case 0x4:
+ write_mask = 0xc;
+ break;
+ case 0x8:
+ write_mask = 0xc;
+ use_tmp = 3; /* result is staged in temp channel 2 and moved to channel 3 later */
+ break;
+ }
+ }
+
+ lasti = tgsi_last_instruction(write_mask); /* based on the possibly widened mask */
+ for (i = 0; i <= lasti; i++) {
+
+ if (!(write_mask & (1 << i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ if (singledest) {
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (use_tmp) {
+ alu.dst.sel = ctx->temp_reg; /* redirect the destination to the context temp register */
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ }
+ if (i == 1 || i == 3)
+ alu.dst.write = 0; /* in single-dest mode channels 1 and 3 never write, even when use_tmp is set */
+ } else
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+ alu.op = ctx->inst_info->op;
+ if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) {
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i); /* DABS reads channel i directly, without fp64_switch() */
+ } else if (!swap) {
+ for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); /* source channel remapped by fp64_switch() (helper not visible here) */
+ }
+ } else {
+ r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i)); /* swapped: TGSI source 1 feeds ALU source 0 */
+ r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i));
+ }
+
+ /* handle some special cases */
+ if (i == 1 || i == 3) { /* source modifiers are applied on channels 1 and 3 only */
+ switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) {
+ case TGSI_OPCODE_SUB:
+ r600_bytecode_src_toggle_neg(&alu.src[1]);
+ break;
+ case TGSI_OPCODE_DABS:
+ r600_bytecode_src_set_abs(&alu.src[0]);
+ break;
+ default:
+ break;
+ }
+ }
+ if (i == lasti) {
+ alu.last = 1; /* flag the final op emitted by this loop */
+ }
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ if (use_tmp) {
+ write_mask = inst->Dst[0].Register.WriteMask; /* back to the original, un-widened mask */
+
+ /* move result from temp to dst */
+ for (i = 0; i <= lasti; i++) {
+ if (!(write_mask & (1 << i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = use_tmp - 1; /* temp channel that holds the staged result */
+ alu.last = (i == lasti);
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Translate a two-operand 64-bit TGSI instruction.
+ *
+ * The destination write mask must fully select at least one channel pair
+ * (0x3 or 0xc). Otherwise the instruction is rejected with R600_ERR and
+ * -EINVAL, matching the other validation failures in this file.
+ * NOTE(review): a mask such as 0x7 passes this check although its upper
+ * pair is only half selected - confirm that this is intended.
+ *
+ * Returns 0 on success or a negative value on failure.
+ */
+static int tgsi_op2_64(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ unsigned write_mask = inst->Dst[0].Register.WriteMask;
+ /* confirm writemasking */
+ if ((write_mask & 0x3) != 0x3 &&
+ (write_mask & 0xc) != 0xc) {
+ R600_ERR("illegal writemask for 64-bit: 0x%x\n", write_mask);
+ return -EINVAL;
+ }
+ return tgsi_op2_64_params(ctx, false, false);
+}
+
+static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx)
+{ /* 64-bit op via tgsi_op2_64_params() with singledest = true and swap = false */
+ return tgsi_op2_64_params(ctx, true, false);
+}
+
+static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx)
+{ /* same as the single-dest variant but with swap = true, so sources 0 and 1 are exchanged */
+ return tgsi_op2_64_params(ctx, true, true);
+}
+
+static int tgsi_op3_64(struct r600_shader_ctx *ctx)
+{ /* Emits a three-operand 64-bit instruction as four ALU ops, one per channel. Returns 0 or the first r600_bytecode_add_alu() error. */
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int i, j, r;
+ int lasti = 3; /* all four channels are always emitted, regardless of the write mask */
+ int tmp = r600_get_temp(ctx); /* destination register for channels outside the write mask */
+
+ for (i = 0; i < lasti + 1; i++) {
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ctx->inst_info->op;
+ for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 0 : 1); /* every source reads channel 1, except in the last op which reads channel 0 */
+ }
+
+ if (inst->Dst[0].Register.WriteMask & (1 << i))
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ else
+ alu.dst.sel = tmp; /* unmasked channel: target the temp; dst.write stays 0 from the memset */
+
+ alu.dst.chan = i;
+ alu.is_op3 = 1;
+ if (i == lasti) {
+ alu.last = 1; /* flag the final op emitted by this loop */
+ }
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
}
/* handle some special cases */
- switch (ctx->inst_info->tgsi_opcode) {
+ switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_SUB:
r600_bytecode_src_toggle_neg(&alu.src[1]);
break;
return r;
}
- if (use_tmp) {
- /* move result from temp to dst */
- for (i = 0; i <= lasti; i++) {
- if (!(write_mask & (1 << i)))
- continue;
-
+ if (use_tmp) {
+ /* move result from temp to dst */
+ for (i = 0; i <= lasti; i++) {
+ if (!(write_mask & (1 << i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = i;
+ alu.last = (i == lasti);
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ }
+ return 0;
+}
+/* Plain variant: calls tgsi_op2_s() with swap = 0 and trans_only = 0. */
+static int tgsi_op2(struct r600_shader_ctx *ctx)
+{
+ return tgsi_op2_s(ctx, 0, 0);
+}
+/* Swapped variant: calls tgsi_op2_s() with swap = 1 and trans_only = 0. */
+static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
+{
+ return tgsi_op2_s(ctx, 1, 0);
+}
+/* Trans-only variant: calls tgsi_op2_s() with swap = 0 and trans_only = 1. */
+static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
+{
+ return tgsi_op2_s(ctx, 0, 1);
+}
+
+/*
+ * Integer negate.
+ *
+ * For every channel enabled in the destination write mask, emit
+ * ctx->inst_info->op with V_SQ_ALU_SRC_0 as the first operand and the
+ * matching channel of TGSI source 0 as the second operand.
+ * NOTE(review): V_SQ_ALU_SRC_0 is presumably the hardware's inline
+ * constant zero, making this "0 - src" -- confirm against the ISA docs.
+ *
+ * Returns 0 on success or the first non-zero result of
+ * r600_bytecode_add_alu().
+ */
+static int tgsi_ineg(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	const unsigned mask = inst->Dst[0].Register.WriteMask;
+	const int final_chan = tgsi_last_instruction(mask);
+	struct r600_bytecode_alu alu;
+	int chan;
+
+	for (chan = 0; chan <= final_chan; chan++) {
+		int err;
+
+		if ((mask & (1 << chan)) == 0)
+			continue;
+
+		memset(&alu, 0, sizeof(alu));
+		alu.op = ctx->inst_info->op;
+		/* the highest enabled channel closes the instruction group */
+		if (chan == final_chan)
+			alu.last = 1;
+
+		alu.src[0].sel = V_SQ_ALU_SRC_0;
+		r600_bytecode_src(&alu.src[1], &ctx->src[0], chan);
+		tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);
+
+		err = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * Negate a 64-bit (double) operand.
+ *
+ * Each channel enabled in the destination write mask receives a MOV of
+ * the matching source channel.  On the odd channels (1 and 3) the
+ * source negate modifier is toggled; even channels are copied as-is.
+ * NOTE(review): presumably the odd channel of each pair is the dword
+ * that carries the sign bit -- confirm against the fp64 register layout.
+ *
+ * Returns 0 on success or the first non-zero result of
+ * r600_bytecode_add_alu().
+ */
+static int tgsi_dneg(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	const unsigned mask = inst->Dst[0].Register.WriteMask;
+	const int final_chan = tgsi_last_instruction(mask);
+	struct r600_bytecode_alu alu;
+	int chan;
+
+	for (chan = 0; chan <= final_chan; chan++) {
+		int err;
+
+		if ((mask & (1 << chan)) == 0)
+			continue;
+
+		memset(&alu, 0, sizeof(alu));
+		alu.op = ALU_OP1_MOV;
+		if (chan == final_chan)
+			alu.last = 1;
+
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], chan);
+		switch (chan) {
+		case 1:
+		case 3:
+			r600_bytecode_src_toggle_neg(&alu.src[0]);
+			break;
+		default:
+			break;
+		}
+
+		tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);
+
+		err = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+/*
+ * Translate an instruction with two destinations (Dst[0] and Dst[1]).
+ *
+ * ctx->inst_info->op is issued in all four slots into ctx->temp_reg.
+ * Temp channels 2 and 3 are then copied to Dst[0] and temp channel 1 is
+ * copied to the first enabled channel of Dst[1].
+ * NOTE(review): by the name, Dst[0] is the double fraction and Dst[1]
+ * the exponent -- confirm against the TGSI DFRACEXP definition.
+ *
+ * Returns 0 on success or the first non-zero result of
+ * r600_bytecode_add_alu().
+ */
+static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ unsigned write_mask = inst->Dst[0].Register.WriteMask;
+ int i, j, r;
+ int firsti = write_mask == 0xc ? 2 : 0; /* Dst[0] pair starts at z only for an exact z/w mask */
+ /* Pass 1: run the opcode in every slot, each slot writing its own temp channel. */
+ for (i = 0; i <= 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ctx->inst_info->op;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); /* source channel is chosen by fp64_switch() (defined elsewhere) */
+ }
+
+ if (i == 3)
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ /*
+  * Pass 2: MOV temp channels 2 and 3 into Dst[0] channels firsti and
+  * firsti + 1.  Each MOV is its own group and is write-gated by the
+  * corresponding Dst[0] write-mask bit.
+  */
+ for (i = 0; i <= 1; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].chan = i + 2;
+ alu.src[0].sel = ctx->temp_reg;
+
+ tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst);
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i <= 3; i++) {
+ if (inst->Dst[1].Register.WriteMask & (1 << i)) {
+ /* Pass 3: MOV temp channel 1 into the first enabled Dst[1] channel, then stop. */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
- tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.src[0].chan = 1;
alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = i;
- alu.last = (i == lasti);
+ tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst);
+ alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
+ break;
}
}
return 0;
}
-static int tgsi_op2(struct r600_shader_ctx *ctx)
-{
- return tgsi_op2_s(ctx, 0, 0);
-}
-
-static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
-{
- return tgsi_op2_s(ctx, 1, 0);
-}
-
-static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
-{
- return tgsi_op2_s(ctx, 0, 1);
-}
-
-static int tgsi_ineg(struct r600_shader_ctx *ctx)
+static int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bytecode_alu alu;
int i, r;
+ struct r600_bytecode_alu alu;
+ int last_slot = 3;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+ int t1 = ctx->temp_reg;
- for (i = 0; i < lasti + 1; i++) {
-
- if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
- continue;
+ /*
+  * Purpose: emit a single-source 64-bit op (ctx->inst_info->op) into
+  * the temp register, then copy the result to the destination.
+  *
+  * First pass: the op is issued in slots 0..last_slot-1, each reading
+  * source channel 1 as operand 0 and source channel 0 as operand 1.
+  * Only temp channels 0 and 1 are actually written (dst.write below);
+  * per the original note, these have to write the result to X/Y by the
+  * looks of it.
+  */
+ for (i = 0 ; i < last_slot; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ctx->inst_info->op;
- alu.src[0].sel = V_SQ_ALU_SRC_0;
+ /* this path handles exactly one source register */
+ assert (inst->Instruction.NumSrcRegs == 1);
- r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
+ r600_bytecode_src(&alu.src[1], &ctx->src[0], 0);
- tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ /* DRSQ and DSQRT take the absolute value of the source (abs is set on operand 1) */
+ if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ ||
+ ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) {
+ r600_bytecode_src_set_abs(&alu.src[1]);
+ }
+ alu.dst.sel = t1;
+ alu.dst.chan = i;
+ alu.dst.write = (i == 0 || i == 1);
- if (i == lasti) {
+ if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1) /* on non-Cayman chips every slot ends its own group */
alu.last = 1;
- }
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- return 0;
+ for (i = 0 ; i <= lasti; i++) { /* second pass: copy the temp result out to the enabled dst channels */
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = t1;
+ alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1; /* even dst channels take temp.x, odd ones temp.y */
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.write = 1;
+ if (i == lasti)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
}
static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
/* RSQ should take the absolute value of src */
- if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_RSQ) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_RSQ) {
r600_bytecode_src_set_abs(&alu.src[j]);
}
}
return 0;
}
+
+/*
+ * Emit a multi-source 64-bit op (ctx->inst_info->op) through the temp
+ * register.
+ *
+ * For each destination channel pair (k = 0: x/y, k = 1: z/w) that has
+ * at least one channel enabled in the write mask, the op is issued in
+ * all four slots into ctx->temp_reg.  Slots 0..2 read channel 2k + 1 of
+ * every source and slot 3 reads channel 2k.  Afterwards each enabled
+ * destination channel i receives a MOV from temp channel i.
+ *
+ * NOTE(review): both pair passes write the same temp channels and the
+ * copy-out only happens after the pair loop, so if x/y and z/w are
+ * enabled together the second pass overwrites the first pass' result
+ * before it is copied.  Confirm callers only ever enable one pair.
+ *
+ * Returns 0 on success or the first non-zero result of
+ * r600_bytecode_add_alu().
+ */
+static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	int i, j, k, r;
+	struct r600_bytecode_alu alu;
+	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+	int t1 = ctx->temp_reg;
+
+	for (k = 0; k < 2; k++) {
+		/* skip a channel pair that has no enabled channel */
+		if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2))))
+			continue;
+
+		for (i = 0; i < 4; i++) {
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+			alu.op = ctx->inst_info->op;
+			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+				r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
+			}
+			alu.dst.sel = t1;
+			alu.dst.chan = i;
+			alu.dst.write = 1;
+			/* slot 3 closes the four-slot group */
+			if (i == 3)
+				alu.last = 1;
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
+			if (r)
+				return r;
+		}
+	}
+
+	/* copy the temp result out to the enabled destination channels */
+	for (i = 0; i <= lasti; i++) {
+		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+			continue;
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP1_MOV;
+		alu.src[0].sel = t1;
+		alu.src[0].chan = i;
+		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+		alu.dst.write = 1;
+		if (i == lasti)
+			alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
/*
* r600 - trunc to -PI..PI range
* r700 - normalize by dividing by 2PI
static int tgsi_kill(struct r600_shader_ctx *ctx)
{
+ const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int i, r;
alu.src[0].sel = V_SQ_ALU_SRC_0;
- if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILL) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_KILL) {
alu.src[1].sel = V_SQ_ALU_SRC_1;
alu.src[1].neg = 1;
} else {
return 0;
}
+/*
+ * Build the bytecode source operand for one channel of an op3
+ * instruction.
+ *
+ * @ctx:        shader translation context
+ * @temp:       temporary register to use when the operand carries an
+ *              abs modifier; must be non-zero in that case (asserted)
+ * @chan:       channel to read
+ * @bc_src:     operand to fill in
+ * @shader_src: TGSI-level source description
+ *
+ * op3 operands don't support the abs modifier.  When the translated
+ * operand has abs set, it is first copied (modifiers included) into
+ * @temp.@chan with a MOV, and @bc_src is rewritten to read that plain
+ * register channel instead.
+ *
+ * Returns 0 on success or the non-zero result of
+ * r600_bytecode_add_alu().
+ */
+static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx,
+				 unsigned temp, int chan,
+				 struct r600_bytecode_alu_src *bc_src,
+				 const struct r600_shader_src *shader_src)
+{
+	struct r600_bytecode_alu mov;
+	int err;
+
+	r600_bytecode_src(bc_src, shader_src, chan);
+
+	/* nothing to work around unless the operand uses abs */
+	if (!bc_src->abs)
+		return 0;
+
+	assert(temp!=0); /* we actually need the extra register, make sure it is allocated. */
+
+	memset(&mov, 0, sizeof(mov));
+	mov.op = ALU_OP1_MOV;
+	mov.src[0] = *bc_src;
+	mov.dst.sel = temp;
+	mov.dst.chan = chan;
+	mov.dst.write = 1;
+	mov.last = true; // sufficient?
+
+	err = r600_bytecode_add_alu(ctx->bc, &mov);
+	if (err)
+		return err;
+
+	/* from here on the operand is the plain temp channel, no modifiers */
+	memset(bc_src, 0, sizeof(*bc_src));
+	bc_src->sel = temp;
+	bc_src->chan = chan;
+	return 0;
+}
+
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int i, j, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+ int temp_regs[4];
+ for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ temp_regs[j] = 0;
+ if (ctx->src[j].abs)
+ temp_regs[j] = r600_get_temp(ctx);
+ }
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ctx->inst_info->op;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
+ r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]);
+ if (r)
+ return r;
}
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
/* handle some special cases */
- switch (ctx->inst_info->tgsi_opcode) {
+ switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_DP2:
if (i > 1) {
alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
inst->Src[index].Register.File != TGSI_FILE_INPUT &&
inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
- ctx->src[index].neg || ctx->src[index].abs;
+ ctx->src[index].neg || ctx->src[index].abs ||
+ (inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == TGSI_PROCESSOR_GEOMETRY);
}
static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
memset(&vtx, 0, sizeof(vtx));
vtx.op = FETCH_OP_VFETCH;
vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
- vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
+ vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
vtx.src_gpr = src_gpr;
vtx.mega_fetch_count = 16;
vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
alu.op = ALU_OP1_MOV;
if (ctx->bc->chip_class >= EVERGREEN) {
- alu.src[0].sel = 512 + (id / 4);
- alu.src[0].chan = id % 4;
+ /* channel 0 or 2 of each word */
+ alu.src[0].sel = 512 + (id / 2);
+ alu.src[0].chan = (id % 2) * 2;
} else {
/* r600 we have them at channel 2 of the second dword */
alu.src[0].sel = 512 + (id * 2) + 1;
inst->Instruction.Opcode == TGSI_OPCODE_TG4)
sampler_src_reg = 2;
+ /* TGSI moves the sampler to src reg 3 for TXD */
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXD)
+ sampler_src_reg = 3;
+
+ sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
+ if (sampler_index_mode)
+ ctx->shader->uses_index_registers = true;
+
src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
}
}
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
- /* TGSI moves the sampler to src reg 3 for TXD */
- sampler_src_reg = 3;
-
- sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
-
- for (i = 1; i < 3; i++) {
- /* set gradients h/v */
- memset(&tex, 0, sizeof(struct r600_bytecode_tex));
- tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H :
- FETCH_OP_SET_GRADIENTS_V;
- tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
- tex.sampler_index_mode = sampler_index_mode;
- tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
- tex.resource_index_mode = sampler_index_mode;
-
- if (tgsi_tex_src_requires_loading(ctx, i)) {
- tex.src_gpr = r600_get_temp(ctx);
- tex.src_sel_x = 0;
- tex.src_sel_y = 1;
- tex.src_sel_z = 2;
- tex.src_sel_w = 3;
-
- for (j = 0; j < 4; j++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_MOV;
- r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
- alu.dst.sel = tex.src_gpr;
- alu.dst.chan = j;
- if (j == 3)
- alu.last = 1;
- alu.dst.write = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
-
- } else {
- tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
- tex.src_sel_x = ctx->src[i].swizzle[0];
- tex.src_sel_y = ctx->src[i].swizzle[1];
- tex.src_sel_z = ctx->src[i].swizzle[2];
- tex.src_sel_w = ctx->src[i].swizzle[3];
- tex.src_rel = ctx->src[i].rel;
- }
- tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
- tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
- if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
- tex.coord_type_x = 1;
- tex.coord_type_y = 1;
- tex.coord_type_z = 1;
- tex.coord_type_w = 1;
- }
- r = r600_bytecode_add_tex(ctx->bc, &tex);
- if (r)
- return r;
- }
- } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
int out_chan;
/* Add perspective divide */
if (ctx->bc->chip_class == CAYMAN) {
src_gpr = ctx->temp_reg;
}
- sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
- if (sampler_index_mode)
- ctx->shader->uses_index_registers = true;
if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
src_gpr = ctx->temp_reg;
}
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
+ int temp_h = 0, temp_v = 0;
+ int start_val = 0;
+
+ /* if we've already loaded the src (i.e. CUBE don't reload it). */
+ if (src_loaded == TRUE)
+ start_val = 1;
+ else
+ src_loaded = TRUE;
+ for (i = start_val; i < 3; i++) {
+ int treg = r600_get_temp(ctx);
+
+ if (i == 0)
+ src_gpr = treg;
+ else if (i == 1)
+ temp_h = treg;
+ else
+ temp_v = treg;
+
+ for (j = 0; j < 4; j++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
+ alu.dst.sel = treg;
+ alu.dst.chan = j;
+ if (j == 3)
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ }
+ for (i = 1; i < 3; i++) {
+ /* set gradients h/v */
+ memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+ tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H :
+ FETCH_OP_SET_GRADIENTS_V;
+ tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
+ tex.sampler_index_mode = sampler_index_mode;
+ tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
+ tex.resource_index_mode = sampler_index_mode;
+
+ tex.src_gpr = (i == 1) ? temp_h : temp_v;
+ tex.src_sel_x = 0;
+ tex.src_sel_y = 1;
+ tex.src_sel_z = 2;
+ tex.src_sel_w = 3;
+
+ tex.dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */
+ tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
+ if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
+ tex.coord_type_x = 1;
+ tex.coord_type_y = 1;
+ tex.coord_type_z = 1;
+ tex.coord_type_w = 1;
+ }
+ r = r600_bytecode_add_tex(ctx->bc, &tex);
+ if (r)
+ return r;
+ }
+ }
+
if (src_requires_loading && !src_loaded) {
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
/* texture offsets do not apply to other texture targets */
}
} else {
- offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
- offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
- offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
+ switch (inst->Texture.Texture) {
+ case TGSI_TEXTURE_3D:
+ offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
+ /* fallthrough */
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_2D_ARRAY:
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
+ /* fallthrough */
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_1D_ARRAY:
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
+ }
}
}
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
- alu.src[0].sel = 512 + (id / 4);
- alu.src[0].kc_bank = R600_TXQ_CONST_BUFFER;
- alu.src[0].chan = id % 4;
+ if (ctx->bc->chip_class >= EVERGREEN) {
+ /* channel 1 or 3 of each word */
+ alu.src[0].sel = 512 + (id / 2);
+ alu.src[0].chan = ((id % 2) * 2) + 1;
+ } else {
+ /* r600 we have them at channel 2 of the second dword */
+ alu.src[0].sel = 512 + (id * 2) + 1;
+ alu.src[0].chan = 2;
+ }
+ alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX];
tex.inst_mod = texture_component_select;
+ if (ctx->bc->chip_class == CAYMAN) {
/* GATHER4 result order is different from TGSI TG4 */
- tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
- tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
- tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
- tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
+ tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 0 : 7;
+ tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 1 : 7;
+ tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 2 : 7;
+ tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
+ } else {
+ tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
+ tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
+ tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
+ tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
+ }
}
else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) {
tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- unsigned i;
+ unsigned i, temp_regs[2];
int r;
/* optimize if it's just an equal balance */
}
/* src0 * src1 + (1 - src0) * src2 */
+ if (ctx->src[0].abs)
+ temp_regs[0] = r600_get_temp(ctx);
+ else
+ temp_regs[0] = 0;
+ if (ctx->src[1].abs)
+ temp_regs[1] = r600_get_temp(ctx);
+ else
+ temp_regs[1] = 0;
+
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP3_MULADD;
alu.is_op3 = 1;
- r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
- r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
+ r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
+ if (r)
+ return r;
+ r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[1]);
+ if (r)
+ return r;
alu.src[2].sel = ctx->temp_reg;
alu.src[2].chan = i;
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
- int i, r;
+ int i, r, j;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+ int temp_regs[3];
+
+ for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ temp_regs[j] = 0;
+ if (ctx->src[j].abs)
+ temp_regs[j] = r600_get_temp(ctx);
+ }
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP3_CNDGE;
- r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
- r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
- r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
+ r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
+ if (r)
+ return r;
+ r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]);
+ if (r)
+ return r;
+ r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]);
+ if (r)
+ return r;
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
continue;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP3_CNDGE_INT;
+ alu.op = ALU_OP3_CNDE_INT;
r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX];
+ int r;
+ /*
+  * `stream` is read from ctx->literals using source 0's register index
+  * and X swizzle.  For EMIT_VERTEX the GS ring writes for that stream
+  * are emitted first; then the CF instruction itself is added and, if
+  * that succeeded, its count field is set to the stream number.
+  * Returns the result of r600_bytecode_add_cfinst().
+  */
if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
- emit_gs_ring_writes(ctx, TRUE);
+ emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE);
- return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
+ r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
+ if (!r)
+ ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream
+ return r;
}
static int tgsi_umad(struct r600_shader_ctx *ctx)
return 0;
}
-static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
- {TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_r600_arl},
- {TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2},
- {TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit},
+static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
+ [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_r600_arl},
+ [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2},
+ [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit},
/* XXX:
* For state trackers other than OpenGL, we'll want to use
* _RECIP_IEEE instead.
*/
- {TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
-
- {TGSI_OPCODE_RSQ, 0, ALU_OP0_NOP, tgsi_rsq},
- {TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp},
- {TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log},
- {TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2},
- {TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2},
- {TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp},
- {TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp},
- {TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst},
- {TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2},
- {TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2},
- {TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap},
- {TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2},
- {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3},
- {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
- {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
- {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
- {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {22, 0, ALU_OP0_NOP, tgsi_unsupported},
- {23, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2},
- {TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2},
- {TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2},
- {TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
- {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
- {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, tgsi_pow},
- {TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd},
- /* gap */
- {32, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2},
- {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp},
- {TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig},
- {TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
- {TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
- {TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2},
- {TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2},
- {TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, tgsi_trig},
- {TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap},
- {TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2},
- {TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex},
- {TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex},
- {TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex},
- {TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_r600_arl},
- {TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg},
- {TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp},
- {TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs},
- {TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
- {TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp},
- {TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
- {TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
- {TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if},
- {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif},
- {76, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else},
- {TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
- {TGSI_OPCODE_DDX_FINE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DDY_FINE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2},
- {TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
- {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2},
- {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2},
- {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2_trans},
- /* gap */
- {88, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2},
- {TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2},
- {TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod},
- {TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2},
- {TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex},
- {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
- {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
- {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
- {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
- {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop},
- {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
- {TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
- /* gap */
- {104, 0, ALU_OP0_NOP, tgsi_unsupported},
- {105, 0, ALU_OP0_NOP, tgsi_unsupported},
- {106, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
- {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
- {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
- {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
- {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {114, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_loop_breakc},
- {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
- {TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
- /* gap */
- {118, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_op2_trans},
- {TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv},
- {TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2},
- {TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2},
- {TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg},
- {TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2},
- {TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2_trans},
- {TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap},
- {TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_op2_trans},
- {TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
- {TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2},
- {TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv},
- {TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad},
- {TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2},
- {TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2},
- {TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod},
- {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_UINT, tgsi_op2_trans},
- {TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2},
- {TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2},
- {TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2_trans},
- {TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap},
- {TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2_swap},
- {TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDSWITCH, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_r600_arl},
- {TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp},
- {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
- {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
- {TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMXCHG, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex},
- {TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
- {TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
- {TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, tgsi_op2_trans},
- {TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, tgsi_op2_trans},
- {TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_unsupported},
- {TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_unsupported},
- {TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_unsupported},
- {TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_unsupported},
- {TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_unsupported},
- {TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_unsupported},
- {TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_unsupported},
- {TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_unsupported},
- {TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_unsupported},
- {TGSI_OPCODE_INTERP_CENTROID, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_INTERP_OFFSET, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
+
+ [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq},
+ [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
+ [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
+ [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
+ [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
+ [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
+ [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2},
+ [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
+ [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
+ [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
+ [22] = { ALU_OP0_NOP, tgsi_unsupported},
+ [23] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
+ [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
+ [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
+ [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
+ [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
+ [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
+ [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
+ [32] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2},
+ [34] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
+ [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
+ [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
+ [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
+ [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported},
+ [44] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2},
+ [46] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2},
+ [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig},
+ [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap},
+ [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2},
+ [51] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex},
+ [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex},
+ [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex},
+ [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported},
+ [59] = { ALU_OP0_NOP, tgsi_unsupported},
+ [60] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_r600_arl},
+ [62] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg},
+ [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp},
+ [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs},
+ [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
+ [69] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
+ [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
+ [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
+ [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif},
+ [76] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else},
+ [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif},
+ [TGSI_OPCODE_DDX_FINE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_DDY_FINE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2},
+ [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
+ [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2},
+ [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2},
+ [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2_trans},
+ [88] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2},
+ [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2},
+ [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod},
+ [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2},
+ [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex},
+ [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
+ [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
+ [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
+ [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit},
+ [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop},
+ [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop},
+ [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
+ [104] = { ALU_OP0_NOP, tgsi_unsupported},
+ [105] = { ALU_OP0_NOP, tgsi_unsupported},
+ [106] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2},
+ [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
+ [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
+ [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
+ [112] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported},
+ [114] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_loop_breakc},
+ [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
+ [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */
+ [118] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2_trans},
+ [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv},
+ [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2},
+ [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2},
+ [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg},
+ [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2},
+ [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2_trans},
+ [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap},
+ [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2_trans},
+ [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
+ [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2},
+ [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv},
+ [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad},
+ [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2},
+ [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2},
+ [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod},
+ [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans},
+ [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2},
+ [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2},
+ [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2_trans},
+ [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
+ [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2_swap},
+ [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_r600_arl},
+ [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp},
+ [TGSI_OPCODE_IABS] = { 0, tgsi_iabs},
+ [TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
+ [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex},
+ [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
+ [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex},
+ [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans},
+ [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans},
+ [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_unsupported},
+ [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_unsupported},
+ [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_unsupported},
+ [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_unsupported},
+ [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_unsupported},
+ [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_unsupported},
+ [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_unsupported},
+ [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_unsupported},
+ [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_unsupported},
+ [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported},
};
-static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
- {TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_eg_arl},
- {TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2},
- {TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit},
- {TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate},
- {TGSI_OPCODE_RSQ, 0, ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
- {TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp},
- {TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log},
- {TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2},
- {TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2},
- {TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp},
- {TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp},
- {TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst},
- {TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2},
- {TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2},
- {TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap},
- {TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2},
- {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3},
- {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
- {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
- {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
- {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {22, 0, ALU_OP0_NOP, tgsi_unsupported},
- {23, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2},
- {TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2},
- {TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2},
- {TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
- {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
- {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, tgsi_pow},
- {TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd},
- /* gap */
- {32, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2},
- {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp},
- {TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig},
- {TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
- {TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
- {TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2},
- {TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2},
- {TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, tgsi_trig},
- {TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap},
- {TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2},
- {TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex},
- {TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex},
- {TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex},
- {TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_eg_arl},
- {TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg},
- {TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp},
- {TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs},
- {TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
- {TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp},
- {TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
- {TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
- {TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if},
- {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif},
- {76, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else},
- {TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
- {TGSI_OPCODE_DDX_FINE, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
- {TGSI_OPCODE_DDY_FINE, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- {TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2},
- {TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
- {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2},
- {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2},
- {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2},
- /* gap */
- {88, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2},
- {TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2},
- {TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod},
- {TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2},
- {TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex},
- {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
- {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
- {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
- {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
- {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop},
- {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
- {TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
- /* gap */
- {104, 0, ALU_OP0_NOP, tgsi_unsupported},
- {105, 0, ALU_OP0_NOP, tgsi_unsupported},
- {106, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
- {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
- {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
- {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
- {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {114, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
- {TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
- /* gap */
- {118, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_f2i},
- {TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv},
- {TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2},
- {TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2},
- {TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg},
- {TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2},
- {TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2},
- {TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap},
- {TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_f2i},
- {TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
- {TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2},
- {TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv},
- {TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad},
- {TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2},
- {TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2},
- {TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod},
- {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_UINT, tgsi_op2_trans},
- {TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2},
- {TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2},
- {TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2},
- {TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap},
- {TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2},
- {TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDSWITCH, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_eg_arl},
- {TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp},
- {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
- {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
- {TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMXCHG, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex},
- {TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
- {TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
- {TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, tgsi_op2_trans},
- {TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, tgsi_op2_trans},
- {TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_tex},
- {TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_tex},
- {TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_op3},
- {TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_op3},
- {TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_bfi},
- {TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_op2},
- {TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_op2},
- {TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_op2},
- {TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_msb},
- {TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_msb},
- {TGSI_OPCODE_INTERP_CENTROID, 0, ALU_OP0_NOP, tgsi_interp_egcm},
- {TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, tgsi_interp_egcm},
- {TGSI_OPCODE_INTERP_OFFSET, 0, ALU_OP0_NOP, tgsi_interp_egcm},
- {TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported},
+static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
+ [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl},
+ [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2},
+ [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit},
+ [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate},
+ [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
+ [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
+ [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
+ [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
+ [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
+ [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
+ [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2},
+ [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
+ [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
+ [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
+ [22] = { ALU_OP0_NOP, tgsi_unsupported},
+ [23] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
+ [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
+ [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
+ [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
+ [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
+ [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
+ [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
+ [32] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2},
+ [34] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
+ [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
+ [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
+ [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
+ [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported},
+ [44] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2},
+ [46] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2},
+ [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig},
+ [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap},
+ [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2},
+ [51] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex},
+ [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex},
+ [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex},
+ [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported},
+ [59] = { ALU_OP0_NOP, tgsi_unsupported},
+ [60] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl},
+ [62] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg},
+ [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp},
+ [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs},
+ [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
+ [69] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
+ [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
+ [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
+ [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif},
+ [76] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else},
+ [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif},
+ [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
+ [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
+ [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2},
+ [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
+ [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2},
+ [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2},
+ [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2},
+ [88] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2},
+ [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2},
+ [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod},
+ [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2},
+ [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex},
+ [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
+ [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
+ [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
+ [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit},
+ [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop},
+ [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop},
+ [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
+ [104] = { ALU_OP0_NOP, tgsi_unsupported},
+ [105] = { ALU_OP0_NOP, tgsi_unsupported},
+ [106] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2},
+ [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
+ [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
+ [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
+ [112] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported},
+ [114] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
+ [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */
+ [118] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_f2i},
+ [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv},
+ [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2},
+ [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2},
+ [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg},
+ [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2},
+ [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2},
+ [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap},
+ [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_f2i},
+ [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
+ [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2},
+ [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv},
+ [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad},
+ [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2},
+ [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2},
+ [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod},
+ [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans},
+ [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2},
+ [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2},
+ [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2},
+ [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
+ [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2},
+ [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl},
+ [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp},
+ [TGSI_OPCODE_IABS] = { 0, tgsi_iabs},
+ [TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
+ [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex},
+ [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
+ [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex},
+ [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans},
+ [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans},
+ [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex},
+ [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex},
+ [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3},
+ [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3},
+ [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi},
+ [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2},
+ [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2},
+ [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2},
+ [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb},
+ [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb},
+ [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm},
+ [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm},
+ [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm},
+ [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
+ [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64},
+ [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
+ [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
+ [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
+ [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
+ [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
+ [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
+ [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr},
+ [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr},
+ [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64},
+ [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64},
+ [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64},
+ [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp},
+ [TGSI_OPCODE_D2I] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_I2D] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_D2U] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_U2D] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
+ [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported},
};
-static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
- {TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_eg_arl},
- {TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2},
- {TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit},
- {TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_IEEE, cayman_emit_float_instr},
- {TGSI_OPCODE_RSQ, 0, ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr},
- {TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp},
- {TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log},
- {TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2},
- {TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2},
- {TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp},
- {TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp},
- {TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst},
- {TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2},
- {TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2},
- {TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap},
- {TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2},
- {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3},
- {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
- {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
- {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
- {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {22, 0, ALU_OP0_NOP, tgsi_unsupported},
- {23, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2},
- {TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2},
- {TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2},
- {TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
- {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
- {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, cayman_pow},
- {TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd},
- /* gap */
- {32, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2},
- {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp},
- {TGSI_OPCODE_COS, 0, ALU_OP1_COS, cayman_trig},
- {TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
- {TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
- {TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2},
- {TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2},
- {TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, cayman_trig},
- {TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap},
- {TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2},
- {TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex},
- {TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex},
- {TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex},
- {TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_eg_arl},
- {TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg},
- {TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp},
- {TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs},
- {TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
- {TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp},
- {TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
- {TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
- {TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if},
- {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif},
- {76, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else},
- {TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
- {TGSI_OPCODE_DDX_FINE, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
- {TGSI_OPCODE_DDY_FINE, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- {TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2},
- {TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2},
- {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2},
- {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2},
- {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2},
- /* gap */
- {88, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2},
- {TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2},
- {TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod},
- {TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2},
- {TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex},
- {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
- {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
- {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
- {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
- {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop},
- {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
- {TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
- /* gap */
- {104, 0, ALU_OP0_NOP, tgsi_unsupported},
- {105, 0, ALU_OP0_NOP, tgsi_unsupported},
- {106, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
- {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
- {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
- {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
- {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {114, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
- {TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
- /* gap */
- {118, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_op2},
- {TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv},
- {TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2},
- {TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2},
- {TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg},
- {TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2},
- {TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2},
- {TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap},
- {TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_op2},
- {TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2},
- {TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2},
- {TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv},
- {TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad},
- {TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2},
- {TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2},
- {TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod},
- {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_INT, cayman_mul_int_instr},
- {TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2},
- {TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2},
- {TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2},
- {TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap},
- {TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2},
- {TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDSWITCH, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
- {TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_eg_arl},
- {TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp},
- {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs},
- {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg},
- {TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMXCHG, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex},
- {TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
- {TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
- {TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, cayman_mul_int_instr},
- {TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, cayman_mul_int_instr},
- {TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_tex},
- {TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_tex},
- {TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_op3},
- {TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_op3},
- {TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_bfi},
- {TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_op2},
- {TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_op2},
- {TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_op2},
- {TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_msb},
- {TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_msb},
- {TGSI_OPCODE_INTERP_CENTROID, 0, ALU_OP0_NOP, tgsi_interp_egcm},
- {TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, tgsi_interp_egcm},
- {TGSI_OPCODE_INTERP_OFFSET, 0, ALU_OP0_NOP, tgsi_interp_egcm},
- {TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported},
+static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { /* opcode dispatch table: indexed by TGSI opcode, each entry is { op value, handler function }; NOTE(review): "cm" presumably means Cayman (cf. the cayman_* handlers) - confirm */
+ [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl},
+ [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2},
+ [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit},
+ [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, cayman_emit_float_instr},
+ [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr},
+ [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
+ [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
+ [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
+ [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
+ [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
+ [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2},
+ [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
+ [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, /* tgsi_unsupported entries pair with a NOP/0 op; presumably the handler rejects the opcode (its body is not visible here) */
+ [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
+ [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
+ [22] = { ALU_OP0_NOP, tgsi_unsupported}, /* gap: raw numeric index, no TGSI_OPCODE_* name is used for this slot */
+ [23] = { ALU_OP0_NOP, tgsi_unsupported}, /* gap */
+ [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
+ [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
+ [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
+ [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
+ [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
+ [TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow},
+ [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
+ [32] = { ALU_OP0_NOP, tgsi_unsupported}, /* gap */
+ [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2},
+ [34] = { ALU_OP0_NOP, tgsi_unsupported}, /* raw numeric index, unsupported slot */
+ [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig},
+ [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
+ [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
+ [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
+ [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported},
+ [44] = { ALU_OP0_NOP, tgsi_unsupported}, /* raw numeric index, unsupported slot */
+ [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2},
+ [46] = { ALU_OP0_NOP, tgsi_unsupported}, /* raw numeric index, unsupported slot */
+ [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2},
+ [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, cayman_trig},
+ [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap},
+ [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2},
+ [51] = { ALU_OP0_NOP, tgsi_unsupported}, /* raw numeric index, unsupported slot */
+ [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex},
+ [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex},
+ [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex},
+ [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported},
+ [59] = { ALU_OP0_NOP, tgsi_unsupported}, /* raw numeric index, unsupported slot */
+ [60] = { ALU_OP0_NOP, tgsi_unsupported}, /* raw numeric index, unsupported slot */
+ [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl},
+ [62] = { ALU_OP0_NOP, tgsi_unsupported}, /* raw numeric index, unsupported slot */
+ [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg},
+ [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp},
+ [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs},
+ [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
+ [69] = { ALU_OP0_NOP, tgsi_unsupported}, /* raw numeric index, unsupported slot */
+ [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
+ [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
+ [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
+ [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif},
+ [76] = { ALU_OP0_NOP, tgsi_unsupported}, /* raw numeric index, unsupported slot */
+ [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else},
+ [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif},
+ [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
+ [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
+ [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2},
+ [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2},
+ [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2},
+ [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2},
+ [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2},
+ [88] = { ALU_OP0_NOP, tgsi_unsupported}, /* gap */
+ [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2},
+ [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2},
+ [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod},
+ [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2},
+ [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex},
+ [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
+ [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
+ [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
+ [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit},
+ [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop},
+ [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop},
+ [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
+ [104] = { ALU_OP0_NOP, tgsi_unsupported}, /* gap */
+ [105] = { ALU_OP0_NOP, tgsi_unsupported}, /* gap */
+ [106] = { ALU_OP0_NOP, tgsi_unsupported}, /* gap */
+ [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2},
+ [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
+ [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
+ [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
+ [112] = { ALU_OP0_NOP, tgsi_unsupported}, /* raw numeric index, unsupported slot */
+ [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported},
+ [114] = { ALU_OP0_NOP, tgsi_unsupported}, /* gap */
+ [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
+ [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */
+ [118] = { ALU_OP0_NOP, tgsi_unsupported}, /* gap */
+ [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2},
+ [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv},
+ [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2},
+ [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2},
+ [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg},
+ [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2},
+ [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2},
+ [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap},
+ [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2},
+ [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2},
+ [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2},
+ [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv},
+ [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad},
+ [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2},
+ [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2},
+ [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod},
+ [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_INT, cayman_mul_int_instr},
+ [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2},
+ [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2},
+ [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2},
+ [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
+ [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2},
+ [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, /* the SAMPLE* group through SAMPLE_INFO uses a literal 0 op rather than ALU_OP0_NOP */
+ [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported},
+ [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl},
+ [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp},
+ [TGSI_OPCODE_IABS] = { 0, tgsi_iabs},
+ [TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
+ [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex},
+ [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
+ [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex},
+ [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, cayman_mul_int_instr},
+ [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, cayman_mul_int_instr},
+ [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex},
+ [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex},
+ [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3},
+ [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3},
+ [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi},
+ [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2},
+ [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2},
+ [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2},
+ [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb},
+ [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb},
+ [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm},
+ [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm},
+ [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm},
+ [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, /* F2D..DRSQ: entries using *_64 ops and *_64 / *_double_* handlers */
+ [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64},
+ [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
+ [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
+ [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
+ [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
+ [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
+ [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, /* less-than mapped onto SETGT; NOTE(review): the _s handler presumably swaps the sources, like tgsi_op2_swap for SLT - confirm */
+ [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
+ [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr},
+ [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr},
+ [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64},
+ [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64},
+ [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64},
+ [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp},
+ [TGSI_OPCODE_D2I] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_I2D] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_D2U] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_U2D] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
+ [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, /* sentinel entry; assuming TGSI_OPCODE_LAST is the largest designator, it fixes the array length at TGSI_OPCODE_LAST + 1 - confirm */
 };