The compiler must issue the source argument to slots z, y, and x
*/
-static int r600_shader_from_tgsi(struct r600_screen *rscreen,
+static int r600_shader_from_tgsi(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
struct r600_shader_key key);
ps->arrays[n].gpr_count = size;
}
-static unsigned tgsi_get_processor_type(const struct tgsi_token *tokens)
-{
- struct tgsi_parse_context parse;
-
- if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) {
- debug_printf("tgsi_parse_init() failed in %s:%i!\n", __func__, __LINE__);
- return ~0;
- }
- return parse.FullHeader.Processor.Processor;
-}
-
-static bool r600_can_dump_shader(struct r600_screen *rscreen, unsigned processor_type)
-{
- switch (processor_type) {
- case TGSI_PROCESSOR_VERTEX:
- return (rscreen->debug_flags & DBG_VS) != 0;
- case TGSI_PROCESSOR_GEOMETRY:
- return (rscreen->debug_flags & DBG_GS) != 0;
- case TGSI_PROCESSOR_FRAGMENT:
- return (rscreen->debug_flags & DBG_PS) != 0;
- case TGSI_PROCESSOR_COMPUTE:
- return (rscreen->debug_flags & DBG_CS) != 0;
- default:
- return false;
- }
-}
-
static void r600_dump_streamout(struct pipe_stream_output_info *so)
{
unsigned i;
}
}
+/* Upload the shader's final bytecode into a GPU buffer (shader->bo).
+ * The buffer is created lazily: when shader->bo is already set the
+ * bytecode is assumed to be uploaded and the call is a no-op.
+ * On big-endian hosts every dword is converted to little endian,
+ * which is the byte order the hardware fetches.
+ * Returns 0 on success, -ENOMEM if the buffer cannot be created
+ * or mapped. */
+static int store_shader(struct pipe_context *ctx,
+ struct r600_pipe_shader *shader)
+{
+ struct r600_context *rctx = (struct r600_context *)ctx;
+ uint32_t *ptr, i;
+
+ if (shader->bo == NULL) {
+ shader->bo = (struct r600_resource*)
+ pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
+ if (shader->bo == NULL) {
+ return -ENOMEM;
+ }
+ ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
+ if (ptr == NULL) {
+ /* Map failed: don't write through a NULL pointer; the caller's
+ * error path destroys the shader (and with it shader->bo). */
+ return -ENOMEM;
+ }
+ if (R600_BIG_ENDIAN) {
+ for (i = 0; i < shader->shader.bc.ndw; ++i) {
+ ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]);
+ }
+ } else {
+ memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
+ }
+ rctx->b.ws->buffer_unmap(shader->bo->cs_buf);
+ }
+
+ return 0;
+}
+
int r600_pipe_shader_create(struct pipe_context *ctx,
struct r600_pipe_shader *shader,
struct r600_shader_key key)
{
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_pipe_shader_selector *sel = shader->selector;
- int r, i;
- uint32_t *ptr;
- bool dump = r600_can_dump_shader(rctx->screen, tgsi_get_processor_type(sel->tokens));
- unsigned use_sb = rctx->screen->debug_flags & DBG_SB;
- unsigned sb_disasm = use_sb || (rctx->screen->debug_flags & DBG_SB_DISASM);
+ int r;
+ bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
+ unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
+ unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
+ unsigned export_shader = key.vs_as_es;
shader->shader.bc.isa = rctx->isa;
r600_dump_streamout(&sel->so);
}
}
- r = r600_shader_from_tgsi(rctx->screen, shader, key);
+ r = r600_shader_from_tgsi(rctx, shader, key);
if (r) {
R600_ERR("translation from TGSI failed !\n");
- return r;
+ goto error;
}
+ /* disable SB for geom shaders - it can't handle the CF_EMIT instructions */
+ use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
+
/* Check if the bytecode has already been built. When using the llvm
* backend, r600_shader_from_tgsi() will take care of building the
* bytecode.
r = r600_bytecode_build(&shader->shader.bc);
if (r) {
R600_ERR("building bytecode failed !\n");
- return r;
+ goto error;
}
}
dump, use_sb);
if (r) {
R600_ERR("r600_sb_bytecode_process failed !\n");
- return r;
+ goto error;
}
}
- /* Store the shader in a buffer. */
- if (shader->bo == NULL) {
- shader->bo = (struct r600_resource*)
- pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
- if (shader->bo == NULL) {
- return -ENOMEM;
- }
- ptr = r600_buffer_mmap_sync_with_rings(rctx, shader->bo, PIPE_TRANSFER_WRITE);
- if (R600_BIG_ENDIAN) {
- for (i = 0; i < shader->shader.bc.ndw; ++i) {
- ptr[i] = util_bswap32(shader->shader.bc.bytecode[i]);
- }
- } else {
- memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
+ if (shader->gs_copy_shader) {
+ if (dump) {
+ // dump copy shader
+ r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc,
+ &shader->gs_copy_shader->shader, dump, 0);
+ if (r)
+ goto error;
}
- rctx->ws->buffer_unmap(shader->bo->cs_buf);
+
+ if ((r = store_shader(ctx, shader->gs_copy_shader)))
+ goto error;
}
+ /* Store the shader in a buffer. */
+ if ((r = store_shader(ctx, shader)))
+ goto error;
+
/* Build state. */
switch (shader->shader.processor_type) {
+ case TGSI_PROCESSOR_GEOMETRY:
+ if (rctx->b.chip_class >= EVERGREEN) {
+ evergreen_update_gs_state(ctx, shader);
+ evergreen_update_vs_state(ctx, shader->gs_copy_shader);
+ } else {
+ r600_update_gs_state(ctx, shader);
+ r600_update_vs_state(ctx, shader->gs_copy_shader);
+ }
+ break;
case TGSI_PROCESSOR_VERTEX:
- if (rctx->chip_class >= EVERGREEN) {
- evergreen_update_vs_state(ctx, shader);
+ if (rctx->b.chip_class >= EVERGREEN) {
+ if (export_shader)
+ evergreen_update_es_state(ctx, shader);
+ else
+ evergreen_update_vs_state(ctx, shader);
} else {
- r600_update_vs_state(ctx, shader);
+ if (export_shader)
+ r600_update_es_state(ctx, shader);
+ else
+ r600_update_vs_state(ctx, shader);
}
break;
case TGSI_PROCESSOR_FRAGMENT:
- if (rctx->chip_class >= EVERGREEN) {
+ if (rctx->b.chip_class >= EVERGREEN) {
evergreen_update_ps_state(ctx, shader);
} else {
r600_update_ps_state(ctx, shader);
}
break;
default:
- return -EINVAL;
+ r = -EINVAL;
+ goto error;
}
return 0;
+
+error:
+ r600_pipe_shader_destroy(ctx, shader);
+ return r;
}
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
int colors_used;
boolean clip_vertex_write;
unsigned cv_output;
+ unsigned edgeflag_output;
int fragcoord_input;
int native_integers;
+ int next_ring_offset;
+ int gs_out_ring_offset;
+ int gs_next_vertex;
+ struct r600_shader *gs_for_vs;
+ int gs_export_gpr_treg;
};
struct r600_shader_tgsi_instruction {
int (*process)(struct r600_shader_ctx *ctx);
};
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind);
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
static int tgsi_endloop(struct r600_shader_ctx *ctx);
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
-#ifdef HAVE_OPENCL
-int r600_compute_shader_create(struct pipe_context * ctx,
- LLVMModuleRef mod, struct r600_bytecode * bytecode)
-{
- struct r600_context *r600_ctx = (struct r600_context *)ctx;
- struct r600_shader_ctx shader_ctx;
- boolean use_kill = false;
- bool dump = (r600_ctx->screen->debug_flags & DBG_CS) != 0;
- unsigned use_sb = r600_ctx->screen->debug_flags & DBG_SB_CS;
- unsigned sb_disasm = use_sb ||
- (r600_ctx->screen->debug_flags & DBG_SB_DISASM);
-
- shader_ctx.bc = bytecode;
- r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family,
- r600_ctx->screen->has_compressed_msaa_texturing);
- shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
- shader_ctx.bc->isa = r600_ctx->isa;
- r600_llvm_compile(mod, r600_ctx->family,
- shader_ctx.bc, &use_kill, dump);
-
- if (dump && !sb_disasm) {
- r600_bytecode_disasm(shader_ctx.bc);
- } else if ((dump && sb_disasm) || use_sb) {
- if (r600_sb_bytecode_process(r600_ctx, shader_ctx.bc, NULL, dump, use_sb))
- R600_ERR("r600_sb_bytecode_process failed!\n");
- }
-
- return 1;
-}
-
-#endif /* HAVE_OPENCL */
-
static int tgsi_is_supported(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
#endif
for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
if (i->Src[j].Register.Dimension) {
- if (i->Src[j].Register.File != TGSI_FILE_CONSTANT) {
+ switch (i->Src[j].Register.File) {
+ case TGSI_FILE_CONSTANT:
+ break;
+ case TGSI_FILE_INPUT:
+ if (ctx->type == TGSI_PROCESSOR_GEOMETRY)
+ break;
+ default:
R600_ERR("unsupported src %d (dimension %d)\n", j,
i->Src[j].Register.Dimension);
return -EINVAL;
* semantic indices, so we'll use 0 for them.
*/
if (name == TGSI_SEMANTIC_POSITION ||
- name == TGSI_SEMANTIC_PSIZE ||
- name == TGSI_SEMANTIC_FACE)
+ name == TGSI_SEMANTIC_PSIZE ||
+ name == TGSI_SEMANTIC_EDGEFLAG ||
+ name == TGSI_SEMANTIC_FACE)
index = 0;
else {
if (name == TGSI_SEMANTIC_GENERIC) {
ctx->shader->input[i].name = d->Semantic.Name;
ctx->shader->input[i].sid = d->Semantic.Index;
ctx->shader->input[i].interpolate = d->Interp.Interpolate;
- ctx->shader->input[i].centroid = d->Interp.Centroid;
+ ctx->shader->input[i].centroid = d->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID;
ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
if ((r = evergreen_interp_input(ctx, i)))
return r;
}
+ } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+ /* FIXME probably skip inputs if they aren't passed in the ring */
+ ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
+ ctx->next_ring_offset += 16;
+ if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID)
+ ctx->shader->gs_prim_id_input = true;
}
for (j = 1; j < count; ++j) {
ctx->shader->input[i + j] = ctx->shader->input[i];
ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
ctx->shader->output[i].interpolate = d->Interp.Interpolate;
ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
- if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+ if (ctx->type == TGSI_PROCESSOR_VERTEX ||
+ ctx->type == TGSI_PROCESSOR_GEOMETRY) {
ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
switch (d->Semantic.Name) {
case TGSI_SEMANTIC_CLIPDIST:
ctx->shader->vs_out_misc_write = 1;
ctx->shader->vs_out_point_size = 1;
break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ ctx->shader->vs_out_misc_write = 1;
+ ctx->shader->vs_out_edgeflag = 1;
+ ctx->edgeflag_output = i;
+ break;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ ctx->shader->vs_out_misc_write = 1;
+ ctx->shader->vs_out_viewport = 1;
+ break;
+ case TGSI_SEMANTIC_LAYER:
+ ctx->shader->vs_out_misc_write = 1;
+ ctx->shader->vs_out_layer = 1;
+ break;
case TGSI_SEMANTIC_CLIPVERTEX:
ctx->clip_vertex_write = TRUE;
ctx->cv_output = i;
break;
}
+ if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+ ctx->gs_out_ring_offset += 16;
+ }
} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
switch (d->Semantic.Name) {
case TGSI_SEMANTIC_COLOR:
ctx->input_linear = TRUE;
if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
ctx->input_perspective = TRUE;
- if (ctx->info.input_centroid[i])
+ if (ctx->info.input_interpolate_loc[i] == TGSI_INTERPOLATE_LOC_CENTROID)
ctx->input_centroid = TRUE;
}
}
}
-static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int cb_idx, unsigned int offset, unsigned int dst_reg)
+static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
+ unsigned int cb_idx, unsigned int offset, unsigned ar_chan,
+ unsigned int dst_reg)
{
struct r600_bytecode_vtx vtx;
unsigned int ar_reg;
alu.op = ALU_OP2_ADD_INT;
alu.src[0].sel = ctx->bc->ar_reg;
+ alu.src[0].chan = ar_chan;
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].value = offset;
alu.dst.sel = dst_reg;
+ alu.dst.chan = ar_chan;
alu.dst.write = 1;
alu.last = 1;
vtx.buffer_id = cb_idx;
vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
vtx.src_gpr = ar_reg;
+ vtx.src_sel_x = ar_chan;
vtx.mega_fetch_count = 16;
vtx.dst_gpr = dst_reg;
vtx.dst_sel_x = 0; /* SEL_X */
vtx.data_format = FMT_32_32_32_32_FLOAT;
vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
- vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
vtx.endian = r600_endian_swap(32);
if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
return 0;
}
+/* Fetch one GS input vector (semantic 'index') of vertex 'vtx_id'
+ * from the ESGS ring into dst_reg.xyzw.
+ *
+ * The ring offsets of the incoming vertices are handed to the GS in
+ * R0.x, R0.y, R0.w, R1.x, R1.y, R1.z (R0.z appears to carry the
+ * PrimitiveID), so vtx_id is mapped onto that register/channel layout
+ * below.  Returns 0 on success or a negative error code. */
+static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
+{
+ struct r600_bytecode_vtx vtx;
+ int r;
+ unsigned index = src->Register.Index;
+ unsigned vtx_id = src->Dimension.Index;
+ int offset_reg = vtx_id / 3;
+ int offset_chan = vtx_id % 3;
+
+ /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y,
+ * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */
+
+ if (offset_reg == 0 && offset_chan == 2)
+ offset_chan = 3;
+
+ if (src->Dimension.Indirect) {
+ int treg[3];
+ int t2;
+ struct r600_bytecode_alu alu;
+ int i;
+
+ /* Relative addressing can only index the GPR number, not the
+ * channel, so spread R0.x/R0.y/R0.w over the .x channel of three
+ * consecutive temps and index into those with AR; this matches
+ * what the proprietary driver does. */
+ for (i = 0; i < 3; i++) {
+ treg[i] = r600_get_temp(ctx);
+ }
+ t2 = r600_get_temp(ctx);
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = 0;
+ alu.src[0].chan = i == 2 ? 3 : i;
+ alu.dst.sel = treg[i];
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = treg[0];
+ alu.src[0].rel = 1;
+ alu.dst.sel = t2;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ offset_reg = t2;
+ /* the indexed offset was written to t2.x only, so fetch from chan 0 */
+ offset_chan = 0;
+ }
+
+
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
+ vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
+ vtx.src_gpr = offset_reg;
+ vtx.src_sel_x = offset_chan;
+ vtx.offset = index * 16; /*bytes*/
+ vtx.mega_fetch_count = 16;
+ vtx.dst_gpr = dst_reg;
+ vtx.dst_sel_x = 0; /* SEL_X */
+ vtx.dst_sel_y = 1; /* SEL_Y */
+ vtx.dst_sel_z = 2; /* SEL_Z */
+ vtx.dst_sel_w = 3; /* SEL_W */
+ if (ctx->bc->chip_class >= EVERGREEN) {
+ vtx.use_const_fields = 1;
+ } else {
+ vtx.data_format = FMT_32_32_32_32_FLOAT;
+ }
+
+ if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
+ return r;
+
+ return 0;
+}
+
+/* Rewrite GS source operands that reference per-vertex inputs.
+ *
+ * A plain PRIMID input is redirected to R0.z (where the hardware
+ * delivers the primitive id); a dimensioned (vertex-indexed) input is
+ * fetched from the ESGS ring into a fresh temp and the operand is
+ * retargeted to that temp.  Returns 0 on success or a negative error
+ * code from the ring fetch. */
+static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ int i;
+
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ struct tgsi_full_src_register *src = &inst->Src[i];
+
+ if (src->Register.File == TGSI_FILE_INPUT) {
+ if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) {
+ /* primitive id is in R0.z */
+ ctx->src[i].sel = 0;
+ ctx->src[i].swizzle[0] = 2;
+ }
+ }
+ if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) {
+ int r;
+ int treg = r600_get_temp(ctx);
+
+ /* propagate ring-fetch errors instead of silently ignoring them */
+ r = fetch_gs_input(ctx, src, treg);
+ if (r)
+ return r;
+ ctx->src[i].sel = treg;
+ }
+ }
+ return 0;
+}
+
static int tgsi_split_constant(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
}
if (ctx->src[i].rel) {
+ int chan = inst->Src[i].Indirect.Swizzle;
int treg = r600_get_temp(ctx);
- if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].sel - 512, treg)))
+ if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].sel - 512, chan, treg)))
return r;
ctx->src[i].kc_bank = 0;
return 0;
}
+/* Emit stream-out (transform feedback) MEM_STREAM writes for every
+ * output listed in 'so'.
+ *
+ * Validates the output count and buffer indices, lowers outputs whose
+ * dst_offset is smaller than start_component (the hardware can only
+ * export a 4D vector with a write mask, so such components are MOVed
+ * down to a temp starting at .x), then emits one MEM_STREAM output
+ * per entry.  Pre-Evergreen and Evergreen+ use different CF opcodes
+ * for the four stream-out buffers.
+ *
+ * Returns 0 on success, -EINVAL on invalid stream-out state, or the
+ * error from bytecode emission. */
+static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so)
+{
+ unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
+ int i, j, r;
-static int r600_shader_from_tgsi(struct r600_screen *rscreen,
+ /* Sanity checking. */
+ if (so->num_outputs > PIPE_MAX_SHADER_OUTPUTS) {
+ R600_ERR("Too many stream outputs: %d\n", so->num_outputs);
+ r = -EINVAL;
+ goto out_err;
+ }
+ for (i = 0; i < so->num_outputs; i++) {
+ if (so->output[i].output_buffer >= 4) {
+ R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
+ so->output[i].output_buffer);
+ r = -EINVAL;
+ goto out_err;
+ }
+ }
+
+ /* Initialize locations where the outputs are stored. */
+ for (i = 0; i < so->num_outputs; i++) {
+ so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
+
+ /* Lower outputs with dst_offset < start_component.
+ *
+ * We can only output 4D vectors with a write mask, e.g. we can
+ * only output the W component at offset 3, etc. If we want
+ * to store Y, Z, or W at buffer offset 0, we need to use MOV
+ * to move it to X and output X. */
+ if (so->output[i].dst_offset < so->output[i].start_component) {
+ unsigned tmp = r600_get_temp(ctx);
+
+ for (j = 0; j < so->output[i].num_components; j++) {
+ struct r600_bytecode_alu alu;
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = so_gpr[i];
+ alu.src[0].chan = so->output[i].start_component + j;
+
+ alu.dst.sel = tmp;
+ alu.dst.chan = j;
+ alu.dst.write = 1;
+ if (j == so->output[i].num_components - 1)
+ alu.last = 1;
+ /* NOTE(review): 'return r' here is equivalent to
+ * 'goto out_err' -- out_err does no cleanup. */
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ /* the lowered components now start at .x of the temp */
+ so->output[i].start_component = 0;
+ so_gpr[i] = tmp;
+ }
+ }
+
+ /* Write outputs to buffers. */
+ for (i = 0; i < so->num_outputs; i++) {
+ struct r600_bytecode_output output;
+
+ memset(&output, 0, sizeof(struct r600_bytecode_output));
+ output.gpr = so_gpr[i];
+ output.elem_size = so->output[i].num_components;
+ output.array_base = so->output[i].dst_offset - so->output[i].start_component;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
+ output.burst_count = 1;
+ /* array_size is an upper limit for the burst_count
+ * with MEM_STREAM instructions */
+ output.array_size = 0xFFF;
+ output.comp_mask = ((1 << so->output[i].num_components) - 1) << so->output[i].start_component;
+ if (ctx->bc->chip_class >= EVERGREEN) {
+ switch (so->output[i].output_buffer) {
+ case 0:
+ output.op = CF_OP_MEM_STREAM0_BUF0;
+ break;
+ case 1:
+ output.op = CF_OP_MEM_STREAM0_BUF1;
+ break;
+ case 2:
+ output.op = CF_OP_MEM_STREAM0_BUF2;
+ break;
+ case 3:
+ output.op = CF_OP_MEM_STREAM0_BUF3;
+ break;
+ }
+ } else {
+ switch (so->output[i].output_buffer) {
+ case 0:
+ output.op = CF_OP_MEM_STREAM0;
+ break;
+ case 1:
+ output.op = CF_OP_MEM_STREAM1;
+ break;
+ case 2:
+ output.op = CF_OP_MEM_STREAM2;
+ break;
+ case 3:
+ output.op = CF_OP_MEM_STREAM3;
+ break;
+ }
+ }
+ r = r600_bytecode_add_output(ctx->bc, &output);
+ if (r)
+ goto out_err;
+ }
+ return 0;
+out_err:
+ return r;
+}
+
+/* Convert the edge-flag VS output from float to integer, in place in
+ * its output GPR: first clamp to [0, 1], then FLT_TO_INT.  No-op when
+ * the shader writes no edge flag.
+ *
+ * NOTE(review): the r600_bytecode_add_alu() return values are
+ * discarded here -- an emission failure would go unnoticed. */
+static void convert_edgeflag_to_int(struct r600_shader_ctx *ctx)
+{
+ struct r600_bytecode_alu alu;
+ unsigned reg;
+
+ if (!ctx->shader->vs_out_edgeflag)
+ return;
+
+ reg = ctx->shader->output[ctx->edgeflag_output].gpr;
+
+ /* clamp(x, 0, 1) */
+ memset(&alu, 0, sizeof(alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = reg;
+ alu.dst.sel = reg;
+ alu.dst.write = 1;
+ alu.dst.clamp = 1;
+ alu.last = 1;
+ r600_bytecode_add_alu(ctx->bc, &alu);
+
+ /* truncate the clamped float to an integer */
+ memset(&alu, 0, sizeof(alu));
+ alu.op = ALU_OP1_FLT_TO_INT;
+ alu.src[0].sel = reg;
+ alu.dst.sel = reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r600_bytecode_add_alu(ctx->bc, &alu);
+}
+
+/* Build the "GS copy shader": a small VS-type shader that runs after
+ * the geometry shader, fetches each vertex the GS wrote to the GSVS
+ * ring and re-emits it as normal position/parameter exports (plus
+ * stream-out when requested).  The result is stored in
+ * gs->gs_copy_shader and its bytecode is built before returning. */
+static int generate_gs_copy_shader(struct r600_context *rctx,
+ struct r600_pipe_shader *gs,
+ struct pipe_stream_output_info *so)
+{
+ struct r600_shader_ctx ctx = {};
+ struct r600_shader *gs_shader = &gs->shader;
+ struct r600_pipe_shader *cshader;
+ int ocnt = gs_shader->noutput;
+ struct r600_bytecode_alu alu;
+ struct r600_bytecode_vtx vtx;
+ struct r600_bytecode_output output;
+ struct r600_bytecode_cf *cf_jump, *cf_pop,
+ *last_exp_pos = NULL, *last_exp_param = NULL;
+ int i, next_clip_pos = 61, next_param = 0;
+
+ cshader = calloc(1, sizeof(struct r600_pipe_shader));
+ if (!cshader)
+ return 0;
+ /* NOTE(review): returning 0 on calloc failure leaves
+ * gs->gs_copy_shader NULL while reporting success -- presumably
+ * callers tolerate a missing copy shader; confirm. */
+
+ /* the copy shader exports the same outputs the GS produced */
+ memcpy(cshader->shader.output, gs_shader->output, ocnt *
+ sizeof(struct r600_shader_io));
+
+ cshader->shader.noutput = ocnt;
+
+ ctx.shader = &cshader->shader;
+ ctx.bc = &ctx.shader->bc;
+ /* the copy shader runs in the hardware VS stage */
+ ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX;
+
+ r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family,
+ rctx->screen->has_compressed_msaa_texturing);
+
+ ctx.bc->isa = rctx->isa;
+
+ /* Split the incoming R0.x: keep the low 30 bits in R0.x and move
+ * the top 2 bits into R0.y; the export below is skipped unless
+ * those top bits are zero. */
+ /* R0.x = R0.x & 0x3fffffff */
+ memset(&alu, 0, sizeof(alu));
+ alu.op = ALU_OP2_AND_INT;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = 0x3fffffff;
+ alu.dst.write = 1;
+ r600_bytecode_add_alu(ctx.bc, &alu);
+
+ /* R0.y = R0.x >> 30 */
+ memset(&alu, 0, sizeof(alu));
+ alu.op = ALU_OP2_LSHR_INT;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = 0x1e;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r600_bytecode_add_alu(ctx.bc, &alu);
+
+ /* predicate on R0.y == 0 so the fetch/export block below is
+ * conditional */
+ /* PRED_SETE_INT __, R0.y, 0 */
+ memset(&alu, 0, sizeof(alu));
+ alu.op = ALU_OP2_PRED_SETE_INT;
+ alu.src[0].chan = 1;
+ alu.src[1].sel = V_SQ_ALU_SRC_0;
+ alu.execute_mask = 1;
+ alu.update_pred = 1;
+ alu.last = 1;
+ r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
+
+ /* jump target is patched after the POP is emitted */
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
+ cf_jump = ctx.bc->cf_last;
+
+ /* fetch vertex data from GSVS ring */
+ for (i = 0; i < ocnt; ++i) {
+ struct r600_shader_io *out = &ctx.shader->output[i];
+ out->gpr = i + 1;
+ out->ring_offset = i * 16;
+
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.op = FETCH_OP_VFETCH;
+ vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
+ vtx.fetch_type = 2;
+ vtx.offset = out->ring_offset;
+ vtx.dst_gpr = out->gpr;
+ vtx.dst_sel_x = 0;
+ vtx.dst_sel_y = 1;
+ vtx.dst_sel_z = 2;
+ vtx.dst_sel_w = 3;
+ if (rctx->b.chip_class >= EVERGREEN) {
+ vtx.use_const_fields = 1;
+ } else {
+ vtx.data_format = FMT_32_32_32_32_FLOAT;
+ }
+
+ r600_bytecode_add_vtx(ctx.bc, &vtx);
+ }
+
+ /* XXX handle clipvertex, streamout? */
+ emit_streamout(&ctx, so);
+
+ /* export vertex data */
+ /* XXX factor out common code with r600_shader_from_tgsi ? */
+ for (i = 0; i < ocnt; ++i) {
+ struct r600_shader_io *out = &ctx.shader->output[i];
+
+ if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
+ continue;
+
+ memset(&output, 0, sizeof(output));
+ output.gpr = out->gpr;
+ output.elem_size = 3;
+ output.swizzle_x = 0;
+ output.swizzle_y = 1;
+ output.swizzle_z = 2;
+ output.swizzle_w = 3;
+ output.burst_count = 1;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ output.op = CF_OP_EXPORT;
+ switch (out->name) {
+ case TGSI_SEMANTIC_POSITION:
+ output.array_base = 60;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ break;
+
+ case TGSI_SEMANTIC_PSIZE:
+ /* point size goes into the misc vector at POS slot 61;
+ * clip distances must then start at 62 */
+ output.array_base = 61;
+ if (next_clip_pos == 61)
+ next_clip_pos = 62;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ output.swizzle_y = 7;
+ output.swizzle_z = 7;
+ output.swizzle_w = 7;
+ ctx.shader->vs_out_misc_write = 1;
+ ctx.shader->vs_out_point_size = 1;
+ break;
+ case TGSI_SEMANTIC_LAYER:
+ if (out->spi_sid) {
+ /* duplicate it as PARAM to pass to the pixel shader */
+ output.array_base = next_param++;
+ r600_bytecode_add_output(ctx.bc, &output);
+ last_exp_param = ctx.bc->cf_last;
+ }
+ /* layer occupies the .z channel of the misc vector */
+ output.array_base = 61;
+ if (next_clip_pos == 61)
+ next_clip_pos = 62;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ output.swizzle_x = 7;
+ output.swizzle_y = 7;
+ output.swizzle_z = 0;
+ output.swizzle_w = 7;
+ ctx.shader->vs_out_misc_write = 1;
+ ctx.shader->vs_out_layer = 1;
+ break;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ if (out->spi_sid) {
+ /* duplicate it as PARAM to pass to the pixel shader */
+ output.array_base = next_param++;
+ r600_bytecode_add_output(ctx.bc, &output);
+ last_exp_param = ctx.bc->cf_last;
+ }
+ /* viewport index occupies the .w channel of the misc vector */
+ output.array_base = 61;
+ if (next_clip_pos == 61)
+ next_clip_pos = 62;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ ctx.shader->vs_out_misc_write = 1;
+ ctx.shader->vs_out_viewport = 1;
+ output.swizzle_x = 7;
+ output.swizzle_y = 7;
+ output.swizzle_z = 7;
+ output.swizzle_w = 0;
+ break;
+ case TGSI_SEMANTIC_CLIPDIST:
+ /* spi_sid is 0 for clipdistance outputs that were generated
+ * for clipvertex - we don't need to pass them to PS */
+ ctx.shader->clip_dist_write = gs->shader.clip_dist_write;
+ if (out->spi_sid) {
+ /* duplicate it as PARAM to pass to the pixel shader */
+ output.array_base = next_param++;
+ r600_bytecode_add_output(ctx.bc, &output);
+ last_exp_param = ctx.bc->cf_last;
+ }
+ output.array_base = next_clip_pos++;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ break;
+ case TGSI_SEMANTIC_FOG:
+ output.swizzle_y = 4; /* 0 */
+ output.swizzle_z = 4; /* 0 */
+ output.swizzle_w = 5; /* 1 */
+ break;
+ default:
+ output.array_base = next_param++;
+ break;
+ }
+ r600_bytecode_add_output(ctx.bc, &output);
+ if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM)
+ last_exp_param = ctx.bc->cf_last;
+ else
+ last_exp_pos = ctx.bc->cf_last;
+ }
+
+ /* the hardware needs at least one POS export; emit a dummy one
+ * (all channels masked) if none was produced above */
+ if (!last_exp_pos) {
+ memset(&output, 0, sizeof(output));
+ output.gpr = 0;
+ output.elem_size = 3;
+ output.swizzle_x = 7;
+ output.swizzle_y = 7;
+ output.swizzle_z = 7;
+ output.swizzle_w = 7;
+ output.burst_count = 1;
+ output.type = 2;
+ output.op = CF_OP_EXPORT;
+ output.array_base = 60;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ r600_bytecode_add_output(ctx.bc, &output);
+ last_exp_pos = ctx.bc->cf_last;
+ }
+
+ /* likewise at least one PARAM export is required */
+ if (!last_exp_param) {
+ memset(&output, 0, sizeof(output));
+ output.gpr = 0;
+ output.elem_size = 3;
+ output.swizzle_x = 7;
+ output.swizzle_y = 7;
+ output.swizzle_z = 7;
+ output.swizzle_w = 7;
+ output.burst_count = 1;
+ output.type = 2;
+ output.op = CF_OP_EXPORT;
+ output.array_base = next_param++;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ r600_bytecode_add_output(ctx.bc, &output);
+ last_exp_param = ctx.bc->cf_last;
+ }
+
+ /* mark the final POS and PARAM exports as DONE */
+ last_exp_pos->op = CF_OP_EXPORT_DONE;
+ last_exp_param->op = CF_OP_EXPORT_DONE;
+
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
+ cf_pop = ctx.bc->cf_last;
+
+ /* patch the JUMP emitted before the fetch/export block to land
+ * just past the POP */
+ cf_jump->cf_addr = cf_pop->id + 2;
+ cf_jump->pop_count = 1;
+ cf_pop->cf_addr = cf_pop->id + 2;
+ cf_pop->pop_count = 1;
+
+ if (ctx.bc->chip_class == CAYMAN)
+ cm_bytecode_add_cf_end(ctx.bc);
+ else {
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
+ ctx.bc->cf_last->end_of_program = 1;
+ }
+
+ gs->gs_copy_shader = cshader;
+
+ /* one stack slot for the PUSH/POP pair above */
+ ctx.bc->nstack = 1;
+ /* 16 bytes (one vec4) of GSVS ring space per output */
+ cshader->shader.ring_item_size = ocnt * 16;
+
+ return r600_bytecode_build(ctx.bc);
+}
+
+/* Write the current vertex's outputs to a GS ring buffer.
+ *
+ * When ctx->gs_for_vs is set (a VS compiled as ES), each output is
+ * matched by name/sid against the GS inputs to find the ring offset
+ * the GS expects; unmatched outputs are skipped.  Otherwise (a real
+ * GS) outputs occupy consecutive 16-byte slots.
+ *
+ * With ind == false the slot is addressed directly, offset by
+ * gs_out_ring_offset * gs_next_vertex; with ind == true the write is
+ * indexed through gs_export_gpr_treg, which is advanced by the
+ * per-vertex ring size (in vec4s) afterwards.  gs_next_vertex is
+ * incremented in both cases. */
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
+{
+ struct r600_bytecode_output output;
+ int i, k, ring_offset;
+
+ for (i = 0; i < ctx->shader->noutput; i++) {
+ if (ctx->gs_for_vs) {
+ /* for ES we need to lookup corresponding ring offset expected by GS
+ * (map this output to GS input by name and sid) */
+ /* FIXME precompute offsets */
+ ring_offset = -1;
+ for(k = 0; k < ctx->gs_for_vs->ninput; ++k) {
+ struct r600_shader_io *in = &ctx->gs_for_vs->input[k];
+ struct r600_shader_io *out = &ctx->shader->output[i];
+ if (in->name == out->name && in->sid == out->sid)
+ ring_offset = in->ring_offset;
+ }
+
+ if (ring_offset == -1)
+ continue;
+ } else
+ ring_offset = i * 16;
+
+ /* next_ring_offset after parsing input decls contains total size of
+ * single vertex data, gs_next_vertex - current vertex index */
+ if (!ind)
+ ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;
+
+ /* get a temp and add the ring offset to the next vertex base in the shader */
+ memset(&output, 0, sizeof(struct r600_bytecode_output));
+ output.gpr = ctx->shader->output[i].gpr;
+ output.elem_size = 3;
+ output.comp_mask = 0xF;
+ output.burst_count = 1;
+
+ if (ind)
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
+ else
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
+ output.op = CF_OP_MEM_RING;
+
+
+ if (ind) {
+ output.array_base = ring_offset >> 2; /* in dwords */
+ output.array_size = 0xfff;
+ output.index_gpr = ctx->gs_export_gpr_treg;
+ } else
+ output.array_base = ring_offset >> 2; /* in dwords */
+ /* NOTE(review): r600_bytecode_add_output()'s return value is
+ * discarded here. */
+ r600_bytecode_add_output(ctx->bc, &output);
+ }
+
+ if (ind) {
+ struct r600_bytecode_alu alu;
+ int r;
+
+ /* advance the export index by one vertex (ring size in vec4s) */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_ADD_INT;
+ alu.src[0].sel = ctx->gs_export_gpr_treg;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = ctx->gs_out_ring_offset >> 4;
+ alu.dst.sel = ctx->gs_export_gpr_treg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ ++ctx->gs_next_vertex;
+ return 0;
+}
+
+static int r600_shader_from_tgsi(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
struct r600_shader_key key)
{
+ struct r600_screen *rscreen = rctx->screen;
struct r600_shader *shader = &pipeshader->shader;
struct tgsi_token *tokens = pipeshader->selector->tokens;
struct pipe_stream_output_info so = pipeshader->selector->so;
unsigned output_done, noutput;
unsigned opcode;
int i, j, k, r = 0;
- int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
+ int next_param_base = 0, next_clip_base;
+ int max_color_exports = MAX2(key.nr_cbufs, 1);
/* Declarations used by llvm code */
bool use_llvm = false;
bool indirect_gprs;
+ bool ring_outputs = false;
+ bool pos_emitted = false;
#ifdef R600_USE_LLVM
- use_llvm = !(rscreen->debug_flags & DBG_NO_LLVM);
+ use_llvm = rscreen->b.debug_flags & DBG_LLVM;
#endif
ctx.bc = &shader->bc;
ctx.shader = shader;
ctx.native_integers = true;
- r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family,
+ shader->vs_as_es = key.vs_as_es;
+
+ r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
rscreen->has_compressed_msaa_texturing);
ctx.tokens = tokens;
tgsi_scan_shader(tokens, &ctx.info);
shader->processor_type = ctx.type;
ctx.bc->type = shader->processor_type;
+ ring_outputs = key.vs_as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
+
+ if (key.vs_as_es) {
+ ctx.gs_for_vs = &rctx->gs_shader->current->shader;
+ } else {
+ ctx.gs_for_vs = NULL;
+ }
+
+ ctx.next_ring_offset = 0;
+ ctx.gs_out_ring_offset = 0;
+ ctx.gs_next_vertex = 0;
+
ctx.face_gpr = -1;
ctx.fragcoord_input = -1;
ctx.colors_used = 0;
if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
}
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ /* FIXME 1 would be enough in some cases (3 or less input vertices) */
+ ctx.file_offset[TGSI_FILE_INPUT] = 2;
+ }
ctx.use_llvm = use_llvm;
if (use_llvm) {
ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
- ctx.temp_reg = ctx.bc->ar_reg + 1;
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 1;
+ ctx.temp_reg = ctx.bc->ar_reg + 2;
+ } else
+ ctx.temp_reg = ctx.bc->ar_reg + 1;
if (indirect_gprs) {
shader->max_arrays = 0;
if (property->u[0].Data == 1)
shader->fs_write_all = TRUE;
break;
+ case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION:
+ if (property->u[0].Data == 1)
+ shader->vs_position_window_space = TRUE;
+ break;
case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
/* we don't need this one */
break;
+ case TGSI_PROPERTY_GS_INPUT_PRIM:
+ shader->gs_input_prim = property->u[0].Data;
+ break;
+ case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+ shader->gs_output_prim = property->u[0].Data;
+ break;
+ case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+ shader->gs_max_out_vertices = property->u[0].Data;
+ break;
}
break;
default:
}
}
+ shader->ring_item_size = ctx.next_ring_offset;
+
/* Process two side if needed */
if (shader->two_side && ctx.colors_used) {
int i, count = ctx.shader->ninput;
if (use_llvm) {
struct radeon_llvm_context radeon_llvm_ctx;
LLVMModuleRef mod;
- bool dump = r600_can_dump_shader(rscreen, ctx.type);
+ bool dump = r600_can_dump_shader(&rscreen->b, tokens);
boolean use_kill = false;
memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
radeon_llvm_ctx.type = ctx.type;
radeon_llvm_ctx.two_side = shader->two_side;
radeon_llvm_ctx.face_gpr = ctx.face_gpr;
+ radeon_llvm_ctx.inputs_count = ctx.shader->ninput + 1;
radeon_llvm_ctx.r600_inputs = ctx.shader->input;
radeon_llvm_ctx.r600_outputs = ctx.shader->output;
- radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1);
+ radeon_llvm_ctx.color_buffer_count = max_color_exports;
radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
- radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN);
+ radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN);
radeon_llvm_ctx.stream_outputs = &so;
radeon_llvm_ctx.clip_vertex = ctx.cv_output;
radeon_llvm_ctx.alpha_to_one = key.alpha_to_one;
+ radeon_llvm_ctx.has_compressed_msaa_texturing =
+ ctx.bc->has_compressed_msaa_texturing;
mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp;
+ ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers;
- if (r600_llvm_compile(mod, rscreen->family, ctx.bc, &use_kill, dump)) {
+ if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill, dump)) {
radeon_llvm_dispose(&radeon_llvm_ctx);
use_llvm = 0;
fprintf(stderr, "R600 LLVM backend failed to compile "
#endif
/* End of LLVM backend setup */
- if (shader->fs_write_all && rscreen->chip_class >= EVERGREEN)
+ if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN)
shader->nr_ps_max_color_exports = 8;
if (!use_llvm) {
}
}
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ struct r600_bytecode_alu alu;
+ int r;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[0].value = 0;
+ alu.dst.sel = ctx.gs_export_gpr_treg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx.bc, &alu);
+ if (r)
+ return r;
+ }
if (shader->two_side && ctx.colors_used) {
if ((r = process_twoside_color_inputs(&ctx)))
return r;
goto out_err;
if ((r = tgsi_split_literal_constant(&ctx)))
goto out_err;
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY)
+ if ((r = tgsi_split_gs_inputs(&ctx)))
+ goto out_err;
if (ctx.bc->chip_class == CAYMAN)
ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
else if (ctx.bc->chip_class >= EVERGREEN)
noutput = shader->noutput;
- if (ctx.clip_vertex_write) {
+ if (!ring_outputs && ctx.clip_vertex_write) {
unsigned clipdist_temp[2];
clipdist_temp[0] = r600_get_temp(&ctx);
}
/* Add stream outputs. */
- if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs && !use_llvm) {
- unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
-
- /* Sanity checking. */
- if (so.num_outputs > PIPE_MAX_SHADER_OUTPUTS) {
- R600_ERR("Too many stream outputs: %d\n", so.num_outputs);
- r = -EINVAL;
- goto out_err;
- }
- for (i = 0; i < so.num_outputs; i++) {
- if (so.output[i].output_buffer >= 4) {
- R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
- so.output[i].output_buffer);
- r = -EINVAL;
- goto out_err;
- }
- }
-
- /* Initialize locations where the outputs are stored. */
- for (i = 0; i < so.num_outputs; i++) {
- so_gpr[i] = shader->output[so.output[i].register_index].gpr;
-
- /* Lower outputs with dst_offset < start_component.
- *
- * We can only output 4D vectors with a write mask, e.g. we can
- * only output the W component at offset 3, etc. If we want
- * to store Y, Z, or W at buffer offset 0, we need to use MOV
- * to move it to X and output X. */
- if (so.output[i].dst_offset < so.output[i].start_component) {
- unsigned tmp = r600_get_temp(&ctx);
-
- for (j = 0; j < so.output[i].num_components; j++) {
- struct r600_bytecode_alu alu;
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_MOV;
- alu.src[0].sel = so_gpr[i];
- alu.src[0].chan = so.output[i].start_component + j;
+ if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX &&
+ so.num_outputs && !use_llvm)
+ emit_streamout(&ctx, &so);
- alu.dst.sel = tmp;
- alu.dst.chan = j;
- alu.dst.write = 1;
- if (j == so.output[i].num_components - 1)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx.bc, &alu);
- if (r)
- return r;
- }
- so.output[i].start_component = 0;
- so_gpr[i] = tmp;
- }
- }
+ convert_edgeflag_to_int(&ctx);
- /* Write outputs to buffers. */
- for (i = 0; i < so.num_outputs; i++) {
- struct r600_bytecode_output output;
+ if (ring_outputs) {
+ if (key.vs_as_es)
+ emit_gs_ring_writes(&ctx, FALSE);
+ } else {
+ /* Export output */
+ next_clip_base = shader->vs_out_misc_write ? 62 : 61;
- memset(&output, 0, sizeof(struct r600_bytecode_output));
- output.gpr = so_gpr[i];
- output.elem_size = so.output[i].num_components;
- output.array_base = so.output[i].dst_offset - so.output[i].start_component;
- output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
- output.burst_count = 1;
- output.barrier = 1;
- /* array_size is an upper limit for the burst_count
- * with MEM_STREAM instructions */
- output.array_size = 0xFFF;
- output.comp_mask = ((1 << so.output[i].num_components) - 1) << so.output[i].start_component;
- if (ctx.bc->chip_class >= EVERGREEN) {
- switch (so.output[i].output_buffer) {
- case 0:
- output.op = CF_OP_MEM_STREAM0_BUF0;
+ for (i = 0, j = 0; i < noutput; i++, j++) {
+ memset(&output[j], 0, sizeof(struct r600_bytecode_output));
+ output[j].gpr = shader->output[i].gpr;
+ output[j].elem_size = 3;
+ output[j].swizzle_x = 0;
+ output[j].swizzle_y = 1;
+ output[j].swizzle_z = 2;
+ output[j].swizzle_w = 3;
+ output[j].burst_count = 1;
+ output[j].type = -1;
+ output[j].op = CF_OP_EXPORT;
+ switch (ctx.type) {
+ case TGSI_PROCESSOR_VERTEX:
+ switch (shader->output[i].name) {
+ case TGSI_SEMANTIC_POSITION:
+ output[j].array_base = 60;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ pos_emitted = true;
break;
- case 1:
- output.op = CF_OP_MEM_STREAM0_BUF1;
+
+ case TGSI_SEMANTIC_PSIZE:
+ output[j].array_base = 61;
+ output[j].swizzle_y = 7;
+ output[j].swizzle_z = 7;
+ output[j].swizzle_w = 7;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ pos_emitted = true;
break;
- case 2:
- output.op = CF_OP_MEM_STREAM0_BUF2;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ output[j].array_base = 61;
+ output[j].swizzle_x = 7;
+ output[j].swizzle_y = 0;
+ output[j].swizzle_z = 7;
+ output[j].swizzle_w = 7;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ pos_emitted = true;
break;
- case 3:
- output.op = CF_OP_MEM_STREAM0_BUF3;
+ case TGSI_SEMANTIC_LAYER:
+ /* spi_sid is 0 for outputs that are
+ * not consumed by PS */
+ if (shader->output[i].spi_sid) {
+ output[j].array_base = next_param_base++;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ j++;
+ memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
+ }
+ output[j].array_base = 61;
+ output[j].swizzle_x = 7;
+ output[j].swizzle_y = 7;
+ output[j].swizzle_z = 0;
+ output[j].swizzle_w = 7;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ pos_emitted = true;
break;
- }
- } else {
- switch (so.output[i].output_buffer) {
- case 0:
- output.op = CF_OP_MEM_STREAM0;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ /* spi_sid is 0 for outputs that are
+ * not consumed by PS */
+ if (shader->output[i].spi_sid) {
+ output[j].array_base = next_param_base++;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ j++;
+ memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
+ }
+ output[j].array_base = 61;
+ output[j].swizzle_x = 7;
+ output[j].swizzle_y = 7;
+ output[j].swizzle_z = 7;
+ output[j].swizzle_w = 0;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ pos_emitted = true;
break;
- case 1:
- output.op = CF_OP_MEM_STREAM1;
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ j--;
break;
- case 2:
- output.op = CF_OP_MEM_STREAM2;
+ case TGSI_SEMANTIC_CLIPDIST:
+ output[j].array_base = next_clip_base++;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ pos_emitted = true;
+ /* spi_sid is 0 for clipdistance outputs that were generated
+ * for clipvertex - we don't need to pass them to PS */
+ if (shader->output[i].spi_sid) {
+ j++;
+ /* duplicate it as PARAM to pass to the pixel shader */
+ memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
+ output[j].array_base = next_param_base++;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ }
break;
- case 3:
- output.op = CF_OP_MEM_STREAM3;
+ case TGSI_SEMANTIC_FOG:
+ output[j].swizzle_y = 4; /* 0 */
+ output[j].swizzle_z = 4; /* 0 */
+ output[j].swizzle_w = 5; /* 1 */
break;
}
- }
- r = r600_bytecode_add_output(ctx.bc, &output);
- if (r)
- goto out_err;
- }
- }
-
- /* export output */
- for (i = 0, j = 0; i < noutput; i++, j++) {
- memset(&output[j], 0, sizeof(struct r600_bytecode_output));
- output[j].gpr = shader->output[i].gpr;
- output[j].elem_size = 3;
- output[j].swizzle_x = 0;
- output[j].swizzle_y = 1;
- output[j].swizzle_z = 2;
- output[j].swizzle_w = 3;
- output[j].burst_count = 1;
- output[j].barrier = 1;
- output[j].type = -1;
- output[j].op = CF_OP_EXPORT;
- switch (ctx.type) {
- case TGSI_PROCESSOR_VERTEX:
- switch (shader->output[i].name) {
- case TGSI_SEMANTIC_POSITION:
- output[j].array_base = next_pos_base++;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
- break;
-
- case TGSI_SEMANTIC_PSIZE:
- output[j].array_base = next_pos_base++;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
- break;
- case TGSI_SEMANTIC_CLIPVERTEX:
- j--;
- break;
- case TGSI_SEMANTIC_CLIPDIST:
- output[j].array_base = next_pos_base++;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
- /* spi_sid is 0 for clipdistance outputs that were generated
- * for clipvertex - we don't need to pass them to PS */
- if (shader->output[i].spi_sid) {
- j++;
- /* duplicate it as PARAM to pass to the pixel shader */
- memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
- output[j].array_base = next_param_base++;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
- }
- break;
- case TGSI_SEMANTIC_FOG:
- output[j].swizzle_y = 4; /* 0 */
- output[j].swizzle_z = 4; /* 0 */
- output[j].swizzle_w = 5; /* 1 */
break;
- }
- break;
- case TGSI_PROCESSOR_FRAGMENT:
- if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
- /* never export more colors than the number of CBs */
- if (next_pixel_base && next_pixel_base >= key.nr_cbufs) {
- /* skip export */
- j--;
- continue;
- }
- output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
- output[j].array_base = next_pixel_base++;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
- shader->nr_ps_color_exports++;
- if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN)) {
- for (k = 1; k < key.nr_cbufs; k++) {
- j++;
- memset(&output[j], 0, sizeof(struct r600_bytecode_output));
- output[j].gpr = shader->output[i].gpr;
- output[j].elem_size = 3;
- output[j].swizzle_x = 0;
- output[j].swizzle_y = 1;
- output[j].swizzle_z = 2;
- output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
- output[j].burst_count = 1;
- output[j].barrier = 1;
- output[j].array_base = next_pixel_base++;
- output[j].op = CF_OP_EXPORT;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
- shader->nr_ps_color_exports++;
+ case TGSI_PROCESSOR_FRAGMENT:
+ if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
+ /* never export more colors than the number of CBs */
+ if (shader->output[i].sid >= max_color_exports) {
+ /* skip export */
+ j--;
+ continue;
}
+ output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
+ output[j].array_base = shader->output[i].sid;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ shader->nr_ps_color_exports++;
+ if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) {
+ for (k = 1; k < max_color_exports; k++) {
+ j++;
+ memset(&output[j], 0, sizeof(struct r600_bytecode_output));
+ output[j].gpr = shader->output[i].gpr;
+ output[j].elem_size = 3;
+ output[j].swizzle_x = 0;
+ output[j].swizzle_y = 1;
+ output[j].swizzle_z = 2;
+ output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
+ output[j].burst_count = 1;
+ output[j].array_base = k;
+ output[j].op = CF_OP_EXPORT;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ shader->nr_ps_color_exports++;
+ }
+ }
+ } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
+ output[j].array_base = 61;
+ output[j].swizzle_x = 2;
+ output[j].swizzle_y = 7;
+ output[j].swizzle_z = output[j].swizzle_w = 7;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
+ output[j].array_base = 61;
+ output[j].swizzle_x = 7;
+ output[j].swizzle_y = 1;
+ output[j].swizzle_z = output[j].swizzle_w = 7;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ } else {
+ R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
+ r = -EINVAL;
+ goto out_err;
}
- } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
- output[j].array_base = 61;
- output[j].swizzle_x = 2;
- output[j].swizzle_y = 7;
- output[j].swizzle_z = output[j].swizzle_w = 7;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
- } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
- output[j].array_base = 61;
- output[j].swizzle_x = 7;
- output[j].swizzle_y = 1;
- output[j].swizzle_z = output[j].swizzle_w = 7;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
- } else {
- R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
+ break;
+ default:
+ R600_ERR("unsupported processor type %d\n", ctx.type);
r = -EINVAL;
goto out_err;
}
- break;
- default:
- R600_ERR("unsupported processor type %d\n", ctx.type);
- r = -EINVAL;
- goto out_err;
- }
- if (output[j].type==-1) {
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
- output[j].array_base = next_param_base++;
+ if (output[j].type==-1) {
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ output[j].array_base = next_param_base++;
+ }
}
- }
- /* add fake position export */
- if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) {
+ /* add fake position export */
+ if (ctx.type == TGSI_PROCESSOR_VERTEX && pos_emitted == false) {
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = 0;
output[j].elem_size = 3;
output[j].swizzle_z = 7;
output[j].swizzle_w = 7;
output[j].burst_count = 1;
- output[j].barrier = 1;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
- output[j].array_base = next_pos_base;
+ output[j].array_base = 60;
output[j].op = CF_OP_EXPORT;
j++;
- }
+ }
- /* add fake param output for vertex shader if no param is exported */
- if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
+ /* add fake param output for vertex shader if no param is exported */
+ if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = 0;
output[j].elem_size = 3;
output[j].swizzle_z = 7;
output[j].swizzle_w = 7;
output[j].burst_count = 1;
- output[j].barrier = 1;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[j].array_base = 0;
output[j].op = CF_OP_EXPORT;
j++;
- }
+ }
+
+ /* add fake pixel export */
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) {
+ memset(&output[j], 0, sizeof(struct r600_bytecode_output));
+ output[j].gpr = 0;
+ output[j].elem_size = 3;
+ output[j].swizzle_x = 7;
+ output[j].swizzle_y = 7;
+ output[j].swizzle_z = 7;
+ output[j].swizzle_w = 7;
+ output[j].burst_count = 1;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ output[j].array_base = 0;
+ output[j].op = CF_OP_EXPORT;
+ j++;
+ }
+
+ noutput = j;
- /* add fake pixel export */
- if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
- memset(&output[j], 0, sizeof(struct r600_bytecode_output));
- output[j].gpr = 0;
- output[j].elem_size = 3;
- output[j].swizzle_x = 7;
- output[j].swizzle_y = 7;
- output[j].swizzle_z = 7;
- output[j].swizzle_w = 7;
- output[j].burst_count = 1;
- output[j].barrier = 1;
- output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
- output[j].array_base = 0;
- output[j].op = CF_OP_EXPORT;
- j++;
- }
-
- noutput = j;
-
- /* set export done on last export of each type */
- for (i = noutput - 1, output_done = 0; i >= 0; i--) {
- if (ctx.bc->chip_class < CAYMAN) {
- if (i == (noutput - 1)) {
- output[i].end_of_program = 1;
+ /* set export done on last export of each type */
+ for (i = noutput - 1, output_done = 0; i >= 0; i--) {
+ if (!(output_done & (1 << output[i].type))) {
+ output_done |= (1 << output[i].type);
+ output[i].op = CF_OP_EXPORT_DONE;
}
}
- if (!(output_done & (1 << output[i].type))) {
- output_done |= (1 << output[i].type);
- output[i].op = CF_OP_EXPORT_DONE;
+ /* add output to bytecode */
+ if (!use_llvm) {
+ for (i = 0; i < noutput; i++) {
+ r = r600_bytecode_add_output(ctx.bc, &output[i]);
+ if (r)
+ goto out_err;
+ }
}
}
- /* add output to bytecode */
+
+ /* add program end */
if (!use_llvm) {
- for (i = 0; i < noutput; i++) {
- r = r600_bytecode_add_output(ctx.bc, &output[i]);
- if (r)
- goto out_err;
+ if (ctx.bc->chip_class == CAYMAN)
+ cm_bytecode_add_cf_end(ctx.bc);
+ else {
+ const struct cf_op_info *last = NULL;
+
+ if (ctx.bc->cf_last)
+ last = r600_isa_cf(ctx.bc->cf_last->op);
+
+ /* alu clause instructions don't have EOP bit, so add NOP */
+ if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS)
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
+
+ ctx.bc->cf_last->end_of_program = 1;
}
}
- /* add program end */
- if (!use_llvm && ctx.bc->chip_class == CAYMAN)
- cm_bytecode_add_cf_end(ctx.bc);
/* check GPR limit - we have 124 = 128 - 4
* (4 are reserved as alu clause temporary registers) */
goto out_err;
}
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ if ((r = generate_gs_copy_shader(rctx, pipeshader, &so)))
+ return r;
+ }
+
free(ctx.literals);
tgsi_parse_free(&ctx.parse);
return 0;
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
- int i, j, r;
- int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+ unsigned write_mask = inst->Dst[0].Register.WriteMask;
+ int i, j, r, lasti = tgsi_last_instruction(write_mask);
+ /* use temp register if trans_only and more than one dst component */
+ int use_tmp = trans_only && (write_mask ^ (1 << lasti));
- for (i = 0; i < lasti + 1; i++) {
- if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ for (i = 0; i <= lasti; i++) {
+ if (!(write_mask & (1 << i)))
continue;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (use_tmp) {
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ } else
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.op = ctx->inst_info->op;
if (!swap) {
if (r)
return r;
}
+
+ if (use_tmp) {
+ /* move result from temp to dst */
+ for (i = 0; i <= lasti; i++) {
+ if (!(write_mask & (1 << i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = i;
+ alu.last = (i == lasti);
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ }
return 0;
}
alu.src[0].sel = V_SQ_ALU_SRC_0;
- if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
+ if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILL) {
alu.src[1].sel = V_SQ_ALU_SRC_1;
alu.src[1].neg = 1;
} else {
unsigned write_mask = inst->Dst[0].Register.WriteMask;
int last_inst = tgsi_last_instruction(write_mask);
- /* tmp = -src */
+ /* tmp = -src */
+ for (i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_SUB_INT;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+
+ r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
+ alu.src[0].sel = V_SQ_ALU_SRC_0;
+
+ if (i == last_inst)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ /* dst = (src >= 0 ? src : tmp) */
+ for (i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP3_CNDGE_INT;
+ alu.is_op3 = 1;
+ alu.dst.write = 1;
+
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
+ alu.src[2].sel = ctx->temp_reg;
+ alu.src[2].chan = i;
+
+ if (i == last_inst)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+/* TGSI ISSG: per-component integer sign, dst = -1, 0 or 1.
+ * Implemented as two conditional moves through ctx->temp_reg, honoring
+ * the destination write mask. */
+static int tgsi_issg(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bytecode_alu alu;
+	int i, r;
+	unsigned write_mask = inst->Dst[0].Register.WriteMask;
+	int last_inst = tgsi_last_instruction(write_mask);
+
+	/* tmp = (src >= 0 ? src : -1) */
+	for (i = 0; i < 4; i++) {
+		if (!(write_mask & (1<<i)))
+			continue;
+
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP3_CNDGE_INT;
+		alu.is_op3 = 1;
+
+		alu.dst.sel = ctx->temp_reg;
+		alu.dst.chan = i;
+		alu.dst.write = 1;
+
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
+		alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
+
+		/* close the ALU group on the last written component */
+		if (i == last_inst)
+			alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	/* dst = (tmp > 0 ? 1 : tmp) */
+	for (i = 0; i < 4; i++) {
+		if (!(write_mask & (1<<i)))
+			continue;
+
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP3_CNDGT_INT;
+		alu.is_op3 = 1;
+		alu.dst.write = 1;
+
+		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+		alu.src[0].sel = ctx->temp_reg;
+		alu.src[0].chan = i;
+
+		alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
+
+		alu.src[2].sel = ctx->temp_reg;
+		alu.src[2].chan = i;
+
+		if (i == last_inst)
+			alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+
+
+/* TGSI SSG: per-component float sign (dst = -1.0, 0.0 or 1.0),
+ * built from two CNDGT conditional moves; always writes all four
+ * channels (no write-mask handling here). */
+static int tgsi_ssg(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bytecode_alu alu;
+	int i, r;
+
+	/* tmp = (src > 0 ? 1 : src) */
+	for (i = 0; i < 4; i++) {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP3_CNDGT;
+		alu.is_op3 = 1;
+
+		alu.dst.sel = ctx->temp_reg;
+		alu.dst.chan = i;
+
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+		alu.src[1].sel = V_SQ_ALU_SRC_1;
+		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
+
+		if (i == 3)
+			alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	/* dst = (-tmp > 0 ? -1 : tmp) */
+	for (i = 0; i < 4; i++) {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP3_CNDGT;
+		alu.is_op3 = 1;
+		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+		/* negate tmp so the same CNDGT op tests "tmp < 0" */
+		alu.src[0].sel = ctx->temp_reg;
+		alu.src[0].chan = i;
+		alu.src[0].neg = 1;
+
+		alu.src[1].sel = V_SQ_ALU_SRC_1;
+		alu.src[1].neg = 1;
+
+		alu.src[2].sel = ctx->temp_reg;
+		alu.src[2].chan = i;
+
+		if (i == 3)
+			alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+/* TGSI BFI (bitfield insert): replace a field of 'bits' width (src3)
+ * at 'offset' (src2) in base src0 with the low bits of insert src1.
+ * Three passes: t1 = BFM(bits, offset) builds the field mask;
+ * t2 = insert << offset aligns the insert value; BFI(t1, t2, base)
+ * merges them into the destination. */
+static int tgsi_bfi(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bytecode_alu alu;
+	int i, r, t1, t2;
+
+	unsigned write_mask = inst->Dst[0].Register.WriteMask;
+	int last_inst = tgsi_last_instruction(write_mask);
+
+	t1 = ctx->temp_reg;
+
+	for (i = 0; i < 4; i++) {
+		if (!(write_mask & (1<<i)))
+			continue;
+
+		/* create mask tmp */
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP2_BFM_INT;
+		alu.dst.sel = t1;
+		alu.dst.chan = i;
+		alu.dst.write = 1;
+		alu.last = i == last_inst;
+
+		r600_bytecode_src(&alu.src[0], &ctx->src[3], i);
+		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
+
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	t2 = r600_get_temp(ctx);
+
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;
+		/* shift insert left */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-		alu.op = ALU_OP2_SUB_INT;
-
-		alu.dst.sel = ctx->temp_reg;
+		alu.op = ALU_OP2_LSHL_INT;
+		alu.dst.sel = t2;
		alu.dst.chan = i;
		alu.dst.write = 1;
+		alu.last = i == last_inst;
-		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
-		alu.src[0].sel = V_SQ_ALU_SRC_0;
+		r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
+		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
-		if (i == last_inst)
-			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
-	/* dst = (src >= 0 ? src : tmp) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;
+		/* actual bitfield insert */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-		alu.op = ALU_OP3_CNDGE_INT;
+		alu.op = ALU_OP3_BFI_INT;
		alu.is_op3 = 1;
-		alu.dst.write = 1;
-
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+		alu.dst.chan = i;
+		alu.dst.write = 1;
+		alu.last = i == last_inst;
-		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
-		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
-		alu.src[2].sel = ctx->temp_reg;
-		alu.src[2].chan = i;
+		alu.src[0].sel = t1;
+		alu.src[0].chan = i;
+		alu.src[1].sel = t2;
+		alu.src[1].chan = i;
+		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
-		if (i == last_inst)
-			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
+
	return 0;
}
-static int tgsi_issg(struct r600_shader_ctx *ctx)
+/* TGSI IMSB/UMSB: index of the most significant set bit, via the
+ * hardware FFBH ops (selected through ctx->inst_info->op). FFBH counts
+ * from the msb while TGSI counts from the lsb, so the result is
+ * remapped as 31 - ffbh. A negative FFBH result (presumably the
+ * "no bit found" case -- confirm against the ISA) is passed through
+ * unchanged by the final CNDGE_INT select. */
+static int tgsi_msb(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
-	int i, r;
+	int i, r, t1, t2;
+
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);
-	/* tmp = (src >= 0 ? src : -1) */
+	assert(ctx->inst_info->op == ALU_OP1_FFBH_INT ||
+	       ctx->inst_info->op == ALU_OP1_FFBH_UINT);
+
+	t1 = ctx->temp_reg;
+
+	/* bit position is indexed from lsb by TGSI, and from msb by the hardware */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;
+		/* t1 = FFBH_INT / FFBH_UINT */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-		alu.op = ALU_OP3_CNDGE_INT;
-		alu.is_op3 = 1;
-
-		alu.dst.sel = ctx->temp_reg;
+		alu.op = ctx->inst_info->op;
+		alu.dst.sel = t1;
		alu.dst.chan = i;
		alu.dst.write = 1;
+		alu.last = i == last_inst;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
-		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
-		alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
-		if (i == last_inst)
-			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
-	/* dst = (tmp > 0 ? 1 : tmp) */
+	t2 = r600_get_temp(ctx);
+
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;
+		/* t2 = 31 - t1 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-		alu.op = ALU_OP3_CNDGT_INT;
-		alu.is_op3 = 1;
+		alu.op = ALU_OP2_SUB_INT;
+		alu.dst.sel = t2;
+		alu.dst.chan = i;
		alu.dst.write = 1;
+		alu.last = i == last_inst;
-		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
-
-		alu.src[0].sel = ctx->temp_reg;
-		alu.src[0].chan = i;
-
-		alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
-
-		alu.src[2].sel = ctx->temp_reg;
-		alu.src[2].chan = i;
+		alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+		alu.src[0].value = 31;
+		alu.src[1].sel = t1;
+		alu.src[1].chan = i;
-		if (i == last_inst)
-			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
-	return 0;
-}
-
-
-static int tgsi_ssg(struct r600_shader_ctx *ctx)
-{
-	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bytecode_alu alu;
-	int i, r;
-
-	/* tmp = (src > 0 ? 1 : src) */
	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-		alu.op = ALU_OP3_CNDGT;
-		alu.is_op3 = 1;
-
-		alu.dst.sel = ctx->temp_reg;
-		alu.dst.chan = i;
-
-		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
-		alu.src[1].sel = V_SQ_ALU_SRC_1;
-		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
-
-		if (i == 3)
-			alu.last = 1;
-		r = r600_bytecode_add_alu(ctx->bc, &alu);
-		if (r)
-			return r;
-	}
+		if (!(write_mask & (1<<i)))
+			continue;
-	/* dst = (-tmp > 0 ? -1 : tmp) */
-	for (i = 0; i < 4; i++) {
+		/* result = t1 >= 0 ? t2 : t1 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-		alu.op = ALU_OP3_CNDGT;
+		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+		alu.dst.chan = i;
+		alu.dst.write = 1;
+		alu.last = i == last_inst;
-		alu.src[0].sel = ctx->temp_reg;
+		alu.src[0].sel = t1;
		alu.src[0].chan = i;
-		alu.src[0].neg = 1;
-
-		alu.src[1].sel = V_SQ_ALU_SRC_1;
-		alu.src[1].neg = 1;
-
-		alu.src[2].sel = ctx->temp_reg;
+		alu.src[1].sel = t2;
+		alu.src[1].chan = i;
+		alu.src[2].sel = t1;
		alu.src[2].chan = i;
-		if (i == 3)
-			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
+
	return 0;
}
vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */
vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; /* SEL_W */
vtx.use_const_fields = 1;
- vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
return r;
(inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
+ bool txf_add_offsets = inst->Texture.NumOffsets &&
+ inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
+ inst->Texture.Texture != TGSI_TEXTURE_BUFFER;
+
/* Texture fetch instructions can only use gprs as source.
* Also they cannot negate the source or take the absolute value */
const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
tgsi_tex_src_requires_loading(ctx, 0)) ||
- read_compressed_msaa;
+ read_compressed_msaa || txf_add_offsets;
+
boolean src_loaded = FALSE;
unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
int8_t offset_x = 0, offset_y = 0, offset_z = 0;
if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
- inst->Instruction.Opcode == TGSI_OPCODE_TXL2)
+ inst->Instruction.Opcode == TGSI_OPCODE_TXL2 ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TG4)
sampler_src_reg = 2;
src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
}
}
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
- /* get offset values */
- if (inst->Texture.NumOffsets) {
- assert(inst->Texture.NumOffsets == 1);
-
- offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
- offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
- offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
- }
- } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
/* TGSI moves the sampler to src reg 3 for TXD */
sampler_src_reg = 3;
src_gpr = ctx->temp_reg;
}
+ /* get offset values */
+ if (inst->Texture.NumOffsets) {
+ assert(inst->Texture.NumOffsets == 1);
+
+ /* The texture offset feature doesn't work with the TXF instruction
+ * and must be emulated by adding the offset to the texture coordinates. */
+ if (txf_add_offsets) {
+ const struct tgsi_texture_offset *off = inst->TexOffsets;
+
+ switch (inst->Texture.Texture) {
+ case TGSI_TEXTURE_3D:
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_ADD_INT;
+ alu.src[0].sel = src_gpr;
+ alu.src[0].chan = 2;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ];
+ alu.dst.sel = src_gpr;
+ alu.dst.chan = 2;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ /* fall through */
+
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_2D_ARRAY:
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_ADD_INT;
+ alu.src[0].sel = src_gpr;
+ alu.src[0].chan = 1;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY];
+ alu.dst.sel = src_gpr;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ /* fall through */
+
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_1D_ARRAY:
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_ADD_INT;
+ alu.src[0].sel = src_gpr;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX];
+ alu.dst.sel = src_gpr;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ break;
+ /* texture offsets do not apply to other texture targets */
+ }
+ } else {
+ offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
+ offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
+ offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
+ }
+ }
+
/* Obtain the sample index for reading a compressed MSAA color texture.
* To read the FMASK, we use the ldfptr instruction, which tells us
* where the samples are stored.
case FETCH_OP_SAMPLE_G:
opcode = FETCH_OP_SAMPLE_C_G;
break;
+ /* Texture gather variants */
+ case FETCH_OP_GATHER4:
+ tex.op = FETCH_OP_GATHER4_C;
+ break;
+ case FETCH_OP_GATHER4_O:
+ tex.op = FETCH_OP_GATHER4_C_O;
+ break;
}
}
tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
- tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
- tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
- tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
- tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) {
+ int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX];
+ tex.inst_mod = texture_component_select;
+
+ /* GATHER4 result order is different from TGSI TG4 */
+ tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
+ tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
+ tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
+ tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
+ }
+ else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) {
+ tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
+ tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
+ tex.dst_sel_z = 7;
+ tex.dst_sel_w = 7;
+ }
+ else {
+ tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
+ tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
+ tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
+ tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
+ }
+
if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
tex.src_sel_x = 4;
tex.offset_x = offset_x;
tex.offset_y = offset_y;
- tex.offset_z = offset_z;
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 &&
+ inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) {
+ tex.offset_z = 0;
+ }
+ else {
+ tex.offset_z = offset_z;
+ }
/* Put the depth for comparison in W.
* TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
tex.coord_type_z = 0;
/* mask unused source components */
- if (opcode == FETCH_OP_SAMPLE) {
+ if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) {
switch (inst->Texture.Texture) {
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int r;
+ int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
return -1;
}
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
- alu.last = 1;
- alu.dst.sel = ctx->bc->ar_reg;
- alu.dst.write = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ for (i = 0; i <= lasti; ++i) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ alu.last = i == lasti;
+ alu.dst.sel = ctx->bc->ar_reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
ctx->bc->ar_loaded = 0;
return 0;
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int r;
+ int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP1_FLOOR;
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
- alu.last = 1;
-
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
+ for (i = 0; i <= lasti; ++i) {
+ if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+ alu.dst.chan = i;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ alu.last = i == lasti;
+ if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ return r;
+ }
+ }
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP1_FLT_TO_INT;
alu.src[0].sel = ctx->bc->ar_reg;
alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
- alu.last = 1;
-
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
+ /* FLT_TO_INT is trans-only on r600/r700 */
+ alu.last = TRUE;
+ for (i = 0; i <= lasti; ++i) {
+ alu.dst.chan = i;
+ alu.src[0].chan = i;
+ if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ return r;
+ }
break;
case TGSI_OPCODE_ARR:
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP1_FLT_TO_INT;
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
- alu.last = 1;
-
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
+ /* FLT_TO_INT is trans-only on r600/r700 */
+ alu.last = TRUE;
+ for (i = 0; i <= lasti; ++i) {
+ if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+ alu.dst.chan = i;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ return r;
+ }
+ }
break;
case TGSI_OPCODE_UARL:
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP1_MOV;
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
- alu.last = 1;
-
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
+ for (i = 0; i <= lasti; ++i) {
+ if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+ alu.dst.chan = i;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ alu.last = i == lasti;
+ if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ return r;
+ }
+ }
break;
default:
assert(0);
return 0;
}
+/* Emit bytecode for TGSI_OPCODE_BREAKC (conditional loop break).
+ *
+ * Walks the flow-control stack down from fc_sp looking for the innermost
+ * FC_LOOP entry; fails with -EINVAL if BREAKC appears outside a
+ * loop/endloop pair.
+ *
+ * On Evergreen parts other than Cypress and Juniper, ALU_BREAK does not
+ * save the active mask correctly (see comment below), so the break is
+ * emitted as an IF (tgsi_uif) around a CF LOOP_BREAK, closed by ENDIF.
+ * On unaffected chips a predicated ALU break (PRED_SETE_INT +
+ * CF_OP_ALU_BREAK) is used instead.
+ *
+ * Returns 0 on success or a negative error code from the emitters.
+ */
+static int tgsi_loop_breakc(struct r600_shader_ctx *ctx)
+{
+ int r;
+ unsigned int fscp;
+
+ /* Locate the innermost enclosing loop on the flow-control stack. */
+ for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
+ {
+ if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+ break;
+ }
+ if (fscp == 0) {
+ R600_ERR("BREAKC not inside loop/endloop pair\n");
+ return -EINVAL;
+ }
+
+ if (ctx->bc->chip_class == EVERGREEN &&
+ ctx->bc->family != CHIP_CYPRESS &&
+ ctx->bc->family != CHIP_JUNIPER) {
+ /* HW bug: ALU_BREAK does not save the active mask correctly */
+ r = tgsi_uif(ctx);
+ if (r)
+ return r;
+
+ r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK);
+ if (r)
+ return r;
+ /* Record the break target against the enclosing loop entry. */
+ fc_set_mid(ctx, fscp);
+
+ return tgsi_endif(ctx);
+ } else {
+ /* Unaffected chips: predicated ALU break in one instruction. */
+ r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK);
+ if (r)
+ return r;
+ fc_set_mid(ctx, fscp);
+ }
+
+ return 0;
+}
+
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
{
unsigned int fscp;
return 0;
}
+/* Handle geometry-shader TGSI EMIT / ENDPRIM.
+ *
+ * For EMIT (mapped to CF_OP_EMIT_VERTEX in the opcode tables) the GS ring
+ * writes are emitted first (emit_gs_ring_writes), then the CF instruction
+ * from the opcode table entry (EMIT_VERTEX or CUT_VERTEX) is added.
+ * Returns the result of r600_bytecode_add_cfinst.
+ */
+static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
+{
+ if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
+ emit_gs_ring_writes(ctx, TRUE);
+
+ return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
+}
+
static int tgsi_umad(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
{TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
{TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {20, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{22, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig},
{TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
{TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- {TGSI_OPCODE_KILP, 0, ALU_OP2_KILLGT, tgsi_kill}, /* predicated kill */
+ {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
{TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex},
{TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
{TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
- {TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
+ {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
{TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop},
{TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
{105, 0, ALU_OP0_NOP, tgsi_unsupported},
{106, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {108, 0, ALU_OP0_NOP, tgsi_unsupported},
- {109, 0, ALU_OP0_NOP, tgsi_unsupported},
- {110, 0, ALU_OP0_NOP, tgsi_unsupported},
- {111, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
+ {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
{TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{114, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_KIL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
+ {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_loop_breakc},
+ {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
{TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
/* gap */
{118, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
{TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
+ {TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, tgsi_op2_trans},
+ {TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, tgsi_op2_trans},
+ {TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_unsupported},
+ {TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_unsupported},
+ {TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_unsupported},
+ {TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_unsupported},
+ {TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_unsupported},
+ {TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_unsupported},
+ {TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_unsupported},
+ {TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_unsupported},
+ {TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_unsupported},
{TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported},
};
{TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
{TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {20, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{22, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig},
{TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
{TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- {TGSI_OPCODE_KILP, 0, ALU_OP2_KILLGT, tgsi_kill}, /* predicated kill */
+ {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
{TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex},
{TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
{TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
- {TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
+ {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
{TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop},
{TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
{105, 0, ALU_OP0_NOP, tgsi_unsupported},
{106, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {108, 0, ALU_OP0_NOP, tgsi_unsupported},
- {109, 0, ALU_OP0_NOP, tgsi_unsupported},
- {110, 0, ALU_OP0_NOP, tgsi_unsupported},
- {111, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
+ {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
{TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{114, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_KIL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
+ {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
{TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
/* gap */
{118, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
{TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
+ {TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, tgsi_op2_trans},
+ {TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, tgsi_op2_trans},
+ {TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_tex},
+ {TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_tex},
+ {TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_op3},
+ {TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_op3},
+ {TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_bfi},
+ {TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_op2},
+ {TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_op2},
+ {TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_op2},
+ {TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_msb},
+ {TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_msb},
{TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported},
};
{TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
{TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {20, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
{TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{22, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_COS, 0, ALU_OP1_COS, cayman_trig},
{TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
{TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- {TGSI_OPCODE_KILP, 0, ALU_OP2_KILLGT, tgsi_kill}, /* predicated kill */
+ {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
{TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex},
{TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
{TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
- {TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
+ {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
{TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop},
{TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
{106, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
- {108, 0, ALU_OP0_NOP, tgsi_unsupported},
- {109, 0, ALU_OP0_NOP, tgsi_unsupported},
- {110, 0, ALU_OP0_NOP, tgsi_unsupported},
- {111, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
+ {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
{TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{114, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_KIL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
+ {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
{TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
/* gap */
{118, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
{TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
+ {TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, cayman_mul_int_instr},
+ {TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, cayman_mul_int_instr},
+ {TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_tex},
+ {TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_tex},
+ {TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_op3},
+ {TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_op3},
+ {TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_bfi},
+ {TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_op2},
+ {TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_op2},
+ {TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_op2},
+ {TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_msb},
+ {TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_msb},
{TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported},
};