+/*
+ * Emit one VFETCH from R600_LDS_INFO_CONST_BUFFER (format FMT_32_32_32_32,
+ * destination channels x..w).
+ *
+ * is_output == false: offset 0,  destination ctx->tess_input_info.
+ * is_output == true:  offset 16, destination ctx->tess_output_info.
+ *
+ * index_gpr.x is used as the fetch source register/channel.
+ * Returns the result of r600_bytecode_add_vtx().
+ */
+static int r600_fetch_tess_io_block(struct r600_shader_ctx *ctx,
+	int index_gpr, bool is_output)
+{
+	struct r600_bytecode_vtx fetch;
+
+	memset(&fetch, 0, sizeof(fetch));
+
+	/* what to fetch and from where */
+	fetch.op = FETCH_OP_VFETCH;
+	fetch.buffer_id = R600_LDS_INFO_CONST_BUFFER;
+	fetch.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
+	fetch.mega_fetch_count = 16;
+	fetch.offset = is_output ? 16 : 0;
+
+	/* how to interpret the fetched data */
+	fetch.data_format = FMT_32_32_32_32;
+	fetch.num_format_all = 2;
+	fetch.format_comp_all = 1;
+	fetch.use_const_fields = 0;
+	fetch.endian = r600_endian_swap(32);
+	fetch.srf_mode_all = 1;
+
+	/* destination: all four channels, unswizzled */
+	fetch.dst_gpr = is_output ? ctx->tess_output_info : ctx->tess_input_info;
+	fetch.dst_sel_x = 0;
+	fetch.dst_sel_y = 1;
+	fetch.dst_sel_z = 2;
+	fetch.dst_sel_w = 3;
+
+	/* source index register */
+	fetch.src_gpr = index_gpr;
+	fetch.src_sel_x = 0;
+
+	return r600_bytecode_add_vtx(ctx->bc, &fetch);
+}
+
+/*
+ * Load the tessellation I/O info vectors from R600_LDS_INFO_CONST_BUFFER.
+ *
+ * A MOV of literal 0 into ctx->temp_reg.x is always emitted first; that
+ * channel is the source of the fetches. Then, for each of
+ * ctx->tess_input_info and ctx->tess_output_info that is non-zero, one VFETCH
+ * is emitted into that register (offset 0 for input, 16 for output).
+ *
+ * Returns 0 on success, or the first non-zero value returned by
+ * single_alu_op2() / r600_bytecode_add_vtx().
+ */
+static int r600_fetch_tess_io_info(struct r600_shader_ctx *ctx)
+{
+	const int index_gpr = ctx->temp_reg;
+	int r;
+
+	/* index_gpr.x = 0 */
+	r = single_alu_op2(ctx, ALU_OP1_MOV,
+		index_gpr, 0,
+		V_SQ_ALU_SRC_LITERAL, 0,
+		0, 0);
+	if (r)
+		return r;
+
+	/* used by VS/TCS: fetch tcs input values into resv space */
+	if (ctx->tess_input_info) {
+		r = r600_fetch_tess_io_block(ctx, index_gpr, false);
+		if (r)
+			return r;
+	}
+
+	/* used by TCS/TES: fetch tcs output values into resv space */
+	if (ctx->tess_output_info) {
+		r = r600_fetch_tess_io_block(ctx, index_gpr, true);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/*
+ * Emit the instructions that write every shader output to LDS.
+ *
+ * Allocates a temp for ctx->tess_input_info, clears ctx->tess_output_info and
+ * calls r600_fetch_tess_io_info(), then for each output emits address
+ * arithmetic followed by two LDS_WRITE_REL ops (channels x/y and z/w of the
+ * output GPR).
+ *
+ * Returns 0 on success, or the first non-zero value from a helper.
+ */
+static int emit_lds_vs_writes(struct r600_shader_ctx *ctx)
+{
+	int out, half, r;
+	int addr_reg;
+
+	/* fetch tcs input values into input_vals; the output-side register is
+	 * zeroed so only the input fetch is emitted */
+	ctx->tess_input_info = r600_get_temp(ctx);
+	ctx->tess_output_info = 0;
+	r = r600_fetch_tess_io_info(ctx);
+	if (r)
+		return r;
+
+	addr_reg = r600_get_temp(ctx);
+
+	/* addr_reg.x = tess_input_info.y * R0.y
+	 * (original note: "MUL vertexID, vertex_dw_stride", "rel id in r0.y?") */
+	r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24,
+		addr_reg, 0,
+		ctx->tess_input_info, 1,
+		0, 1);
+	if (r)
+		return r;
+
+	for (out = 0; out < ctx->shader->noutput; out++) {
+		struct r600_bytecode_alu alu;
+		const int param = r600_get_lds_unique_index(ctx->shader->output[out].name,
+			ctx->shader->output[out].sid);
+		/* channel of addr_reg that holds the address for the x/y pair:
+		 * .x when param is 0, otherwise .y (computed just below) */
+		const int lo_chan = param ? 1 : 0;
+
+		if (param) {
+			/* addr_reg.y = addr_reg.x + param * 16 */
+			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
+				addr_reg, 1,
+				addr_reg, 0,
+				V_SQ_ALU_SRC_LITERAL, param * 16);
+			if (r)
+				return r;
+		}
+
+		/* addr_reg.z = <x/y pair address> + 8, used for the z/w pair */
+		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
+			addr_reg, 2,
+			addr_reg, lo_chan,
+			V_SQ_ALU_SRC_LITERAL, 8);
+		if (r)
+			return r;
+
+		/* half 0 stores gpr.x/.y, half 1 stores gpr.z/.w */
+		for (half = 0; half < 2; half++) {
+			memset(&alu, 0, sizeof(alu));
+			alu.op = LDS_OP3_LDS_WRITE_REL;
+			alu.is_lds_idx_op = true;
+			alu.lds_idx = 1;
+
+			alu.src[0].sel = addr_reg;
+			alu.src[0].chan = half ? 2 : lo_chan;
+
+			alu.src[1].sel = ctx->shader->output[out].gpr;
+			alu.src[1].chan = half * 2;
+
+			alu.src[2].sel = ctx->shader->output[out].gpr;
+			alu.src[2].chan = (half * 2) + 1;
+
+			alu.dst.chan = 0;
+			alu.last = 1;
+
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
+			if (r)
+				return r;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Emit LDS writes for the destination of the instruction currently held in
+ * ctx->parse, when that destination is in TGSI_FILE_OUTPUT.
+ *
+ * Returns 0 immediately (after allocating a temp) if the destination is in
+ * any other file. Otherwise computes a per-channel LDS address in a temp
+ * register and writes each channel enabled in the write mask. Channel pairs
+ * x/y and z/w that are both enabled are written with one LDS_WRITE_REL;
+ * remaining channels use a single LDS_WRITE each.
+ *
+ * Returns 0 on success, or the first non-zero value from a helper.
+ */
+static int r600_store_tcs_output(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	const struct tgsi_full_dst_register *dst = &inst->Dst[0];
+	struct r600_bytecode_alu alu;
+	int addr_reg = r600_get_temp(ctx);
+	unsigned write_mask = dst->Register.WriteMask;
+	int chan, last_chan, r;
+
+	if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
+		return 0;
+
+	r = get_lds_offset0(ctx, 1, addr_reg, dst->Register.Dimension ? false : true);
+	if (r)
+		return r;
+
+	/* the base address is now in addr_reg.x */
+	r = r600_get_byte_address(ctx, addr_reg,
+		&inst->Dst[0], NULL, ctx->tess_output_info, 1);
+	if (r)
+		return r;
+
+	last_chan = tgsi_last_instruction(write_mask);
+
+	/* addr_reg.<chan> = addr_reg.x + 4 * chan for every enabled chan > 0 */
+	for (chan = 1; chan <= last_chan; chan++) {
+		if (write_mask & (1 << chan)) {
+			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
+				addr_reg, chan,
+				addr_reg, 0,
+				V_SQ_ALU_SRC_LITERAL, 4 * chan);
+			if (r)
+				return r;
+		}
+	}
+
+	/* LDS write */
+	for (chan = 0; chan <= last_chan; chan++) {
+		bool paired;
+
+		if (!(write_mask & (1 << chan)))
+			continue;
+
+		/* x+y or z+w both enabled: store the two dwords with one op */
+		paired = (chan == 0 && (write_mask & 3) == 3) ||
+			(chan == 2 && (write_mask & 0xc) == 0xc);
+
+		memset(&alu, 0, sizeof(alu));
+
+		alu.src[0].sel = addr_reg;
+		alu.src[0].chan = chan;
+
+		alu.src[1].sel = dst->Register.Index;
+		alu.src[1].sel += ctx->file_offset[dst->Register.File];
+		alu.src[1].chan = chan;
+
+		if (paired) {
+			alu.op = LDS_OP3_LDS_WRITE_REL;
+			alu.src[2].sel = dst->Register.Index;
+			alu.src[2].sel += ctx->file_offset[dst->Register.File];
+			alu.src[2].chan = chan + 1;
+			alu.lds_idx = 1;
+		} else {
+			alu.op = LDS_OP2_LDS_WRITE;
+			alu.src[2].sel = V_SQ_ALU_SRC_0;
+		}
+
+		alu.dst.chan = 0;
+		alu.last = 1;
+		alu.is_lds_idx_op = true;
+
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+
+		/* the second channel of the pair has been written as well */
+		if (paired)
+			chan++;
+	}
+
+	return 0;
+}
+
+/*
+ * Emit the instructions that read one tessellation-factor output back from
+ * LDS into that output's GPR.
+ *
+ * output_idx indexes ctx->shader->output[]. The LDS address is built in a
+ * fresh temp: get_lds_offset0() fills temp.x, then param * 16 is added, where
+ * param = r600_get_lds_unique_index(name, 0). do_lds_fetch_values() is then
+ * called with that temp and the output's GPR.
+ *
+ * Returns 0 on success, or the first non-zero value from a helper. The
+ * result of do_lds_fetch_values() is propagated instead of being dropped, so
+ * a failure while emitting the fetch is no longer reported as success.
+ * NOTE(review): assumes do_lds_fetch_values() returns int like the other
+ * emit helpers - confirm against its definition.
+ */
+static int r600_tess_factor_read(struct r600_shader_ctx *ctx,
+	int output_idx)
+{
+	int param;
+	unsigned temp_reg = r600_get_temp(ctx);
+	unsigned name = ctx->shader->output[output_idx].name;
+	int dreg = ctx->shader->output[output_idx].gpr;
+	int r;
+
+	param = r600_get_lds_unique_index(name, 0);
+	r = get_lds_offset0(ctx, 1, temp_reg, true);
+	if (r)
+		return r;
+
+	/* temp.x += param * 16 */
+	r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
+		temp_reg, 0,
+		temp_reg, 0,
+		V_SQ_ALU_SRC_LITERAL, param * 16);
+	if (r)
+		return r;
+
+	return do_lds_fetch_values(ctx, temp_reg, dreg);
+}
+
+/*
+ * Emit the bytecode that writes the tessellation factors with TF_WRITE.
+ *
+ * The emitted sequence is wrapped in a predicated section: a PRED_SETE_INT
+ * comparing R0 channel 2 with literal 0 (ALU_PUSH_BEFORE), a JUMP, the factor
+ * reads/writes, and a POP. The number of outer/inner components and the
+ * per-patch stride are chosen from ctx->shader->tcs_prim_mode.
+ *
+ * Returns 0 on success, -1 if the primitive mode is unsupported or a required
+ * TESSOUTER/TESSINNER output is missing, or the first non-zero value returned
+ * by a bytecode helper. The results of r600_bytecode_add_alu_type() and
+ * r600_bytecode_add_cfinst() are checked: if adding a CF instruction failed,
+ * ctx->bc->cf_last would not be the new instruction and the jump/pop
+ * patch-up at the end would modify the wrong one.
+ */
+static int r600_emit_tess_factor(struct r600_shader_ctx *ctx)
+{
+	unsigned i;
+	int stride, outer_comps, inner_comps;
+	int tessinner_idx = -1, tessouter_idx = -1;
+	int r;
+	int temp_reg = r600_get_temp(ctx);
+	int treg[3] = {-1, -1, -1};
+	struct r600_bytecode_alu alu;
+	struct r600_bytecode_cf *cf_jump, *cf_pop;
+
+	/* only execute factor emission for invocation 0 */
+	/* PRED_SETE_INT __, R0.z, 0  (src0 is GPR 0 channel 2, src1 literal 0) */
+	memset(&alu, 0, sizeof(alu));
+	alu.op = ALU_OP2_PRED_SETE_INT;
+	alu.src[0].chan = 2;
+	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+	alu.execute_mask = 1;
+	alu.update_pred = 1;
+	alu.last = 1;
+	r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);
+	if (r)
+		return r;
+
+	r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
+	if (r)
+		return r;
+	cf_jump = ctx->bc->cf_last;
+
+	/* one temp per pair of factor components (see treg[i / 2] below) */
+	treg[0] = r600_get_temp(ctx);
+	switch (ctx->shader->tcs_prim_mode) {
+	case PIPE_PRIM_LINES:
+		stride = 8; /* 2 dwords, 1 vec2 store */
+		outer_comps = 2;
+		inner_comps = 0;
+		break;
+	case PIPE_PRIM_TRIANGLES:
+		stride = 16; /* 4 dwords, 1 vec4 store */
+		outer_comps = 3;
+		inner_comps = 1;
+		treg[1] = r600_get_temp(ctx);
+		break;
+	case PIPE_PRIM_QUADS:
+		stride = 24; /* 6 dwords, 2 stores (vec4 + vec2) */
+		outer_comps = 4;
+		inner_comps = 2;
+		treg[1] = r600_get_temp(ctx);
+		treg[2] = r600_get_temp(ctx);
+		break;
+	default:
+		assert(0);
+		return -1;
+	}
+
+	/* R0 is InvocationID, RelPatchID, PatchID, tf_base */
+	/* NOTE(review): the predicate above reads R0 channel 2 for the
+	 * invocation check, which does not match the order listed here -
+	 * confirm the actual R0 layout. */
+	/* TF_WRITE takes index in R.x, value in R.y */
+	for (i = 0; i < ctx->shader->noutput; i++) {
+		if (ctx->shader->output[i].name == TGSI_SEMANTIC_TESSINNER)
+			tessinner_idx = i;
+		if (ctx->shader->output[i].name == TGSI_SEMANTIC_TESSOUTER)
+			tessouter_idx = i;
+	}
+
+	/* outer factors are always required */
+	if (tessouter_idx == -1)
+		return -1;
+
+	/* inner factors are required only when the prim mode uses them */
+	if (tessinner_idx == -1 && inner_comps)
+		return -1;
+
+	/* tessouter_idx is known to be valid here */
+	r = r600_tess_factor_read(ctx, tessouter_idx);
+	if (r)
+		return r;
+
+	if (tessinner_idx != -1) {
+		r = r600_tess_factor_read(ctx, tessinner_idx);
+		if (r)
+			return r;
+	}
+
+	/* temp.x = relpatchid(r0.y) * tf_stride + tf_base(r0.w) */
+	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
+		temp_reg, 0,
+		0, 1,
+		V_SQ_ALU_SRC_LITERAL, stride,
+		0, 3);
+	if (r)
+		return r;
+
+	/*
+	 * Build (address, value) pairs: component i goes into treg[i / 2],
+	 * channels x/y for even i and z/w for odd i. Outer components come
+	 * first, followed by the inner ones.
+	 */
+	for (i = 0; i < outer_comps + inner_comps; i++) {
+		int out_idx = i >= outer_comps ? tessinner_idx : tessouter_idx;
+		int out_comp = i >= outer_comps ? i - outer_comps : i;
+
+		/* address = temp.x + 4 * i */
+		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
+			treg[i / 2], (2 * (i % 2)),
+			temp_reg, 0,
+			V_SQ_ALU_SRC_LITERAL, 4 * i);
+		if (r)
+			return r;
+		/* value = the factor component from the output GPR */
+		r = single_alu_op2(ctx, ALU_OP1_MOV,
+			treg[i / 2], 1 + (2 * (i%2)),
+			ctx->shader->output[out_idx].gpr, out_comp,
+			0, 0);
+		if (r)
+			return r;
+	}
+
+	/* one TF_WRITE per component, sourcing the pair built above */
+	for (i = 0; i < outer_comps + inner_comps; i++) {
+		struct r600_bytecode_gds gds;
+
+		memset(&gds, 0, sizeof(struct r600_bytecode_gds));
+		gds.src_gpr = treg[i / 2];
+		gds.src_sel_x = 2 * (i % 2);
+		gds.src_sel_y = 1 + (2 * (i % 2));
+		gds.src_sel_z = 4;
+		gds.dst_sel_x = 7;
+		gds.dst_sel_y = 7;
+		gds.dst_sel_z = 7;
+		gds.dst_sel_w = 7;
+		gds.op = FETCH_OP_TF_WRITE;
+		r = r600_bytecode_add_gds(ctx->bc, &gds);
+		if (r)
+			return r;
+	}
+
+	/* Patch up jump label */
+	r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
+	if (r)
+		return r;
+	cf_pop = ctx->bc->cf_last;
+
+	/* both the JUMP and the POP target the CF slot after the POP */
+	cf_jump->cf_addr = cf_pop->id + 2;
+	cf_jump->pop_count = 1;
+	cf_pop->cf_addr = cf_pop->id + 2;
+	cf_pop->pop_count = 1;
+
+	return 0;
+}
+
+static int r600_shader_from_tgsi(struct r600_context *rctx,
+ struct r600_pipe_shader *pipeshader,
+ union r600_shader_key key)
+{
+ struct r600_screen *rscreen = rctx->screen;
+ struct r600_shader *shader = &pipeshader->shader;
+ struct tgsi_token *tokens = pipeshader->selector->tokens;
+ struct pipe_stream_output_info so = pipeshader->selector->so;
+ struct tgsi_full_immediate *immediate;
+ struct r600_shader_ctx ctx;
+ struct r600_bytecode_output output[32];
+ unsigned output_done, noutput;
+ unsigned opcode;
+ int i, j, k, r = 0;
+ int next_param_base = 0, next_clip_base;
+ int max_color_exports = MAX2(key.ps.nr_cbufs, 1);
+ bool indirect_gprs;
+ bool ring_outputs = false;
+ bool lds_outputs = false;
+ bool lds_inputs = false;
+ bool pos_emitted = false;
+