boolean clip_vertex_write;
unsigned cv_output;
unsigned edgeflag_output;
+ int cs_block_size_reg;
+ int cs_grid_size_reg;
+ bool cs_block_size_loaded, cs_grid_size_loaded;
int fragcoord_input;
- int native_integers;
int next_ring_offset;
int gs_out_ring_offset;
int gs_next_vertex;
d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
break; /* Already handled from allocate_system_value_inputs */
} else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
- if (!ctx->native_integers) {
- struct r600_bytecode_alu alu;
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-
- alu.op = ALU_OP1_INT_TO_FLT;
- alu.src[0].sel = 0;
- alu.src[0].chan = 3;
-
- alu.dst.sel = 0;
- alu.dst.chan = 3;
- alu.dst.write = 1;
- alu.last = 1;
-
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
- }
break;
} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
break;
vtx.num_format_all = 2;
vtx.format_comp_all = 1;
vtx.use_const_fields = 0;
- vtx.offset = 1; // first element is size of buffer
+ vtx.offset = 0;
+ vtx.endian = r600_endian_swap(32);
+ vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
+
+ r = r600_bytecode_add_vtx(ctx->bc, &vtx);
+ if (r)
+ return r;
+
+ return t1;
+}
+
+/*
+ * Return the GPR that holds the compute block size (load_block == true) or
+ * the compute grid size (load_block == false), setting up the load the first
+ * time each value is requested.
+ *
+ * The two values live in separate registers (ctx->cs_block_size_reg and
+ * ctx->cs_grid_size_reg) and are tracked by separate "loaded" flags, so a
+ * request for one must never be answered from the other's cache.
+ *
+ * Returns the register index on success. If r600_bytecode_add_alu() fails,
+ * its non-zero result is returned instead.
+ * NOTE(review): callers assign the return value straight to a source
+ * selector, so an error code is indistinguishable from a register index.
+ */
+static int load_block_grid_size(struct r600_shader_ctx *ctx, bool load_block)
+{
+	struct r600_bytecode_vtx vtx;
+	int r, t1;
+
+	/* Only reuse the cached register of the value actually asked for. */
+	if (load_block) {
+		if (ctx->cs_block_size_loaded)
+			return ctx->cs_block_size_reg;
+	} else {
+		if (ctx->cs_grid_size_loaded)
+			return ctx->cs_grid_size_reg;
+	}
+
+	t1 = load_block ? ctx->cs_block_size_reg : ctx->cs_grid_size_reg;
+	/* Write constant 0 into t1; t1 is then used as the fetch source GPR. */
+	struct r600_bytecode_alu alu;
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+	alu.op = ALU_OP1_MOV;
+	alu.src[0].sel = V_SQ_ALU_SRC_0;
+	alu.dst.sel = t1;
+	alu.dst.write = 1;
+	alu.last = 1;
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
+	if (r)
+		return r;
+
+	/* Describe a fetch from the buffer-info constant buffer into t1. */
+	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
+	vtx.op = FETCH_OP_VFETCH;
+	vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
+	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
+	vtx.src_gpr = t1;
+	vtx.src_sel_x = 0;
+
+	vtx.mega_fetch_count = 16;
+	vtx.dst_gpr = t1;
+	vtx.dst_sel_x = 0;
+	vtx.dst_sel_y = 1;
+	vtx.dst_sel_z = 2;
+	vtx.dst_sel_w = 7; /* only x, y and z are selected from the fetch */
+	vtx.data_format = FMT_32_32_32_32;
+	vtx.num_format_all = 1;
+	vtx.format_comp_all = 0;
+	vtx.use_const_fields = 0;
+	vtx.offset = load_block ? 0 : 16; /* block size at offset 0, grid size at offset 16 */
vtx.endian = r600_endian_swap(32);
vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
if (r)
return r;
+	/* Remember which of the two values is now resident. */
+	if (load_block)
+		ctx->cs_block_size_loaded = true;
+	else
+		ctx->cs_grid_size_loaded = true;
return t1;
}
r600_src->swizzle[1] = 3;
r600_src->swizzle[2] = 3;
r600_src->swizzle[3] = 3;
+ } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_GRID_SIZE) {
+ r600_src->sel = load_block_grid_size(ctx, false);
+ } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_SIZE) {
+ r600_src->sel = load_block_grid_size(ctx, true);
}
} else {
if (tgsi_src->Register.Indirect)
}
static int r600_tess_factor_read(struct r600_shader_ctx *ctx,
- int output_idx)
+ int output_idx, int nc)
{
int param;
unsigned temp_reg = r600_get_temp(ctx);
if (r)
return r;
- do_lds_fetch_values(ctx, temp_reg, dreg, 0xf);
+ do_lds_fetch_values(ctx, temp_reg, dreg, ((1u << nc) - 1));
return 0;
}
return -1;
if (tessouter_idx != -1) {
- r = r600_tess_factor_read(ctx, tessouter_idx);
+ r = r600_tess_factor_read(ctx, tessouter_idx, outer_comps);
if (r)
return r;
}
if (tessinner_idx != -1) {
- r = r600_tess_factor_read(ctx, tessinner_idx);
+ r = r600_tess_factor_read(ctx, tessinner_idx, inner_comps);
if (r)
return r;
}
ctx.bc = &shader->bc;
ctx.shader = shader;
- ctx.native_integers = true;
r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
rscreen->has_compressed_msaa_texturing);
ctx.clip_vertex_write = 0;
ctx.thread_id_gpr_loaded = false;
+ ctx.cs_block_size_reg = -1;
+ ctx.cs_grid_size_reg = -1;
+ ctx.cs_block_size_loaded = false;
+ ctx.cs_grid_size_loaded = false;
+
shader->nr_ps_color_exports = 0;
shader->nr_ps_max_color_exports = 0;
if (add_tess_inout)
ctx.file_offset[TGSI_FILE_INPUT]+=2;
}
- if (ctx.type == PIPE_SHADER_COMPUTE)
+ if (ctx.type == PIPE_SHADER_COMPUTE) {
ctx.file_offset[TGSI_FILE_INPUT] = 2;
+ for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
+ if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_GRID_SIZE)
+ ctx.cs_grid_size_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
+ if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_BLOCK_SIZE)
+ ctx.cs_block_size_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
+ }
+ }
ctx.file_offset[TGSI_FILE_OUTPUT] =
ctx.file_offset[TGSI_FILE_INPUT] +
ctx.nliterals = 0;
ctx.literals = NULL;
+ ctx.max_driver_temp_used = 0;
shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
ctx.info.colors_written == 1;
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
int src_gpr, r, i;
int id = tgsi_tex_get_src_gpr(ctx, 1);
+ int sampler_index_mode = inst->Src[1].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
if (src_requires_loading) {
vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */
vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; /* SEL_W */
vtx.use_const_fields = 1;
+ vtx.buffer_index_mode = sampler_index_mode;
if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
return r;
static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int offset)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bytecode_alu alu;
int r;
int id = tgsi_tex_get_src_gpr(ctx, reg_idx) + offset;
+ int sampler_index_mode = inst->Src[reg_idx].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_MOV;
- alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
- if (ctx->bc->chip_class >= EVERGREEN) {
- /* channel 0 or 2 of each word */
- alu.src[0].sel += (id / 2);
- alu.src[0].chan = (id % 2) * 2;
- } else {
+ if (ctx->bc->chip_class < EVERGREEN) { /* buffer TXQ, pre-Evergreen path: answer the query with one ALU MOV from the buffer-info constants */
+ struct r600_bytecode_alu alu;
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
/* r600 we have them at channel 2 of the second dword */
alu.src[0].sel += (id * 2) + 1;
alu.src[0].chan = 1;
+ alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); /* result goes to the instruction's first destination */
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r; /* propagate the failure from r600_bytecode_add_alu */
+ return 0;
+ } else { /* Evergreen and later: emit a fetch instruction instead of an ALU MOV */
+ struct r600_bytecode_vtx vtx;
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.op = FETCH_OP_GDS_MIN_UINT; /* aka GET_BUFFER_RESINFO */
+ vtx.buffer_id = id + R600_MAX_CONST_BUFFERS; /* NOTE(review): presumably buffer resources are numbered after the constant buffers - confirm */
+ vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
+ vtx.src_gpr = 0;
+ vtx.mega_fetch_count = 16; /* no idea here really... */
+ vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; /* register-file base offset plus the TGSI destination index */
+ vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */
+ vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 4 : 7; /* SEL_Y: selector 4 when written - NOTE(review): presumably SEL_0 (constant zero); confirm */
+ vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 4 : 7; /* SEL_Z */
+ vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 4 : 7; /* SEL_W */
+ vtx.data_format = FMT_32_32_32_32;
+ vtx.buffer_index_mode = sampler_index_mode; /* 2 only when Src[reg_idx].Indirect.Index == 2, otherwise 0 */
+
+ if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx))) /* queued with r600_bytecode_add_vtx_tc, not r600_bytecode_add_vtx */
+ return r;
+ return 0;
}
- alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
- tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- return 0;
}
+
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
- ctx->shader->uses_tex_buffers = true;
+ if (ctx->bc->chip_class < EVERGREEN)
+ ctx->shader->uses_tex_buffers = true;
return r600_do_buffer_txq(ctx, 1, 0);
}
else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
if (ctx->bc->chip_class >= EVERGREEN) {
- /* channel 1 or 3 of each word */
- alu.src[0].sel += (id / 2);
- alu.src[0].chan = ((id % 2) * 2) + 1;
+ /* on Evergreen and later each dword holds the number of cubes */
+ alu.src[0].sel += id / 4;
+ alu.src[0].chan = id % 4;
} else {
/* r600 we have them at channel 2 of the second dword */
alu.src[0].sel += (id * 2) + 1;
int *uav_id_p, int *uav_index_mode_p)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- int uav_id, uav_index_mode;
+ int uav_id, uav_index_mode = 0;
int r;
bool is_cm = (ctx->bc->chip_class == CAYMAN);
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
(inst->Src[0].Register.File == TGSI_FILE_IMAGE && inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) {
- ctx->shader->uses_tex_buffers = true;
+ if (ctx->bc->chip_class < EVERGREEN)
+ ctx->shader->uses_tex_buffers = true;
return r600_do_buffer_txq(ctx, 0, ctx->shader->image_size_const_offset);
}
alu.op = ALU_OP1_MOV;
alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
- /* channel 1 or 3 of each word */
- alu.src[0].sel += (id / 2);
- alu.src[0].chan = ((id % 2) * 2) + 1;
+ /* with eg each dword is the number of cubes */
+ alu.src[0].sel += id / 4;
+ alu.src[0].chan = id % 4;
alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
return 0;
}
+/*
+ * Emit the ALU instructions for TGSI_OPCODE_CLOCK.
+ *
+ * Two MOVs are appended to ctx->bc for the instruction currently being
+ * parsed: the first copies EG_V_SQ_ALU_SRC_TIME_LO and the second copies
+ * EG_V_SQ_ALU_SRC_TIME_HI, each into Dst[0] as resolved by tgsi_dst() with
+ * component argument 0 and 1 respectively.
+ *
+ * Returns 0 on success, or the non-zero result of r600_bytecode_add_alu()
+ * if either instruction could not be added.
+ */
+static int tgsi_clock(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bytecode_alu alu;
+	int r;
+
+	/* Low half of the time counter. */
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+	alu.op = ALU_OP1_MOV;
+	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+	alu.src[0].sel = EG_V_SQ_ALU_SRC_TIME_LO;
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
+	if (r)
+		return r;
+
+	/* High half of the time counter. */
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+	alu.op = ALU_OP1_MOV;
+	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
+	alu.src[0].sel = EG_V_SQ_ALU_SRC_TIME_HI;
+	/*
+	 * Mark the final instruction of the sequence, as the other ALU
+	 * emitters in this file do; without it the sequence is left open and
+	 * whatever ALU instruction is emitted next would be joined to it.
+	 */
+	alu.last = 1;
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
+	if (r)
+		return r;
+	return 0;
+}
+
static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
[TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_r600_arl},
[TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2},
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
[31] = { ALU_OP0_NOP, tgsi_unsupported},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
- [33] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_CLOCK] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
[35] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
[31] = { ALU_OP0_NOP, tgsi_unsupported},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
- [33] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_CLOCK] = { ALU_OP0_NOP, tgsi_clock},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
[35] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow},
[31] = { ALU_OP0_NOP, tgsi_unsupported},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
- [33] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_CLOCK] = { ALU_OP0_NOP, tgsi_clock},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
[35] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig},