const struct pipe_draw_info *info,
unsigned *num_patches)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_shader_ctx_state *ls = &sctx->vs_shader;
/* The TES pointer will only be used for sctx->last_tcs.
* It would be wrong to think that TCS = TES. */
perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
lds_size = output_patch0_offset + output_patch_size * *num_patches;
- ls_rsrc2 = ls->current->ls_rsrc2;
+ ls_rsrc2 = ls->current->config.rsrc2;
if (sctx->b.chip_class >= CIK) {
assert(lds_size <= 65536);
if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
- radeon_emit(cs, ls->current->ls_rsrc1);
+ radeon_emit(cs, ls->current->config.rsrc1);
radeon_emit(cs, ls_rsrc2);
/* Compute userdata SGPRs. */
radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26));
}
+static unsigned si_num_prims_for_vertices(const struct pipe_draw_info *info)
+{
+ switch (info->mode) {
+ case PIPE_PRIM_PATCHES:
+ return info->count / info->vertices_per_patch;
+ case R600_PRIM_RECTANGLE_LIST:
+ return info->count / 3;
+ default:
+ return u_prims_for_vertices(info->mode, info->count);
+ }
+}
+
static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
const struct pipe_draw_info *info,
unsigned num_patches)
if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi &&
(info->indirect ||
(info->instance_count > 1 &&
- u_prims_for_vertices(info->mode, info->count) <= 1)))
+ si_num_prims_for_vertices(info) <= 1)))
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
static void si_emit_scratch_reloc(struct si_context *sctx)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
if (!sctx->emit_scratch_reloc)
return;
sctx->spi_tmpring_size);
if (sctx->scratch_buffer) {
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
sctx->scratch_buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_SCRATCH_BUFFER);
/* rast_prim is the primitive type after GS. */
static void si_emit_rasterizer_prim_state(struct si_context *sctx)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned rast_prim = sctx->current_rast_prim;
struct si_state_rasterizer *rs = sctx->emitted.named.rasterizer;
static void si_emit_draw_registers(struct si_context *sctx,
const struct pipe_draw_info *info)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned prim = si_conv_pipe_prim(info->mode);
unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
unsigned ia_multi_vgt_param, ls_hs_config, num_patches = 0;
const struct pipe_draw_info *info,
const struct pipe_index_buffer *ib)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
+ bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off;
if (info->count_from_stream_output) {
struct r600_so_target *t =
radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
radeon_emit(cs, 0); /* unused */
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
t->buf_filled_size, RADEON_USAGE_READ,
RADEON_PRIO_SO_FILLED_SIZE);
}
} else {
si_invalidate_draw_sh_constants(sctx);
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
(struct r600_resource *)info->indirect,
RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
}
ib->index_size;
uint64_t index_va = r600_resource(ib->buffer)->gpu_address + ib->offset;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
(struct r600_resource *)ib->buffer,
RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
radeon_emit(cs, index_max_size);
- radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_INDIRECT, 3, sctx->b.predicate_drawing));
+ radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_INDIRECT, 3, render_cond_bit));
radeon_emit(cs, info->indirect_offset);
radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
} else {
index_va += info->start * ib->index_size;
- radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, sctx->b.predicate_drawing));
+ radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, render_cond_bit));
radeon_emit(cs, index_max_size);
radeon_emit(cs, index_va);
radeon_emit(cs, (index_va >> 32UL) & 0xFF);
radeon_emit(cs, indirect_va);
radeon_emit(cs, indirect_va >> 32);
- radeon_emit(cs, PKT3(PKT3_DRAW_INDIRECT, 3, sctx->b.predicate_drawing));
+ radeon_emit(cs, PKT3(PKT3_DRAW_INDIRECT, 3, render_cond_bit));
radeon_emit(cs, info->indirect_offset);
radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX);
} else {
- radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, sctx->b.predicate_drawing));
+ radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit));
radeon_emit(cs, info->count);
radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
}
}
-#define BOTH_ICACHE_KCACHE (SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_KCACHE)
-
void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
{
struct r600_common_context *sctx = &si_ctx->b;
- struct radeon_winsys_cs *cs = sctx->rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->gfx.cs;
uint32_t cp_coher_cntl = 0;
uint32_t compute =
PKT3_SHADER_TYPE_S(!!(sctx->flags & SI_CONTEXT_FLAG_COMPUTE));
if (sctx->flags & SI_CONTEXT_INV_ICACHE)
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
- if (sctx->flags & SI_CONTEXT_INV_KCACHE)
+ if (sctx->flags & SI_CONTEXT_INV_SMEM_L1)
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
- if (sctx->flags & SI_CONTEXT_INV_TC_L1)
+ if (sctx->flags & SI_CONTEXT_INV_VMEM_L1)
cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
- if (sctx->flags & SI_CONTEXT_INV_TC_L2) {
+ if (sctx->flags & SI_CONTEXT_INV_GLOBAL_L2) {
cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
/* TODO: this might not be needed. */
S_0085F0_CB5_DEST_BASE_ENA(1) |
S_0085F0_CB6_DEST_BASE_ENA(1) |
S_0085F0_CB7_DEST_BASE_ENA(1);
+
+ /* Necessary for DCC */
+ if (sctx->chip_class >= VI) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0) | compute);
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) |
+ EVENT_INDEX(5));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ }
}
if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
si_get_draw_start_count(sctx, info, &start, &count);
start_offset = start * ib.index_size;
- u_upload_alloc(sctx->b.uploader, start_offset, count * 2,
+ u_upload_alloc(sctx->b.uploader, start_offset, count * 2, 256,
&out_offset, &out_buffer, &ptr);
if (!out_buffer) {
pipe_resource_reference(&ib.buffer, NULL);
start_offset = start * ib.index_size;
u_upload_data(sctx->b.uploader, start_offset, count * ib.index_size,
- (char*)ib.user_buffer + start_offset,
+ 256, (char*)ib.user_buffer + start_offset,
&ib.offset, &ib.buffer);
if (!ib.buffer)
return;
/* VI reads index buffers through TC L2. */
if (info->indexed && sctx->b.chip_class <= CIK &&
r600_resource(ib.buffer)->TC_L2_dirty) {
- sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
r600_resource(ib.buffer)->TC_L2_dirty = false;
}
/* Workaround for a VGT hang when streamout is enabled.
* It must be done after drawing. */
- if ((sctx->b.family == CHIP_HAWAII || sctx->b.family == CHIP_TONGA) &&
+ if ((sctx->b.family == CHIP_HAWAII ||
+ sctx->b.family == CHIP_TONGA ||
+ sctx->b.family == CHIP_FIJI) &&
(sctx->b.streamout.streamout_enabled ||
sctx->b.streamout.prims_gen_query_enabled)) {
sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
void si_trace_emit(struct si_context *sctx)
{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
sctx->trace_id++;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->trace_buf,
RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |