output_patch_size));
/* Make sure the output data fits in the offchip buffer */
- *num_patches = MIN2(*num_patches, SI_TESS_OFFCHIP_BLOCK_SIZE /
- output_patch_size);
+ *num_patches = MIN2(*num_patches,
+ (sctx->screen->tess_offchip_block_dw_size * 4) /
+ output_patch_size);
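+ /* tess_offchip_block_dw_size is in dwords; multiplying by 4 converts
+ * it to bytes to match output_patch_size. E.g. a hypothetical
+ * 8192-dword block with 1024-byte patches fits 8192*4/1024 = 32
+ * patches. */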
/* Not necessary for correctness, but improves performance. The
 * specific value is taken from the proprietary driver.
 */
partial_vs_wave = true;
/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
- if (sctx->b.chip_class >= VI) {
+ if (sctx->screen->has_distributed_tess) {
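+ /* has_distributed_tess is set once at screen creation (VI and newer,
+ * per the removed check), so chip checks aren't repeated here. */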
if (sctx->gs_shader.cso)
partial_es_wave = true;
else
partial_vs_wave = true;
}
if (sctx->b.chip_class >= CIK) {
/* WD_SWITCH_ON_EOP has no effect on GPUs with less than
* 4 shader engines. Set 1 to pass the assertion below.
- * The other cases are hardware requirements. */
+ * The other cases are hardware requirements.
+ *
+ * Polaris supports primitive restart with WD_SWITCH_ON_EOP=0
+ * for points, line strips, and tri strips.
+ */
if (sctx->b.screen->info.max_se < 4 ||
prim == PIPE_PRIM_POLYGON ||
prim == PIPE_PRIM_LINE_LOOP ||
prim == PIPE_PRIM_TRIANGLE_FAN ||
prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY ||
- info->primitive_restart ||
+ (info->primitive_restart &&
+ (sctx->b.family < CHIP_POLARIS10 ||
+ (prim != PIPE_PRIM_POINTS &&
+ prim != PIPE_PRIM_LINE_STRIP &&
+ prim != PIPE_PRIM_TRIANGLE_STRIP))) ||
info->count_from_stream_output)
wd_switch_on_eop = true;
rs->pa_sc_line_stipple == sctx->last_sc_line_stipple)
return;
+ /* For lines, reset the stipple pattern at each primitive. Otherwise,
+ * reset the stipple pattern at each packet (line strips, line loops).
+ */
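+ /* AUTO_RESET_CNTL: 1 = reset per primitive, 2 = reset per packet
+ * (values inferred from the mapping above). */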
radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
rs->pa_sc_line_stipple |
- S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 :
- rast_prim == PIPE_PRIM_LINE_STRIP ? 2 : 0));
+ S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2));
sctx->last_rast_prim = rast_prim;
sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
unsigned ia_multi_vgt_param, ls_hs_config, num_patches = 0;
+ /* Polaris needs different VTX_REUSE_DEPTH settings depending on
+ * whether the "fractional odd" tessellation spacing is used.
+ */
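+ /* 30 is presumably the default reuse depth; 14 restricts vertex reuse
+ * enough for fractional-odd spacing to be correct on Polaris. */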
+ if (sctx->b.family >= CHIP_POLARIS10) {
+ struct si_shader_selector *tes = sctx->tes_shader.cso;
+ unsigned vtx_reuse_depth = 30;
+
+ if (tes &&
+ tes->info.properties[TGSI_PROPERTY_TES_SPACING] ==
+ PIPE_TESS_SPACING_FRACTIONAL_ODD)
+ vtx_reuse_depth = 14;
+
+ if (vtx_reuse_depth != sctx->last_vtx_reuse_depth) {
+ radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
+ vtx_reuse_depth);
+ sctx->last_vtx_reuse_depth = vtx_reuse_depth;
+ }
+ }
+
if (sctx->tes_shader.cso)
si_emit_derived_tess_state(sctx, info, &num_patches);
if (prim != sctx->last_prim ||
ia_multi_vgt_param != sctx->last_multi_vgt_param ||
ls_hs_config != sctx->last_ls_hs_config) {
- if (sctx->b.family >= CHIP_POLARIS10) {
+ if (sctx->b.chip_class >= CIK) {
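+ /* SET_*_REG_IDX packets work on all CIK+ parts, which makes the
+ * DRAW_PREAMBLE path removed below unnecessary. */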
radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
radeon_set_uconfig_reg_idx(cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim);
- } else if (sctx->b.chip_class >= CIK) {
- radeon_emit(cs, PKT3(PKT3_DRAW_PREAMBLE, 2, 0));
- radeon_emit(cs, prim); /* VGT_PRIMITIVE_TYPE */
- radeon_emit(cs, ia_multi_vgt_param); /* IA_MULTI_VGT_PARAM */
- radeon_emit(cs, ls_hs_config); /* VGT_LS_HS_CONFIG */
} else {
radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off;
+ uint32_t index_max_size = 0;
+ uint64_t index_va = 0;
if (info->count_from_stream_output) {
struct r600_so_target *t =
assert(!"unreachable");
return;
}
+
+ index_max_size = (ib->buffer->width0 - ib->offset) /
+ ib->index_size;
+ index_va = r600_resource(ib->buffer)->gpu_address + ib->offset;
+
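+ /* The index-buffer address must be at least 2-byte aligned for the
+ * hardware; the assert below checks this. */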
+ assert(index_va % 2 == 0);
+
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ (struct r600_resource *)ib->buffer,
+ RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
}
if (!info->indirect) {
sctx->last_sh_base_reg = sh_base_reg;
}
} else {
+ uint64_t indirect_va = r600_resource(info->indirect)->gpu_address;
+
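+ /* The indirect argument buffer must be 8-byte aligned and the offset
+ * into it dword-aligned; the asserts here and further down check both. */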
+ assert(indirect_va % 8 == 0);
+
si_invalidate_draw_sh_constants(sctx);
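+ /* SET_BASE programs the base address for the indirect draw packets;
+ * info->indirect_offset in each DRAW_*_INDIRECT below is relative
+ * to it. */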
+ radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
+ radeon_emit(cs, 1);
+ radeon_emit(cs, indirect_va);
+ radeon_emit(cs, indirect_va >> 32);
+
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
(struct r600_resource *)info->indirect,
RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
}
- if (info->indexed) {
- uint32_t index_max_size = (ib->buffer->width0 - ib->offset) /
- ib->index_size;
- uint64_t index_va = r600_resource(ib->buffer)->gpu_address + ib->offset;
-
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- (struct r600_resource *)ib->buffer,
- RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
-
- if (info->indirect) {
- uint64_t indirect_va = r600_resource(info->indirect)->gpu_address;
-
- assert(indirect_va % 8 == 0);
- assert(index_va % 2 == 0);
- assert(info->indirect_offset % 4 == 0);
+ if (info->indirect) {
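+ /* DI_SRC_SEL selects the index source: DMA fetches indices from the
+ * index buffer, AUTO_INDEX generates them for non-indexed draws. */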
+ unsigned di_src_sel = info->indexed ? V_0287F0_DI_SRC_SEL_DMA
+ : V_0287F0_DI_SRC_SEL_AUTO_INDEX;
- radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
- radeon_emit(cs, 1);
- radeon_emit(cs, indirect_va);
- radeon_emit(cs, indirect_va >> 32);
+ assert(info->indirect_offset % 4 == 0);
+ if (info->indexed) {
radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0));
radeon_emit(cs, index_va);
radeon_emit(cs, index_va >> 32);
radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
radeon_emit(cs, index_max_size);
+ }
- if (sctx->b.family < CHIP_POLARIS10) {
- radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_INDIRECT, 3, render_cond_bit));
- radeon_emit(cs, info->indirect_offset);
- radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA);
- } else {
- radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_INDIRECT_MULTI, 8, render_cond_bit));
- radeon_emit(cs, info->indirect_offset);
- radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, 0); /* draw_index */
- radeon_emit(cs, 1); /* count */
- radeon_emit(cs, 0); /* count_addr -- disabled */
- radeon_emit(cs, 0);
- radeon_emit(cs, 16); /* stride */
- radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA);
- }
+ if (!sctx->screen->has_draw_indirect_multi) {
+ radeon_emit(cs, PKT3(info->indexed ? PKT3_DRAW_INDEX_INDIRECT
+ : PKT3_DRAW_INDIRECT,
+ 3, render_cond_bit));
+ radeon_emit(cs, info->indirect_offset);
+ radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
+ radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
+ radeon_emit(cs, di_src_sel);
} else {
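+ /* With count = 1 and the count address left at 0 (disabled), the
+ * MULTI packet executes exactly one indirect draw. */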
+ radeon_emit(cs, PKT3(info->indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
+ PKT3_DRAW_INDIRECT_MULTI,
+ 8, render_cond_bit));
+ radeon_emit(cs, info->indirect_offset);
+ radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
+ radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
+ radeon_emit(cs, 0); /* draw_index */
+ radeon_emit(cs, 1); /* count */
+ radeon_emit(cs, 0); /* count_addr -- disabled */
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 16); /* stride */
+ radeon_emit(cs, di_src_sel);
+ }
+ } else {
+ if (info->indexed) {
index_va += info->start * ib->index_size;
radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, render_cond_bit));
radeon_emit(cs, (index_va >> 32UL) & 0xFF);
radeon_emit(cs, info->count);
radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA);
- }
- } else {
- if (info->indirect) {
- uint64_t indirect_va = r600_resource(info->indirect)->gpu_address;
-
- assert(indirect_va % 8 == 0);
- assert(info->indirect_offset % 4 == 0);
-
- radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
- radeon_emit(cs, 1);
- radeon_emit(cs, indirect_va);
- radeon_emit(cs, indirect_va >> 32);
-
- if (sctx->b.family < CHIP_POLARIS10) {
- radeon_emit(cs, PKT3(PKT3_DRAW_INDIRECT, 3, render_cond_bit));
- radeon_emit(cs, info->indirect_offset);
- radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX);
- } else {
- radeon_emit(cs, PKT3(PKT3_DRAW_INDIRECT_MULTI, 8, render_cond_bit));
- radeon_emit(cs, info->indirect_offset);
- radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, 0); /* draw_index */
- radeon_emit(cs, 1); /* count */
- radeon_emit(cs, 0); /* count_addr -- disabled */
- radeon_emit(cs, 0);
- radeon_emit(cs, 16); /* stride */
- radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX);
- }
} else {
radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit));
radeon_emit(cs, info->count);
radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
- S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
+ S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
}
}
}
struct si_context *sctx = (struct si_context *)ctx;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
struct pipe_index_buffer ib = {};
- unsigned mask, dirty_fb_counter, dirty_tex_counter;
+ unsigned mask, dirty_fb_counter, dirty_tex_counter, rast_prim;
if (!info->count && !info->indirect &&
(info->indexed || !info->count_from_stream_output))
* draw_vbo recursively, and before si_update_shaders, which uses
* current_rast_prim for this draw_vbo call. */
if (sctx->gs_shader.cso)
- sctx->current_rast_prim = sctx->gs_shader.cso->gs_output_prim;
+ rast_prim = sctx->gs_shader.cso->gs_output_prim;
else if (sctx->tes_shader.cso)
- sctx->current_rast_prim =
- sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
+ rast_prim = sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
else
- sctx->current_rast_prim = info->mode;
+ rast_prim = info->mode;
+
+ if (rast_prim != sctx->current_rast_prim) {
+ sctx->current_rast_prim = rast_prim;
+ sctx->do_update_shaders = true;
+ }
+
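+ /* si_update_shaders is now driven by the do_update_shaders flag, so
+ * shader state is revalidated only when something actually changed
+ * instead of on every draw call. */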
+ if (sctx->do_update_shaders && !si_update_shaders(sctx))
+ return;
- if (!si_update_shaders(sctx) ||
- !si_upload_graphics_shader_descriptors(sctx))
+ if (!si_upload_graphics_shader_descriptors(sctx))
return;
if (info->indexed) {
if (sctx->b.flags)
si_mark_atom_dirty(sctx, sctx->atoms.s.cache_flush);
+ /* Add buffer sizes for memory checking in need_cs_space. */
+ if (sctx->emit_scratch_reloc && sctx->scratch_buffer)
+ r600_context_add_resource_size(ctx, &sctx->scratch_buffer->b.b);
+ if (info->indirect)
+ r600_context_add_resource_size(ctx, info->indirect);
+
si_need_cs_space(sctx);
+ /* Since we've called r600_context_add_resource_size for vertex buffers,
+ * this must be called after si_need_cs_space, because we must let
+ * need_cs_space flush before we add buffers to the buffer list.
+ */
+ if (!si_upload_vertex_buffer_descriptors(sctx))
+ return;
+
/* Emit states. */
mask = sctx->dirty_atoms;
while (mask) {
surf = sctx->framebuffer.state.cbufs[i];
rtex = (struct r600_texture*)surf->texture;
- rtex->dirty_level_mask |= 1 << surf->u.tex.level;
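+ /* dirty_level_mask only triggers work for textures with FMASK to
+ * decompress; DCC statistics gathering tracks dirtiness via the
+ * separate_dcc_dirty flag instead. */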
+ if (rtex->fmask.size)
+ rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+ if (rtex->dcc_gather_statistics)
+ rtex->separate_dcc_dirty = true;
} while (mask);
}