*/
#include "si_build_pm4.h"
-#include "gfx9d.h"
+#include "sid.h"
#include "util/u_index_modify.h"
#include "util/u_log.h"
#include "util/u_upload_mgr.h"
#include "util/u_prim.h"
+#include "util/u_suballoc.h"
#include "ac_debug.h"
C_VS_STATE_LS_OUT_VERTEX_SIZE;
sctx->current_vs_state |= tcs_in_layout;
+ /* We should be able to support in-shader LDS use with LLVM >= 9
+ * by just adding the lds_sizes together, but it has never
+ * been tested. */
+ assert(ls_current->config.lds_size == 0);
+
if (sctx->chip_class >= GFX9) {
unsigned hs_rsrc2 = ls_current->config.rsrc2 |
S_00B42C_LDS_SIZE(lds_size);
}
}
-static unsigned si_num_prims_for_vertices(const struct pipe_draw_info *info)
+static unsigned si_num_prims_for_vertices(const struct pipe_draw_info *info,
+ enum pipe_prim_type prim)
{
- switch (info->mode) {
+ switch (prim) {
case PIPE_PRIM_PATCHES:
return info->count / info->vertices_per_patch;
case PIPE_PRIM_POLYGON:
case SI_PRIM_RECTANGLE_LIST:
return info->count / 3;
default:
- return u_decomposed_prims_for_vertices(info->mode, info->count);
+ return u_decomposed_prims_for_vertices(prim, info->count);
}
}
static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
const struct pipe_draw_info *info,
- unsigned num_patches)
+ enum pipe_prim_type prim,
+ unsigned num_patches,
+ unsigned instance_count,
+ bool primitive_restart)
{
union si_vgt_param_key key = sctx->ia_multi_vgt_param_key;
unsigned primgroup_size;
primgroup_size = 128; /* recommended without a GS and tess */
}
- key.u.prim = info->mode;
- key.u.uses_instancing = info->indirect || info->instance_count > 1;
+ key.u.prim = prim;
+ key.u.uses_instancing = info->indirect || instance_count > 1;
key.u.multi_instances_smaller_than_primgroup =
info->indirect ||
- (info->instance_count > 1 &&
+ (instance_count > 1 &&
(info->count_from_stream_output ||
- si_num_prims_for_vertices(info) < primgroup_size));
- key.u.primitive_restart = info->primitive_restart;
+ si_num_prims_for_vertices(info, prim) < primgroup_size));
+ key.u.primitive_restart = primitive_restart;
key.u.count_from_stream_output = info->count_from_stream_output != NULL;
ia_multi_vgt_param = sctx->ia_multi_vgt_param[key.index] |
if (sctx->family == CHIP_HAWAII &&
G_028AA8_SWITCH_ON_EOI(ia_multi_vgt_param) &&
(info->indirect ||
- (info->instance_count > 1 &&
+ (instance_count > 1 &&
(info->count_from_stream_output ||
- si_num_prims_for_vertices(info) <= 1))))
+ si_num_prims_for_vertices(info, prim) <= 1))))
sctx->flags |= SI_CONTEXT_VGT_FLUSH;
}
}
static inline bool si_prim_restart_index_changed(struct si_context *sctx,
- const struct pipe_draw_info *info)
+ bool primitive_restart,
+ unsigned restart_index)
{
- return info->primitive_restart &&
- (info->restart_index != sctx->last_restart_index ||
+ return primitive_restart &&
+ (restart_index != sctx->last_restart_index ||
sctx->last_restart_index == SI_RESTART_INDEX_UNKNOWN);
}
static void si_emit_draw_registers(struct si_context *sctx,
const struct pipe_draw_info *info,
- unsigned num_patches)
+ enum pipe_prim_type prim,
+ unsigned num_patches,
+ unsigned instance_count,
+ bool primitive_restart)
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
- unsigned prim = si_conv_pipe_prim(info->mode);
+ unsigned vgt_prim = si_conv_pipe_prim(prim);
unsigned ia_multi_vgt_param;
- ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches);
+ ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, prim, num_patches,
+ instance_count, primitive_restart);
/* Draw state. */
if (ia_multi_vgt_param != sctx->last_multi_vgt_param) {
sctx->last_multi_vgt_param = ia_multi_vgt_param;
}
- if (prim != sctx->last_prim) {
+ if (vgt_prim != sctx->last_prim) {
if (sctx->chip_class >= GFX7)
radeon_set_uconfig_reg_idx(cs, sctx->screen,
- R_030908_VGT_PRIMITIVE_TYPE, 1, prim);
+ R_030908_VGT_PRIMITIVE_TYPE, 1, vgt_prim);
else
- radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
+ radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, vgt_prim);
- sctx->last_prim = prim;
+ sctx->last_prim = vgt_prim;
}
/* Primitive restart. */
- if (info->primitive_restart != sctx->last_primitive_restart_en) {
+ if (primitive_restart != sctx->last_primitive_restart_en) {
if (sctx->chip_class >= GFX9)
radeon_set_uconfig_reg(cs, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN,
- info->primitive_restart);
+ primitive_restart);
else
radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN,
- info->primitive_restart);
+ primitive_restart);
- sctx->last_primitive_restart_en = info->primitive_restart;
+ sctx->last_primitive_restart_en = primitive_restart;
}
- if (si_prim_restart_index_changed(sctx, info)) {
+ if (si_prim_restart_index_changed(sctx, primitive_restart, info->restart_index)) {
radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
info->restart_index);
sctx->last_restart_index = info->restart_index;
const struct pipe_draw_info *info,
struct pipe_resource *indexbuf,
unsigned index_size,
- unsigned index_offset)
+ unsigned index_offset,
+ unsigned instance_count,
+ bool dispatch_prim_discard_cs,
+ unsigned original_index_size)
{
struct pipe_draw_indirect_info *indirect = info->indirect;
struct radeon_cmdbuf *cs = sctx->gfx_cs;
sctx->last_index_size = index_size;
}
- index_max_size = (indexbuf->width0 - index_offset) /
- index_size;
- index_va = si_resource(indexbuf)->gpu_address + index_offset;
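+		/* original_index_size is 0 for non-indexed draws that go through
+		 * the primitive discard compute path (index_size is overridden
+		 * to 4 there), in which case there is no app index buffer to bind. */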
+ if (original_index_size) {
+ index_max_size = (indexbuf->width0 - index_offset) /
+ original_index_size;
+ index_va = si_resource(indexbuf)->gpu_address + index_offset;
- radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
- si_resource(indexbuf),
- RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
+ radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
+ si_resource(indexbuf),
+ RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
+ }
} else {
/* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE,
* so the state must be re-emitted before the next indexed draw.
radeon_emit(cs, di_src_sel);
}
} else {
- unsigned instance_count = info->instance_count;
int base_vertex;
if (sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN ||
}
/* Base vertex and start instance. */
- base_vertex = index_size ? info->index_bias : info->start;
+ base_vertex = original_index_size ? info->index_bias : info->start;
if (sctx->num_vs_blit_sgprs) {
/* Re-emit draw constants after we leave u_blitter. */
}
if (index_size) {
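+		/* Hand the draw off to the compute-based primitive discard
+		 * path; see si_compute_prim_discard.c for how the culled draw
+		 * is executed. */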
+ if (dispatch_prim_discard_cs) {
+ index_va += info->start * original_index_size;
+ index_max_size = MIN2(index_max_size, info->count);
+
+ si_dispatch_prim_discard_cs_and_draw(sctx, info,
+ original_index_size,
+ base_vertex,
+ index_va, index_max_size);
+ return;
+ }
+
index_va += info->start * index_size;
radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, render_cond_bit));
}
}
-static void si_emit_surface_sync(struct si_context *sctx,
- unsigned cp_coher_cntl)
+void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs,
+ unsigned cp_coher_cntl)
{
- struct radeon_cmdbuf *cs = sctx->gfx_cs;
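+	/* Compute IBs can't use PKT3_SURFACE_SYNC; they take the ACQUIRE_MEM
+	 * path below, same as GFX9+ where ACQUIRE_MEM replaces SURFACE_SYNC. */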
+ bool compute_ib = !sctx->has_graphics ||
+ cs == sctx->prim_discard_compute_cs;
- if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
+ if (sctx->chip_class >= GFX9 || compute_ib) {
/* Flush caches and wait for the caches to assert idle. */
radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
/* ACQUIRE_MEM has an implicit context roll if the current context
* is busy. */
- if (sctx->has_graphics)
+ if (!compute_ib)
sctx->context_roll = true;
}
+void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx)
+{
+ if (!si_compute_prim_discard_enabled(sctx))
+ return;
+
+ if (!sctx->barrier_buf) {
+ u_suballocator_alloc(sctx->allocator_zeroed_memory, 4, 4,
+ &sctx->barrier_buf_offset,
+ (struct pipe_resource**)&sctx->barrier_buf);
+ }
+
+ /* Emit a placeholder to signal the next compute IB to start.
+ * See si_compute_prim_discard.c for explanation.
+ */
+ uint32_t signal = 1;
+ si_cp_write_data(sctx, sctx->barrier_buf, sctx->barrier_buf_offset,
+ 4, V_370_MEM, V_370_ME, &signal);
+
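+	/* Point at the header of the WRITE_DATA packet emitted above; the
+	 * packet is 5 dwords (header, control word, address lo/hi, 1 data dword). */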
+ sctx->last_pkt3_write_data =
+ &sctx->gfx_cs->current.buf[sctx->gfx_cs->current.cdw - 5];
+
+	 * Only the last occurrence of WRITE_DATA will be executed.
+ * The packet will be enabled in si_flush_gfx_cs.
+ */
+ *sctx->last_pkt3_write_data = PKT3(PKT3_NOP, 3, 0);
+}
+
void si_emit_cache_flush(struct si_context *sctx)
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
}
uint32_t cp_coher_cntl = 0;
- uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
- SI_CONTEXT_FLUSH_AND_INV_DB);
+ const uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
+ SI_CONTEXT_FLUSH_AND_INV_DB);
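+	/* Barrier-like flags after which the next primitive discard compute IB
+	 * may be signaled to start (see the call further below). */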
+ const bool is_barrier = flush_cb_db ||
+ /* INV_ICACHE == beginning of gfx IB. Checking
+ * INV_ICACHE fixes corruption for DeusExMD with
+ * compute-based culling, but I don't know why.
+ */
+ flags & (SI_CONTEXT_INV_ICACHE |
+ SI_CONTEXT_PS_PARTIAL_FLUSH |
+ SI_CONTEXT_VS_PARTIAL_FLUSH) ||
+ (flags & SI_CONTEXT_CS_PARTIAL_FLUSH &&
+ sctx->compute_is_busy);
if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
sctx->num_cb_cache_flushes++;
/* Invalidate L1 & L2. (L1 is always invalidated on GFX6)
* WB must be set on GFX8+ when TC_ACTION is set.
*/
- si_emit_surface_sync(sctx, cp_coher_cntl |
+ si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
S_0085F0_TC_ACTION_ENA(1) |
S_0085F0_TCL1_ACTION_ENA(1) |
S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8));
*
* WB doesn't work without NC.
*/
- si_emit_surface_sync(sctx, cp_coher_cntl |
+ si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
S_0301F0_TC_WB_ACTION_ENA(1) |
S_0301F0_TC_NC_ACTION_ENA(1));
cp_coher_cntl = 0;
}
if (flags & SI_CONTEXT_INV_VMEM_L1) {
/* Invalidate per-CU VMEM L1. */
- si_emit_surface_sync(sctx, cp_coher_cntl |
+ si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
S_0085F0_TCL1_ACTION_ENA(1));
cp_coher_cntl = 0;
}
/* If TC flushes haven't cleared this... */
if (cp_coher_cntl)
- si_emit_surface_sync(sctx, cp_coher_cntl);
+ si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl);
+
+ if (is_barrier)
+ si_prim_discard_signal_next_compute_ib_start(sctx);
if (flags & SI_CONTEXT_START_PIPELINE_STATS) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
}
static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_info *info,
- unsigned skip_atom_mask)
+ enum pipe_prim_type prim, unsigned instance_count,
+ bool primitive_restart, unsigned skip_atom_mask)
{
unsigned num_patches = 0;
/* Emit draw states. */
si_emit_vs_state(sctx, info);
- si_emit_draw_registers(sctx, info, num_patches);
+ si_emit_draw_registers(sctx, info, prim, num_patches, instance_count,
+ primitive_restart);
+}
+
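+/* Return true if all resources that the vertex stage (and index fetch) may
+ * read are referenced only for read by the current gfx IB. This is required
+ * because primitive discard compute dispatches can run ahead of the gfx IB.
+ */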
+static bool
+si_all_vs_resources_read_only(struct si_context *sctx,
+ struct pipe_resource *indexbuf)
+{
+ struct radeon_winsys *ws = sctx->ws;
+ struct radeon_cmdbuf *cs = sctx->gfx_cs;
+
+ /* Index buffer. */
+ if (indexbuf &&
+ ws->cs_is_buffer_referenced(cs, si_resource(indexbuf)->buf,
+ RADEON_USAGE_WRITE))
+ goto has_write_reference;
+
+ /* Vertex buffers. */
+ struct si_vertex_elements *velems = sctx->vertex_elements;
+ unsigned num_velems = velems->count;
+
+ for (unsigned i = 0; i < num_velems; i++) {
+ if (!((1 << i) & velems->first_vb_use_mask))
+ continue;
+
+ unsigned vb_index = velems->vertex_buffer_index[i];
+ struct pipe_resource *res = sctx->vertex_buffer[vb_index].buffer.resource;
+ if (!res)
+ continue;
+
+ if (ws->cs_is_buffer_referenced(cs, si_resource(res)->buf,
+ RADEON_USAGE_WRITE))
+ goto has_write_reference;
+ }
+
+ /* Constant and shader buffers. */
+ struct si_descriptors *buffers =
+ &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(PIPE_SHADER_VERTEX)];
+ for (unsigned i = 0; i < buffers->num_active_slots; i++) {
+ unsigned index = buffers->first_active_slot + i;
+ struct pipe_resource *res =
+ sctx->const_and_shader_buffers[PIPE_SHADER_VERTEX].buffers[index];
+ if (!res)
+ continue;
+
+ if (ws->cs_is_buffer_referenced(cs, si_resource(res)->buf,
+ RADEON_USAGE_WRITE))
+ goto has_write_reference;
+ }
+
+ /* Samplers. */
+ struct si_shader_selector *vs = sctx->vs_shader.cso;
+ if (vs->info.samplers_declared) {
+ unsigned num_samplers = util_last_bit(vs->info.samplers_declared);
+
+ for (unsigned i = 0; i < num_samplers; i++) {
+ struct pipe_sampler_view *view = sctx->samplers[PIPE_SHADER_VERTEX].views[i];
+ if (!view)
+ continue;
+
+ if (ws->cs_is_buffer_referenced(cs,
+ si_resource(view->texture)->buf,
+ RADEON_USAGE_WRITE))
+ goto has_write_reference;
+ }
+ }
+
+ /* Images. */
+ if (vs->info.images_declared) {
+ unsigned num_images = util_last_bit(vs->info.images_declared);
+
+ for (unsigned i = 0; i < num_images; i++) {
+ struct pipe_resource *res = sctx->images[PIPE_SHADER_VERTEX].views[i].resource;
+ if (!res)
+ continue;
+
+ if (ws->cs_is_buffer_referenced(cs, si_resource(res)->buf,
+ RADEON_USAGE_WRITE))
+ goto has_write_reference;
+ }
+ }
+
+ return true;
+
+has_write_reference:
+ /* If the current gfx IB has enough packets, flush it to remove write
+ * references to buffers.
+ */
+ if (cs->prev_dw + cs->current.cdw > 2048) {
+ si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
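+		/* The fresh IB can no longer hold write references, so the
+		 * recursive check below must succeed. */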
+ assert(si_all_vs_resources_read_only(sctx, indexbuf));
+ return true;
+ }
+ return false;
+}
+
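+/* Used in the prim discard eligibility chain below: each requirement is
+ * written as (condition || pd_msg("reason")), so the first failing check
+ * logs why the compute path was skipped when SI_PRIM_DISCARD_DEBUG is set,
+ * and the whole expression evaluates to false. */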
+static ALWAYS_INLINE bool pd_msg(const char *s)
+{
+ if (SI_PRIM_DISCARD_DEBUG)
+ printf("PD failed: %s\n", s);
+ return false;
}
static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
struct si_context *sctx = (struct si_context *)ctx;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
struct pipe_resource *indexbuf = info->index.resource;
- unsigned dirty_tex_counter;
- enum pipe_prim_type rast_prim;
+ unsigned dirty_tex_counter, dirty_buf_counter;
+ enum pipe_prim_type rast_prim, prim = info->mode;
unsigned index_size = info->index_size;
unsigned index_offset = info->indirect ? info->start * index_size : 0;
+ unsigned instance_count = info->instance_count;
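+	/* The prim_restart_tri_strips_only option disables primitive restart
+	 * for triangle strips (and their adjacency variant). */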
+ bool primitive_restart = info->primitive_restart &&
+ (!sctx->screen->options.prim_restart_tri_strips_only ||
+ (prim != PIPE_PRIM_TRIANGLE_STRIP &&
+ prim != PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY));
if (likely(!info->indirect)) {
/* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is
* no workaround for indirect draws, but we can at least skip
* direct draws.
*/
- if (unlikely(!info->instance_count))
+ if (unlikely(!instance_count))
return;
/* Handle count == 0. */
if (unlikely(!sctx->vs_shader.cso ||
!rs ||
(!sctx->ps_shader.cso && !rs->rasterizer_discard) ||
- (!!sctx->tes_shader.cso != (info->mode == PIPE_PRIM_PATCHES)))) {
+ (!!sctx->tes_shader.cso != (prim == PIPE_PRIM_PATCHES)))) {
assert(0);
return;
}
si_update_all_texture_descriptors(sctx);
}
+ dirty_buf_counter = p_atomic_read(&sctx->screen->dirty_buf_counter);
+ if (unlikely(dirty_buf_counter != sctx->last_dirty_buf_counter)) {
+ sctx->last_dirty_buf_counter = dirty_buf_counter;
+ /* Rebind all buffers unconditionally. */
+ si_rebind_buffer(sctx, NULL);
+ }
+
si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));
/* Set the rasterization primitive type.
else
rast_prim = sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
} else
- rast_prim = info->mode;
+ rast_prim = prim;
if (rast_prim != sctx->current_rast_prim) {
if (util_prim_is_points_or_lines(sctx->current_rast_prim) !=
*/
bool gs_tri_strip_adj_fix =
!sctx->tes_shader.cso &&
- info->mode == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY &&
- !info->primitive_restart;
+ prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY &&
+ !primitive_restart;
if (gs_tri_strip_adj_fix != sctx->gs_tri_strip_adj_fix) {
sctx->gs_tri_strip_adj_fix = gs_tri_strip_adj_fix;
}
}
- if (sctx->do_update_shaders && !si_update_shaders(sctx))
- goto return_cleanup;
-
if (index_size) {
/* Translate or upload, if needed. */
/* 8-bit indices are supported on GFX8. */
}
}
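+	/* State for the compute-based primitive discard path. original_index_size
+	 * keeps the app's index size; the compute path overrides index_size. */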
+ bool dispatch_prim_discard_cs = false;
+ bool prim_discard_cs_instancing = false;
+ unsigned original_index_size = index_size;
+ unsigned direct_count = 0;
+
if (info->indirect) {
struct pipe_draw_indirect_info *indirect = info->indirect;
si_resource(indirect->indirect_draw_count)->TC_L2_dirty = false;
}
}
+ } else {
+ /* Multiply by 3 for strips and fans to get an approximate vertex
+ * count as triangles. */
+ direct_count = info->count * instance_count *
+ (prim == PIPE_PRIM_TRIANGLES ? 1 : 3);
+ }
+
+ /* Determine if we can use the primitive discard compute shader. */
+ if (si_compute_prim_discard_enabled(sctx) &&
+ (direct_count > sctx->prim_discard_vertex_count_threshold ?
+ (sctx->compute_num_verts_rejected += direct_count, true) : /* Add, then return true. */
+ (sctx->compute_num_verts_ineligible += direct_count, false)) && /* Add, then return false. */
+ (!info->count_from_stream_output || pd_msg("draw_opaque")) &&
+ (primitive_restart ?
+ /* Supported prim types with primitive restart: */
+ (prim == PIPE_PRIM_TRIANGLE_STRIP || pd_msg("bad prim type with primitive restart")) &&
+ /* Disallow instancing with primitive restart: */
+ (instance_count == 1 || pd_msg("instance_count > 1 with primitive restart")) :
+ /* Supported prim types without primitive restart + allow instancing: */
+ (1 << prim) & ((1 << PIPE_PRIM_TRIANGLES) |
+ (1 << PIPE_PRIM_TRIANGLE_STRIP) |
+ (1 << PIPE_PRIM_TRIANGLE_FAN)) &&
+ /* Instancing is limited to 16-bit indices, because InstanceID is packed into VertexID. */
+	 /* TODO: DrawArraysInstanced sometimes doesn't work, so it's disabled. */
+ (instance_count == 1 ||
+ (instance_count <= USHRT_MAX && index_size && index_size <= 2) ||
+ pd_msg("instance_count too large or index_size == 4 or DrawArraysInstanced"))) &&
+ (info->drawid == 0 || !sctx->vs_shader.cso->info.uses_drawid || pd_msg("draw_id > 0")) &&
+ (!sctx->render_cond || pd_msg("render condition")) &&
+ /* Forced enablement ignores pipeline statistics queries. */
+ (sctx->screen->debug_flags & (DBG(PD) | DBG(ALWAYS_PD)) ||
+ (!sctx->num_pipeline_stat_queries && !sctx->streamout.prims_gen_query_enabled) ||
+ pd_msg("pipestat or primgen query")) &&
+ (!sctx->vertex_elements->instance_divisor_is_fetched || pd_msg("loads instance divisors")) &&
+ (!sctx->tes_shader.cso || pd_msg("uses tess")) &&
+ (!sctx->gs_shader.cso || pd_msg("uses GS")) &&
+ (!sctx->ps_shader.cso->info.uses_primid || pd_msg("PS uses PrimID")) &&
+#if SI_PRIM_DISCARD_DEBUG /* same as cso->prim_discard_cs_allowed */
+ (!sctx->vs_shader.cso->info.uses_bindless_images || pd_msg("uses bindless images")) &&
+ (!sctx->vs_shader.cso->info.uses_bindless_samplers || pd_msg("uses bindless samplers")) &&
+ (!sctx->vs_shader.cso->info.writes_memory || pd_msg("writes memory")) &&
+ (!sctx->vs_shader.cso->info.writes_viewport_index || pd_msg("writes viewport index")) &&
+ !sctx->vs_shader.cso->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] &&
+ !sctx->vs_shader.cso->so.num_outputs &&
+#else
+ (sctx->vs_shader.cso->prim_discard_cs_allowed || pd_msg("VS shader uses unsupported features")) &&
+#endif
+ /* Check that all buffers are used for read only, because compute
+ * dispatches can run ahead. */
+ (si_all_vs_resources_read_only(sctx, index_size ? indexbuf : NULL) || pd_msg("write reference"))) {
+ switch (si_prepare_prim_discard_or_split_draw(sctx, info, primitive_restart)) {
+ case SI_PRIM_DISCARD_ENABLED:
+ original_index_size = index_size;
+ prim_discard_cs_instancing = instance_count > 1;
+ dispatch_prim_discard_cs = true;
+
+ /* The compute shader changes/lowers the following: */
+ prim = PIPE_PRIM_TRIANGLES;
+ index_size = 4;
+ instance_count = 1;
+ primitive_restart = false;
+ sctx->compute_num_verts_rejected -= direct_count;
+ sctx->compute_num_verts_accepted += direct_count;
+ break;
+ case SI_PRIM_DISCARD_DISABLED:
+ break;
+ case SI_PRIM_DISCARD_DRAW_SPLIT:
+ sctx->compute_num_verts_rejected -= direct_count;
+ goto return_cleanup;
+ }
}
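+	/* Instancing on the compute path packs InstanceID into VertexID (see
+	 * above), which the vertex shader must unpack, so the shader variant
+	 * depends on this flag. */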
+ if (prim_discard_cs_instancing != sctx->prim_discard_cs_instancing) {
+ sctx->prim_discard_cs_instancing = prim_discard_cs_instancing;
+ sctx->do_update_shaders = true;
+ }
+
+ if (sctx->do_update_shaders && !si_update_shaders(sctx))
+ goto return_cleanup;
+
si_need_gfx_cs_space(sctx);
if (sctx->bo_list_add_all_gfx_resources)
goto return_cleanup;
/* Emit all states except possibly render condition. */
- si_emit_all_states(sctx, info, masked_atoms);
+ si_emit_all_states(sctx, info, prim, instance_count,
+ primitive_restart, masked_atoms);
si_emit_cache_flush(sctx);
/* <-- CUs are idle here. */
sctx->dirty_atoms = 0;
- si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
+ si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset,
+ instance_count, dispatch_prim_discard_cs,
+ original_index_size);
/* <-- CUs are busy here. */
/* Start prefetches after the draw has been started. Both will run
cik_emit_prefetch_L2(sctx, true);
if (!si_upload_graphics_shader_descriptors(sctx))
- return;
+ goto return_cleanup;
- si_emit_all_states(sctx, info, masked_atoms);
+ si_emit_all_states(sctx, info, prim, instance_count,
+ primitive_restart, masked_atoms);
if (has_gfx9_scissor_bug &&
(sctx->context_roll ||
sctx->dirty_atoms = 0;
- si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
+ si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset,
+ instance_count, dispatch_prim_discard_cs,
+ original_index_size);
/* Prefetch the remaining shaders after the draw has been
* started. */
sctx->num_draw_calls++;
if (sctx->framebuffer.state.nr_cbufs > 1)
sctx->num_mrt_draw_calls++;
- if (info->primitive_restart)
+ if (primitive_restart)
sctx->num_prim_restart_calls++;
if (G_0286E8_WAVESIZE(sctx->spi_tmpring_size))
sctx->num_spill_draw_calls++;