return 1;
case PIPE_CAP_QUERY_SO_OVERFLOW:
- return sscreen->info.chip_class <= GFX9;
+ return !sscreen->use_ngg_streamout;
case PIPE_CAP_POST_DEPTH_COVERAGE:
return sscreen->info.chip_class >= GFX10;
si_emit_streamout_end(ctx);
ctx->streamout.suspended = true;
- /* Since streamout uses GDS on gfx10, we need to make
- * GDS idle when we leave the IB, otherwise another
- * process might overwrite it while our shaders are busy.
+ /* Since NGG streamout uses GDS, we need to make GDS
+ * idle when we leave the IB, otherwise another process
+ * might overwrite it while our shaders are busy.
*/
- if (ctx->chip_class >= GFX10)
+ if (ctx->screen->use_ngg_streamout)
wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
}
}
if (sctx->gds)
return;
- assert(sctx->chip_class >= GFX10); /* for gfx10 streamout */
+ assert(sctx->screen->use_ngg_streamout);
/* 4 streamout GDS counters.
* We need 256B (64 dw) of GDS, otherwise streamout hangs.
si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
- if (ctx->chip_class < GFX10)
+ if (!ctx->screen->use_ngg_streamout)
si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
/* CLEAR_STATE disables all window rectangles. */
if (!sctx->border_color_map)
goto fail;
- sctx->ngg = sctx->chip_class >= GFX10;
+ sctx->ngg = sscreen->use_ngg;
/* Initialize context functions used by graphics and compute. */
if (sctx->chip_class >= GFX10)
sscreen->info.family == CHIP_RAVEN;
sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2 ||
sscreen->info.chip_class >= GFX10;
+ sscreen->use_ngg = sscreen->info.chip_class >= GFX10;
+ sscreen->use_ngg_streamout = sscreen->info.chip_class >= GFX10;
/* Only enable primitive binning on APUs by default. */
if (sscreen->info.chip_class >= GFX10) {
bool dpbb_allowed;
bool dfsm_allowed;
bool llvm_has_working_vgpr_indexing;
+ bool use_ngg;
+ bool use_ngg_streamout;
struct {
#define OPT_BOOL(name, dflt, description) bool name:1;
if (!query)
return;
- if (ctx->chip_class == GFX10 &&
+ if (ctx->screen->use_ngg_streamout &&
(query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)) {
assert(!"not implemented");
query_type != SI_QUERY_TIME_ELAPSED_SDMA))
return si_query_sw_create(query_type);
- if (sscreen->info.chip_class >= GFX10 &&
+ if (sscreen->use_ngg_streamout &&
(query_type == PIPE_QUERY_PRIMITIVES_EMITTED ||
query_type == PIPE_QUERY_PRIMITIVES_GENERATED ||
query_type == PIPE_QUERY_SO_STATISTICS ||
ret = si_insert_input_ptr(ctx, ret,
ctx->param_bindless_samplers_and_images,
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
- if (ctx->screen->info.chip_class >= GFX10) {
+ if (ctx->screen->use_ngg) {
ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits,
8 + SI_SGPR_VS_STATE_BITS);
}
}
}
- if (ctx->ac.chip_class <= GFX9 &&
+ if (!ctx->screen->use_ngg_streamout &&
ctx->shader->selector->so.num_outputs)
si_llvm_emit_streamout(ctx, outputs, i, 0);
struct pipe_stream_output_info *so,
struct si_function_info *fninfo)
{
- if (ctx->ac.chip_class >= GFX10)
+ if (ctx->screen->use_ngg_streamout)
return;
/* Streamout SGPRs. */
/* Fetch the vertex stream ID.*/
LLVMValueRef stream_id;
- if (ctx.ac.chip_class <= GFX9 && gs_selector->so.num_outputs)
+ if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs)
stream_id = si_unpack_param(&ctx, ctx.param_streamout_config, 24, 2);
else
stream_id = ctx.i32_0;
}
/* Streamout and exports. */
- if (ctx.ac.chip_class <= GFX9 && gs_selector->so.num_outputs) {
+ if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs) {
si_llvm_emit_streamout(&ctx, outputs,
gsinfo->num_outputs,
stream);
struct radeon_cmdbuf *cs = sctx->gfx_cs;
enum pipe_prim_type rast_prim = sctx->current_rast_prim;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ bool use_ngg = sctx->screen->use_ngg;
if (likely(rast_prim == sctx->last_rast_prim &&
rs->pa_sc_line_stipple == sctx->last_sc_line_stipple &&
- (sctx->chip_class <= GFX9 ||
+ (!use_ngg ||
rs->flatshade_first == sctx->last_flatshade_first)))
return;
radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
sctx->context_roll = true;
- if (sctx->chip_class >= GFX10) {
+ if (use_ngg) {
sctx->current_vs_state &= C_VS_STATE_OUTPRIM;
sctx->current_vs_state |= S_VS_STATE_OUTPRIM(gs_out);
}
}
- if (sctx->chip_class >= GFX10) {
+ if (use_ngg) {
unsigned vtx_index = rs->flatshade_first ? 0 : gs_out;
sctx->current_vs_state &= C_VS_STATE_PROVOKING_VTX_INDEX;
sctx->current_vs_state |= S_VS_STATE_PROVOKING_VTX_INDEX(vtx_index);
}
/* For NGG: */
- if (sctx->chip_class >= GFX10 &&
+ if (sctx->screen->use_ngg &&
sctx->shader_pointers.sh_base[PIPE_SHADER_VERTEX] !=
R_00B230_SPI_SHADER_USER_DATA_GS_0) {
radeon_set_sh_reg(cs,
S_00B12C_OC_LDS_EN(oc_lds_en) |
S_00B12C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
- if (sscreen->info.chip_class <= GFX9) {
+ if (sscreen->info.chip_class <= GFX9)
rsrc1 |= S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8);
+
+ if (!sscreen->use_ngg_streamout) {
rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
si_parse_next_shader_property(&sel->info,
sel->so.num_outputs != 0,
&shader->key);
- if (sscreen->info.chip_class >= GFX10 &&
+
+ if (sscreen->use_ngg &&
+ (!sel->so.num_outputs || sscreen->use_ngg_streamout) &&
((sel->type == PIPE_SHADER_VERTEX &&
!shader->key.as_ls && !shader->key.as_es) ||
sel->type == PIPE_SHADER_TESS_EVAL ||
/* The GS copy shader is always pre-compiled. */
if (sel->type == PIPE_SHADER_GEOMETRY &&
- (sscreen->info.chip_class <= GFX9 || sel->tess_turns_off_ngg)) {
+ (!sscreen->use_ngg || sel->tess_turns_off_ngg)) {
sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, debug);
if (!sel->gs_copy_shader) {
fprintf(stderr, "radeonsi: can't create GS copy shader\n");
static bool si_update_ngg(struct si_context *sctx)
{
- if (sctx->chip_class <= GFX9)
+ if (!sctx->screen->use_ngg)
return false;
bool new_ngg = true;
return NULL;
}
- unsigned buf_filled_size_size = sctx->chip_class >= GFX10 ? 8 : 4;
+ unsigned buf_filled_size_size = sctx->screen->use_ngg_streamout ? 8 : 4;
u_suballocator_alloc(sctx->allocator_zeroed_memory, buf_filled_size_size, 4,
&t->buf_filled_size_offset,
(struct pipe_resource**)&t->buf_filled_size);
SI_CONTEXT_INV_VCACHE;
/* The BUFFER_FILLED_SIZE is written using a PS_DONE event. */
- if (sctx->chip_class >= GFX10) {
+ if (sctx->screen->use_ngg_streamout) {
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
/* Wait now. This is needed to make sure that GDS is idle
* before the next streamout operation starts writing to the targets.
*/
if (num_targets) {
- if (sctx->chip_class >= GFX10)
+ if (sctx->screen->use_ngg_streamout)
si_allocate_gds(sctx);
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
struct pipe_shader_buffer sbuf;
sbuf.buffer = targets[i]->buffer;
- if (sctx->chip_class >= GFX10) {
+ if (sctx->screen->use_ngg_streamout) {
sbuf.buffer_offset = targets[i]->buffer_offset;
sbuf.buffer_size = targets[i]->buffer_size;
} else {
void si_emit_streamout_end(struct si_context *sctx)
{
- if (sctx->chip_class >= GFX10) {
+ if (sctx->screen->use_ngg_streamout) {
gfx10_emit_streamout_end(sctx);
return;
}
static void si_emit_streamout_enable(struct si_context *sctx)
{
- assert(sctx->chip_class < GFX10);
+ assert(!sctx->screen->use_ngg_streamout);
radeon_set_context_reg_seq(sctx->gfx_cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
radeon_emit(sctx->gfx_cs,
(sctx->streamout.enabled_mask << 8) |
(sctx->streamout.enabled_mask << 12);
- if (sctx->chip_class < GFX10 &&
+ if (!sctx->screen->use_ngg_streamout &&
((old_strmout_en != si_get_strmout_en(sctx)) ||
(old_hw_enabled_mask != sctx->streamout.hw_enabled_mask)))
si_mark_atom_dirty(sctx, &sctx->atoms.s.streamout_enable);
void si_update_prims_generated_query_state(struct si_context *sctx,
unsigned type, int diff)
{
- if (sctx->chip_class < GFX10 &&
+ if (!sctx->screen->use_ngg_streamout &&
type == PIPE_QUERY_PRIMITIVES_GENERATED) {
bool old_strmout_en = si_get_strmout_en(sctx);
sctx->b.stream_output_target_destroy = si_so_target_destroy;
sctx->b.set_stream_output_targets = si_set_streamout_targets;
- if (sctx->chip_class >= GFX10) {
+ if (sctx->screen->use_ngg_streamout) {
sctx->atoms.s.streamout_begin.emit = gfx10_emit_streamout_begin;
} else {
sctx->atoms.s.streamout_begin.emit = si_emit_streamout_begin;