/* For emulating the rewind packet on CI. */
#define FORCE_REWIND_EMULATION 0
-void si_initialize_prim_discard_tunables(struct si_context *sctx)
+void si_initialize_prim_discard_tunables(struct si_screen *sscreen,
+ bool is_aux_context,
+ unsigned *prim_discard_vertex_count_threshold,
+ unsigned *index_ring_size_per_ib)
{
- sctx->prim_discard_vertex_count_threshold = UINT_MAX; /* disable */
+ *prim_discard_vertex_count_threshold = UINT_MAX; /* disable */
- if (sctx->chip_class == GFX6 || /* SI support is not implemented */
- !sctx->screen->info.has_gds_ordered_append ||
- sctx->screen->debug_flags & DBG(NO_PD) ||
- /* If aux_context == NULL, we are initializing aux_context right now. */
- !sctx->screen->aux_context)
+ if (sscreen->info.chip_class == GFX6 || /* SI support is not implemented */
+ !sscreen->info.has_gds_ordered_append ||
+ sscreen->debug_flags & DBG(NO_PD) ||
+ is_aux_context)
return;
/* TODO: enable this after the GDS kernel memory management is fixed */
bool enable_on_pro_graphics_by_default = false;
- if (sctx->screen->debug_flags & DBG(ALWAYS_PD) ||
- sctx->screen->debug_flags & DBG(PD) ||
+ if (sscreen->debug_flags & DBG(ALWAYS_PD) ||
+ sscreen->debug_flags & DBG(PD) ||
(enable_on_pro_graphics_by_default &&
- sctx->screen->info.is_pro_graphics &&
- (sctx->family == CHIP_BONAIRE ||
- sctx->family == CHIP_HAWAII ||
- sctx->family == CHIP_TONGA ||
- sctx->family == CHIP_FIJI ||
- sctx->family == CHIP_POLARIS10 ||
- sctx->family == CHIP_POLARIS11 ||
- sctx->family == CHIP_VEGA10 ||
- sctx->family == CHIP_VEGA20))) {
- sctx->prim_discard_vertex_count_threshold = 6000 * 3; /* 6K triangles */
-
- if (sctx->screen->debug_flags & DBG(ALWAYS_PD))
- sctx->prim_discard_vertex_count_threshold = 0; /* always enable */
+ sscreen->info.is_pro_graphics &&
+ (sscreen->info.family == CHIP_BONAIRE ||
+ sscreen->info.family == CHIP_HAWAII ||
+ sscreen->info.family == CHIP_TONGA ||
+ sscreen->info.family == CHIP_FIJI ||
+ sscreen->info.family == CHIP_POLARIS10 ||
+ sscreen->info.family == CHIP_POLARIS11 ||
+ sscreen->info.family == CHIP_VEGA10 ||
+ sscreen->info.family == CHIP_VEGA20))) {
+ *prim_discard_vertex_count_threshold = 6000 * 3; /* 6K triangles */
+
+ if (sscreen->debug_flags & DBG(ALWAYS_PD))
+ *prim_discard_vertex_count_threshold = 0; /* always enable */
const uint32_t MB = 1024 * 1024;
const uint64_t GB = 1024 * 1024 * 1024;
/* The total size is double this per context.
* Greater numbers allow bigger gfx IBs.
*/
- if (sctx->screen->info.vram_size <= 2 * GB)
- sctx->index_ring_size_per_ib = 64 * MB;
- else if (sctx->screen->info.vram_size <= 4 * GB)
- sctx->index_ring_size_per_ib = 128 * MB;
+ if (sscreen->info.vram_size <= 2 * GB)
+ *index_ring_size_per_ib = 64 * MB;
+ else if (sscreen->info.vram_size <= 4 * GB)
+ *index_ring_size_per_ib = 128 * MB;
else
- sctx->index_ring_size_per_ib = 256 * MB;
+ *index_ring_size_per_ib = 256 * MB;
}
}
/* Create the compute shader function. */
unsigned old_type = ctx->type;
ctx->type = PIPE_SHADER_COMPUTE;
- si_create_function(ctx, "prim_discard_cs", NULL, 0, THREADGROUP_SIZE);
+ si_llvm_create_func(ctx, "prim_discard_cs", NULL, 0, THREADGROUP_SIZE);
ctx->type = old_type;
if (VERTEX_COUNTER_GDS_MODE == 1) {
};
LLVMValueRef rsrc = ac_build_gather_values(&ctx->ac, desc, 4);
ac_build_buffer_store_dword(&ctx->ac, rsrc, count, 1, ctx->i32_0,
- ctx->i32_0, 0, ac_glc | ac_slc, false);
+ ctx->i32_0, 0, ac_glc | ac_slc);
} else {
LLVMBuildStore(builder, count,
si_expand_32bit_pointer(ctx,
sctx->cs_prim_discard_state.cso = sctx->vs_shader.cso;
sctx->cs_prim_discard_state.current = NULL;
+ if (!sctx->compiler.passes)
+ si_init_compiler(sctx->screen, &sctx->compiler);
+
struct si_compiler_ctx_state compiler_state;
compiler_state.compiler = &sctx->compiler;
compiler_state.debug = sctx->debug;
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
S_008F0C_DATA_FORMAT(output_indexbuf_format);
- /* Viewport state.
- * This is needed by the small primitive culling, because it's done
- * in screen space.
- */
- float scale[2], translate[2];
-
- scale[0] = sctx->viewports.states[0].scale[0];
- scale[1] = sctx->viewports.states[0].scale[1];
- translate[0] = sctx->viewports.states[0].translate[0];
- translate[1] = sctx->viewports.states[0].translate[1];
+ /* Viewport state. */
+ struct si_small_prim_cull_info cull_info;
+ si_get_small_prim_cull_info(sctx, &cull_info);
- /* The viewport shouldn't flip the X axis for the small prim culling to work. */
- assert(-scale[0] + translate[0] <= scale[0] + translate[0]);
-
- /* If the Y axis is inverted (OpenGL default framebuffer), reverse it.
- * This is because the viewport transformation inverts the clip space
- * bounding box, so min becomes max, which breaks small primitive
- * culling.
- */
- if (sctx->viewports.y_inverted) {
- scale[1] = -scale[1];
- translate[1] = -translate[1];
- }
-
- /* Scale the framebuffer up, so that samples become pixels and small
- * primitive culling is the same for all sample counts.
- * This only works with the standard DX sample positions, because
- * the samples are evenly spaced on both X and Y axes.
- */
- unsigned num_samples = sctx->framebuffer.nr_samples;
- assert(num_samples >= 1);
-
- for (unsigned i = 0; i < 2; i++) {
- scale[i] *= num_samples;
- translate[i] *= num_samples;
- }
-
- desc[8] = fui(scale[0]);
- desc[9] = fui(scale[1]);
- desc[10] = fui(translate[0]);
- desc[11] = fui(translate[1]);
+ desc[8] = fui(cull_info.scale[0]);
+ desc[9] = fui(cull_info.scale[1]);
+ desc[10] = fui(cull_info.translate[0]);
+ desc[11] = fui(cull_info.translate[1]);
/* Better subpixel precision increases the efficiency of small
* primitive culling. */
+ unsigned num_samples = sctx->framebuffer.nr_samples;
unsigned quant_mode = sctx->viewports.as_scissor[0].quant_mode;
float small_prim_cull_precision;