unsigned scratch_offset_base = isgs ? 8 : 4;
LLVMValueRef scratch_offset_basev = isgs ? i32_8 : i32_4;
+ ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size", 256);
+
/* Determine the mapping of streamout buffers to vertex streams. */
for (unsigned i = 0; i < so->num_outputs; ++i) {
unsigned buf = so->output[i].output_buffer;
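For reference, ac_llvm_add_target_dep_function_attr() is Mesa's thin wrapper over the LLVM-C string-attribute API. Paraphrased from ac_llvm_util.c (the exact value formatting may differ between Mesa versions), it amounts to:

    #include <stdio.h>
    #include <llvm-c/Core.h>

    /* Stringify the value and attach it as a target-dependent function
     * attribute. The AMDGPU backend reads "amdgpu-gds-size" to learn how
     * many bytes of GDS the shader may address, so the 256 above has to
     * match the GDS buffer allocated on the context side (see
     * si_allocate_gds below). */
    void ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
                                              const char *name, unsigned value)
    {
       char str[16];

       snprintf(str, sizeof(str), "0x%x", value);
       LLVMAddTargetDependentFunctionAttr(F, name, str);
    }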
RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
}
-void si_begin_new_gfx_cs(struct si_context *ctx)
+static void si_add_gds_to_buffer_list(struct si_context *sctx)
{
- if (ctx->is_debug)
- si_begin_gfx_cs_debug(ctx);
-
- if (ctx->gds) {
- ctx->ws->cs_add_buffer(ctx->gfx_cs, ctx->gds,
+ if (sctx->gds) {
+ sctx->ws->cs_add_buffer(sctx->gfx_cs, sctx->gds,
RADEON_USAGE_READWRITE, 0, 0);
- if (ctx->gds_oa) {
- ctx->ws->cs_add_buffer(ctx->gfx_cs, ctx->gds_oa,
+ if (sctx->gds_oa) {
+ sctx->ws->cs_add_buffer(sctx->gfx_cs, sctx->gds_oa,
RADEON_USAGE_READWRITE, 0, 0);
}
}
+}
+
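Factoring the buffer-list registration into a helper matters because the winsys buffer list belongs to the IB currently being recorded and goes away when that IB is flushed. The GDS buffers therefore have to be registered twice: once right after the lazy allocation below (the current IB is already mid-recording) and again at the start of every subsequent gfx CS (see si_begin_new_gfx_cs further down).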
+void si_allocate_gds(struct si_context *sctx)
+{
+ struct radeon_winsys *ws = sctx->ws;
+
+ if (sctx->gds)
+ return;
+
+ assert(sctx->chip_class >= GFX10); /* for gfx10 streamout */
+
+ /* 4 streamout GDS counters.
+ * We need 256B (64 dw) of GDS, otherwise streamout hangs.
+ */
+ sctx->gds = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, 0);
+ sctx->gds_oa = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, 0);
+
+ assert(sctx->gds && sctx->gds_oa);
+ si_add_gds_to_buffer_list(sctx);
+}
+
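Nothing here releases the buffers; since the allocation is one-shot per context, the matching cleanup belongs in context teardown. A hypothetical counterpart using gallium's pb_reference() (illustration only; si_free_gds is an invented name, not part of this patch):

    #include "pipebuffer/pb_buffer.h"

    /* Hypothetical teardown: dropping the last reference returns the GDS
     * and OA allocations to the winsys. Safe even if si_allocate_gds()
     * never ran, per Mesa's usual NULL-tolerant refcounting idiom. */
    static void si_free_gds(struct si_context *sctx)
    {
       pb_reference(&sctx->gds, NULL);
       pb_reference(&sctx->gds_oa, NULL);
    }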
+void si_begin_new_gfx_cs(struct si_context *ctx)
+{
+ if (ctx->is_debug)
+ si_begin_gfx_cs_debug(ctx);
+
+	si_add_gds_to_buffer_list(ctx);
+
/* Always invalidate caches at the beginning of IBs, because external
* users (e.g. BO evictions and SDMA/UVD/VCE IBs) can modify our
/* si_gfx_cs.c */
void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
struct pipe_fence_handle **fence);
+void si_allocate_gds(struct si_context *ctx);
void si_begin_new_gfx_cs(struct si_context *ctx);
void si_need_gfx_cs_space(struct si_context *ctx);
void si_unref_sdma_uploads(struct si_context *sctx);
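Because si_allocate_gds() returns early once sctx->gds is set, the new entry point is idempotent and cheap to call repeatedly. A minimal hypothetical call site (the real one this patch adds is in the streamout code, next hunk):

    /* Hypothetical caller sketch: safe on every streamout begin, since
     * the allocation only happens the first time. */
    static void gfx10_ensure_streamout_gds(struct si_context *sctx)
    {
       if (sctx->chip_class >= GFX10)
          si_allocate_gds(sctx); /* no-op after the first call */
    }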
/* All readers of the streamout targets need to be finished before we can
* start writing to the targets.
*/
- if (num_targets)
+ if (num_targets) {
+ if (sctx->chip_class >= GFX10)
+ si_allocate_gds(sctx);
+
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
- SI_CONTEXT_CS_PARTIAL_FLUSH;
+ SI_CONTEXT_CS_PARTIAL_FLUSH;
+ }
/* Streamout buffers must be bound in 2 places:
* 1) in VGT by setting the VGT_STRMOUT registers
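One cross-cutting invariant: the 256 passed as "amdgpu-gds-size" in the shader hunk at the top and the 256-byte RADEON_DOMAIN_GDS allocation in si_allocate_gds() must stay in sync, since the attribute is what tells the compiler how much GDS the shader code may touch. A hypothetical shared constant would make that explicit (SI_GDS_STREAMOUT_SIZE is an invented name, not in the patch):

    #define SI_GDS_STREAMOUT_SIZE 256 /* bytes = 64 dw; holds 4 streamout counters */

    /* shader side: */
    ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size",
                                         SI_GDS_STREAMOUT_SIZE);
    /* context side: */
    sctx->gds = ws->buffer_create(ws, SI_GDS_STREAMOUT_SIZE, 4,
                                  RADEON_DOMAIN_GDS, 0);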