radeon_emit(cs, 0xf0000000); /* NOP */
}
-void si_dma_emit_timestamp(struct si_context *sctx, struct r600_resource *dst,
+void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst,
uint64_t offset)
{
struct radeon_cmdbuf *cs = sctx->dma_cs;
...
}

void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned clear_value)
{
struct radeon_cmdbuf *cs = sctx->dma_cs;
unsigned i, ncopy, csize;
- struct r600_resource *rdst = r600_resource(dst);
+ struct si_resource *sdst = si_resource(dst);
assert(offset % 4 == 0);
assert(size);
/* Mark the buffer range of destination as valid (initialized),
* so that transfer_map knows it should wait for the GPU when mapping
* that range. */
- util_range_add(&rdst->valid_buffer_range, offset, offset + size);
+ util_range_add(&sdst->valid_buffer_range, offset, offset + size);
- offset += rdst->gpu_address;
+ offset += sdst->gpu_address;
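The comment above is what makes unsynchronized CPU mappings safe: util_range_add() grows a per-buffer [start, end) interval, and transfer_map only waits for the GPU when the mapped window overlaps it. Below is a minimal standalone model of that pattern; the struct and helpers mirror Mesa's util/u_range.h but are re-created here purely for illustration, not taken from the driver.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for Mesa's util_range: one [start, end) interval
 * that only ever grows. */
struct util_range {
	unsigned start; /* inclusive */
	unsigned end;   /* exclusive */
};

static void util_range_set_empty(struct util_range *r)
{
	r->start = ~0u;
	r->end = 0;
}

static void util_range_add(struct util_range *r, unsigned start, unsigned end)
{
	if (start < r->start)
		r->start = start;
	if (end > r->end)
		r->end = end;
}

static bool util_ranges_intersect(const struct util_range *r,
				  unsigned start, unsigned end)
{
	return start < r->end && end > r->start;
}

int main(void)
{
	struct util_range valid;
	util_range_set_empty(&valid);

	/* The GPU clear initializes 256 bytes at offset 1024. */
	util_range_add(&valid, 1024, 1024 + 256);

	/* transfer_map only needs to stall if the mapped range overlaps
	 * data the GPU may still be writing. */
	printf("map [0,512): wait = %d\n", util_ranges_intersect(&valid, 0, 512));
	printf("map [1100,1200): wait = %d\n", util_ranges_intersect(&valid, 1100, 1200));
	return 0;
}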
if (sctx->chip_class == SI) {
/* the same maximum size as for copying */
ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
- si_need_dma_space(sctx, ncopy * 4, rdst, NULL);
+ si_need_dma_space(sctx, ncopy * 4, sdst, NULL);
for (i = 0; i < ncopy; i++) {
csize = MIN2(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
...
return;
}

/* The following code is for CI, VI, Vega/Raven, etc. */
/* the same maximum size as for copying */
ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
- si_need_dma_space(sctx, ncopy * 5, rdst, NULL);
+ si_need_dma_space(sctx, ncopy * 5, sdst, NULL);
for (i = 0; i < ncopy; i++) {
csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
...
}
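Both chunking loops above follow the same pattern: compute the packet count up front with DIV_ROUND_UP so si_need_dma_space() can reserve the whole IB allotment in one call, then walk the range in MIN2-limited chunks. A self-contained sketch of that loop; MAX_PACKET_SIZE is a made-up stand-in for SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE / CIK_SDMA_COPY_MAX_SIZE.

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define MIN2(a, b) ((a) < (b) ? (a) : (b))

/* Hypothetical per-packet limit for the sketch. */
#define MAX_PACKET_SIZE (1u << 20)

int main(void)
{
	uint64_t offset = 0x10000;
	uint64_t size = (3u << 20) + 512; /* 3 MiB + 512 bytes */

	/* Count the packets first so command-stream space can be
	 * reserved in a single call (ncopy * dwords-per-packet). */
	unsigned ncopy = (unsigned)DIV_ROUND_UP(size, MAX_PACKET_SIZE);
	printf("reserving space for %u packets\n", ncopy);

	for (unsigned i = 0; i < ncopy; i++) {
		unsigned csize = (unsigned)MIN2(size, MAX_PACKET_SIZE);
		printf("packet %u: offset=0x%llx size=%u\n",
		       i, (unsigned long long)offset, csize);
		offset += csize;
		size -= csize;
	}
	return 0;
}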
void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
- struct r600_resource *dst, struct r600_resource *src)
+ struct si_resource *dst, struct si_resource *src)
{
+ struct radeon_winsys *ws = ctx->ws;
uint64_t vram = ctx->dma_cs->used_vram;
uint64_t gtt = ctx->dma_cs->used_gart;
if (dst) {
vram += dst->vram_usage;
gtt += dst->gart_usage;
}
if (src) {
vram += src->vram_usage;
gtt += src->gart_usage;
}
/* Flush the GFX IB if DMA depends on it. */
- if (radeon_emitted(ctx->gfx_cs, ctx->initial_gfx_cs_size) &&
+ if (!ctx->sdma_uploads_in_progress &&
+ radeon_emitted(ctx->gfx_cs, ctx->initial_gfx_cs_size) &&
((dst &&
- ctx->ws->cs_is_buffer_referenced(ctx->gfx_cs, dst->buf,
- RADEON_USAGE_READWRITE)) ||
+ ws->cs_is_buffer_referenced(ctx->gfx_cs, dst->buf,
+ RADEON_USAGE_READWRITE)) ||
(src &&
- ctx->ws->cs_is_buffer_referenced(ctx->gfx_cs, src->buf,
- RADEON_USAGE_WRITE))))
+ ws->cs_is_buffer_referenced(ctx->gfx_cs, src->buf,
+ RADEON_USAGE_WRITE))))
si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
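The condition above flushes the GFX IB only when it actually references one of the DMA buffers, so the kernel sees the GFX submission first and can order the SDMA work after it. A toy model of that dependency test follows; toy_ib, ib_references() and ib_flush() are invented stand-ins for the winsys buffer-list bookkeeping, with plain ints as buffer handles.

#include <stdbool.h>
#include <stdio.h>

#define MAX_REFS 8

/* An unsubmitted IB and the buffers it references. */
struct toy_ib {
	int refs[MAX_REFS];
	int num_refs;
};

static bool ib_references(const struct toy_ib *ib, int buf)
{
	for (int i = 0; i < ib->num_refs; i++) {
		if (ib->refs[i] == buf)
			return true;
	}
	return false;
}

static void ib_flush(struct toy_ib *ib)
{
	printf("flushing IB with %d buffer refs\n", ib->num_refs);
	ib->num_refs = 0; /* submitted: nothing pending anymore */
}

int main(void)
{
	struct toy_ib gfx = { .refs = {42, 7}, .num_refs = 2 };
	int dma_dst = 42; /* the SDMA engine wants to write buffer 42 */

	/* Same shape as the check in si_need_dma_space(): flush only
	 * when the pending GFX IB touches the DMA destination/source. */
	if (ib_references(&gfx, dma_dst))
		ib_flush(&gfx);
	return 0;
}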
/* Flush if there's not enough space, or if the memory usage per IB
* is too large.
*
* This heuristic makes sure that DMA requests are executed
* very soon after the call is made and lowers memory usage. It
* improves texture upload performance by keeping the DMA
* engine busy while uploads are being submitted.
*/
num_dw++; /* for emit_wait_idle below */
- if (!ctx->ws->cs_check_space(ctx->dma_cs, num_dw) ||
- ctx->dma_cs->used_vram + ctx->dma_cs->used_gart > 64 * 1024 * 1024 ||
- !radeon_cs_memory_below_limit(ctx->screen, ctx->dma_cs, vram, gtt)) {
+ if (!ctx->sdma_uploads_in_progress &&
+ (!ws->cs_check_space(ctx->dma_cs, num_dw) ||
+ ctx->dma_cs->used_vram + ctx->dma_cs->used_gart > 64 * 1024 * 1024 ||
+ !radeon_cs_memory_below_limit(ctx->screen, ctx->dma_cs, vram, gtt))) {
si_flush_dma_cs(ctx, PIPE_FLUSH_ASYNC, NULL);
assert((num_dw + ctx->dma_cs->current.cdw) <= ctx->dma_cs->current.max_dw);
}
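Besides the plain out-of-space check, the 64 * 1024 * 1024 term above caps how much memory traffic one SDMA IB may accumulate before it is flushed, so queued DMA starts executing soon after it is recorded. A small sketch of the same decision; should_flush() and all the numbers are illustrative, not driver values.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool should_flush(unsigned cdw, unsigned max_dw, unsigned num_dw,
			 uint64_t used_vram, uint64_t used_gart)
{
	bool out_of_space = cdw + num_dw > max_dw;
	/* The 64 MiB cap from the patch. */
	bool too_much_memory = used_vram + used_gart > 64ull * 1024 * 1024;
	return out_of_space || too_much_memory;
}

int main(void)
{
	/* 48 MiB VRAM + 20 MiB GTT queued -> 68 MiB, over the cap. */
	printf("%d\n", should_flush(1000, 16384, 8, 48ull << 20, 20ull << 20));
	/* 24 MiB queued and plenty of dwords left -> keep recording. */
	printf("%d\n", should_flush(1000, 16384, 8, 16ull << 20, 8ull << 20));
	return 0;
}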
/* Wait for idle if either buffer has been used in the IB before, to
* prevent read-after-write hazards.
*/
if ((dst &&
- ctx->ws->cs_is_buffer_referenced(ctx->dma_cs, dst->buf,
- RADEON_USAGE_READWRITE)) ||
+ ws->cs_is_buffer_referenced(ctx->dma_cs, dst->buf,
+ RADEON_USAGE_READWRITE)) ||
(src &&
- ctx->ws->cs_is_buffer_referenced(ctx->dma_cs, src->buf,
- RADEON_USAGE_WRITE)))
+ ws->cs_is_buffer_referenced(ctx->dma_cs, src->buf,
+ RADEON_USAGE_WRITE)))
si_dma_emit_wait_idle(ctx);
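Within a single SDMA IB there are no implicit barriers between packets, so reusing a buffer needs this explicit wait-idle: a later packet could otherwise read a destination before an earlier packet finished writing it. A toy illustration of that hazard test; needs_wait() and the int buffer handles are invented for the example.

#include <stdbool.h>
#include <stdio.h>

/* One queued DMA op: reads src, writes dst. */
struct toy_op {
	int src, dst;
};

static bool needs_wait(struct toy_op prev, struct toy_op next)
{
	/* RAW: next reads what prev wrote. WAW: next rewrites it. */
	return next.src == prev.dst || next.dst == prev.dst;
}

int main(void)
{
	struct toy_op op1 = { .src = 1, .dst = 2 };
	struct toy_op op2 = { .src = 2, .dst = 3 }; /* reads buffer 2 */

	if (needs_wait(op1, op2))
		printf("emit wait-idle between op1 and op2\n");
	return 0;
}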
+ unsigned sync = ctx->sdma_uploads_in_progress ? 0 : RADEON_USAGE_SYNCHRONIZED;
if (dst) {
- radeon_add_to_buffer_list(ctx, ctx->dma_cs, dst,
- RADEON_USAGE_WRITE, 0);
+ ws->cs_add_buffer(ctx->dma_cs, dst->buf, RADEON_USAGE_WRITE | sync,
+ dst->domains, 0);
}
if (src) {
- radeon_add_to_buffer_list(ctx, ctx->dma_cs, src,
- RADEON_USAGE_READ, 0);
+ ws->cs_add_buffer(ctx->dma_cs, src->buf, RADEON_USAGE_READ | sync,
+ src->domains, 0);
}
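The sync variable introduced above is the core of the change: while sdma_uploads_in_progress is set, buffers are added without RADEON_USAGE_SYNCHRONIZED, so the winsys skips implicit synchronization for the upload submissions. A sketch of how the usage flags compose; the TOY_USAGE_* values are placeholders, not the real RADEON_USAGE_* enums.

#include <stdbool.h>
#include <stdio.h>

enum {
	TOY_USAGE_READ = 1 << 0,
	TOY_USAGE_WRITE = 1 << 1,
	TOY_USAGE_SYNCHRONIZED = 1 << 2,
};

int main(void)
{
	bool sdma_uploads_in_progress = true;

	/* Same pattern as the patch: drop the implicit-sync bit while
	 * the upload pass is recording. */
	unsigned sync = sdma_uploads_in_progress ? 0 : TOY_USAGE_SYNCHRONIZED;
	unsigned dst_usage = TOY_USAGE_WRITE | sync;
	unsigned src_usage = TOY_USAGE_READ | sync;

	printf("dst usage = 0x%x, src usage = 0x%x\n", dst_usage, src_usage);
	return 0;
}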
/* this function is called before all DMA calls, so increment this. */