/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "si_pipe.h"
#include "radeon/r600_cs.h"
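
/* Emit a wait-idle on the DMA ring by emitting a single NOP, which (per
 * the hardware behavior noted below) drains all prior DMA work. Only the
 * NOP encoding differs: 0x00000000 on CIK+ vs. 0xf0000000 on older chips.
 */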
static void si_dma_emit_wait_idle(struct si_context *sctx)
{
	struct radeon_winsys_cs *cs = sctx->b.dma_cs;

	/* NOP waits for idle on Evergreen and later. */
	if (sctx->b.chip_class >= CIK)
		radeon_emit(cs, 0x00000000); /* NOP */
	else
		radeon_emit(cs, 0xf0000000); /* NOP */
}
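
/* Prepare the DMA IB for a packet of num_dw dwords that writes dst and
 * reads src (either may be NULL). This must be called before every DMA
 * packet is emitted: it reserves IB space, resolves GFX->DMA dependencies
 * and intra-IB hazards, and adds both buffers to the IB's buffer list.
 */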
void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
		       struct r600_resource *dst, struct r600_resource *src)
{
	uint64_t vram = ctx->b.dma_cs->used_vram;
	uint64_t gtt = ctx->b.dma_cs->used_gart;
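
	/* Estimate this IB's VRAM/GTT usage with dst and src included, so
	 * the checks below can flush before memory limits are exceeded.
	 */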
	if (dst) {
		vram += dst->vram_usage;
		gtt += dst->gart_usage;
	}
	if (src) {
		vram += src->vram_usage;
		gtt += src->gart_usage;
	}
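
	/* Submitting the GFX IB first lets the kernel order this DMA work
	 * after it via the shared buffers' fences; otherwise the DMA engine
	 * could race with in-flight GFX accesses to dst or src.
	 */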
	/* Flush the GFX IB if DMA depends on it. */
	if (radeon_emitted(ctx->b.gfx_cs, ctx->b.initial_gfx_cs_size) &&
	    ((dst &&
	      ctx->b.ws->cs_is_buffer_referenced(ctx->b.gfx_cs, dst->buf,
						 RADEON_USAGE_READWRITE)) ||
	     (src &&
	      ctx->b.ws->cs_is_buffer_referenced(ctx->b.gfx_cs, src->buf,
						 RADEON_USAGE_WRITE))))
		si_flush_gfx_cs(ctx, PIPE_FLUSH_ASYNC, NULL);

	/* Flush if there's not enough space, or if the memory usage per IB
	 * is too large.
	 *
	 * IBs using too little memory are limited by the IB submission overhead.
	 * IBs using too much memory are limited by the kernel/TTM overhead.
	 * Too long IBs create CPU-GPU pipeline bubbles and add latency.
	 *
	 * This heuristic makes sure that DMA requests are executed
	 * very soon after the call is made and lowers memory usage.
	 * It improves texture upload performance by keeping the DMA
	 * engine busy while uploads are being submitted.
	 */
	num_dw++; /* for emit_wait_idle below */
	if (!ctx->b.ws->cs_check_space(ctx->b.dma_cs, num_dw) ||
	    ctx->b.dma_cs->used_vram + ctx->b.dma_cs->used_gart > 64 * 1024 * 1024 ||
	    !radeon_cs_memory_below_limit(ctx->screen, ctx->b.dma_cs, vram, gtt)) {
		si_flush_dma_cs(ctx, PIPE_FLUSH_ASYNC, NULL);
		assert((num_dw + ctx->b.dma_cs->current.cdw) <= ctx->b.dma_cs->current.max_dw);
	}

	/* Wait for idle if either buffer has been used in the IB before to
	 * prevent read-after-write hazards.
	 */
	if ((dst &&
	     ctx->b.ws->cs_is_buffer_referenced(ctx->b.dma_cs, dst->buf,
						RADEON_USAGE_READWRITE)) ||
	    (src &&
	     ctx->b.ws->cs_is_buffer_referenced(ctx->b.dma_cs, src->buf,
						RADEON_USAGE_WRITE)))
		si_dma_emit_wait_idle(ctx);
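
	/* Register both buffers with the IB's buffer list so the winsys can
	 * pin and fence them at submission.
	 */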
	if (dst) {
		radeon_add_to_buffer_list(ctx, ctx->b.dma_cs, dst,
					  RADEON_USAGE_WRITE,
					  RADEON_PRIO_SDMA_BUFFER);
	}
	if (src) {
		radeon_add_to_buffer_list(ctx, ctx->b.dma_cs, src,
					  RADEON_USAGE_READ,
					  RADEON_PRIO_SDMA_BUFFER);
	}

	/* this function is called before all DMA calls, so increment this. */
	ctx->b.num_dma_calls++;
}
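
/* Submit the current DMA IB and return its fence in *fence (if non-NULL).
 * With the CHECK_VM debug flag set, the IB is also saved before submission
 * and checked for VM faults afterwards.
 */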
void si_flush_dma_cs(struct si_context *ctx, unsigned flags,
		     struct pipe_fence_handle **fence)
{
	struct radeon_winsys_cs *cs = ctx->b.dma_cs;
	struct radeon_saved_cs saved;
	bool check_vm = (ctx->screen->debug_flags & DBG(CHECK_VM)) != 0;

	if (!radeon_emitted(cs, 0)) {
		if (fence)
			ctx->b.ws->fence_reference(fence, ctx->b.last_sdma_fence);
		return;
	}
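
	/* Save a snapshot of the IB before submission so that, if a VM fault
	 * is detected below, it can be traced back to the offending packets.
	 */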
	if (check_vm)
		si_save_cs(ctx->b.ws, cs, &saved, true);

	ctx->b.ws->cs_flush(cs, flags, &ctx->b.last_sdma_fence);
	if (fence)
		ctx->b.ws->fence_reference(fence, ctx->b.last_sdma_fence);

	if (check_vm) {
		/* Use conservative timeout 800ms, after which we won't wait any
		 * longer and assume the GPU is hung.
		 */
		ctx->b.ws->fence_wait(ctx->b.ws, ctx->b.last_sdma_fence, 800*1000*1000);

		si_check_vm_faults(ctx, &saved, RING_DMA);
		si_clear_saved_cs(&saved);
	}
}
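
/* Clear a range of a buffer on the screen's auxiliary context. The aux
 * context is shared by all contexts of this screen, so the clear and the
 * flush are serialized by aux_context_lock.
 */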
void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst,
			    uint64_t offset, uint64_t size, unsigned value)
{
	struct si_context *ctx = (struct si_context*)sscreen->aux_context;

	mtx_lock(&sscreen->aux_context_lock);
	ctx->b.dma_clear_buffer(ctx, dst, offset, size, value);
	sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
	mtx_unlock(&sscreen->aux_context_lock);
}