if (info->count_from_stream_output) {
struct si_streamout_target *t =
(struct si_streamout_target*)info->count_from_stream_output;
- uint64_t va = t->buf_filled_size->gpu_address +
- t->buf_filled_size_offset;
radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
t->stride_in_dw);
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, va); /* src address lo */
- radeon_emit(cs, va >> 32); /* src address hi */
- radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
- radeon_emit(cs, 0); /* unused */
-
- radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
- t->buf_filled_size, RADEON_USAGE_READ,
- RADEON_PRIO_SO_FILLED_SIZE);
+ si_cp_copy_data(sctx,
+ COPY_DATA_REG, NULL,
+ R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2,
+ COPY_DATA_SRC_MEM, t->buf_filled_size,
+ t->buf_filled_size_offset);
}
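
The body of si_cp_copy_data is outside this diff. A minimal sketch of what the helper presumably does, reconstructed from the removed COPY_DATA emission above; the signature is inferred from the call site, and the buffer-list priority is carried over from the removed lines, so treat both as assumptions rather than the verbatim Mesa helper:

/* Sketch only, reconstructed from the removed packet emission above.
 * For REG selectors, the offset is a register dword index rather than a
 * GPU virtual address, which is why dst (or src) may be NULL. */
void si_cp_copy_data(struct si_context *sctx,
                     unsigned dst_sel, struct si_resource *dst, unsigned dst_offset,
                     unsigned src_sel, struct si_resource *src, unsigned src_offset)
{
   struct radeon_cmdbuf *cs = sctx->gfx_cs;
   uint64_t dst_va = (dst ? dst->gpu_address : 0) + dst_offset;
   uint64_t src_va = (src ? src->gpu_address : 0) + src_offset;

   radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
   radeon_emit(cs, COPY_DATA_SRC_SEL(src_sel) |
               COPY_DATA_DST_SEL(dst_sel) |
               COPY_DATA_WR_CONFIRM);
   radeon_emit(cs, src_va);       /* src address lo / register index */
   radeon_emit(cs, src_va >> 32); /* src address hi */
   radeon_emit(cs, dst_va);       /* dst address lo / register index */
   radeon_emit(cs, dst_va >> 32); /* dst address hi */

   /* Keep the buffers referenced for this submission, as the removed
    * radeon_add_to_buffer_list call did. */
   if (src)
      radeon_add_to_buffer_list(sctx, sctx->gfx_cs, src, RADEON_USAGE_READ,
                                RADEON_PRIO_SO_FILLED_SIZE);
   if (dst)
      radeon_add_to_buffer_list(sctx, sctx->gfx_cs, dst, RADEON_USAGE_WRITE,
                                RADEON_PRIO_SO_FILLED_SIZE);
}
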
/* draw packet */
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
- if (sctx->chip_class >= GFX9) {
+ if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
/* Flush caches and wait for the caches to assert idle. */
radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
uint32_t flags = sctx->flags;
+
+ if (!sctx->has_graphics) {
+ /* Only process compute flags. */
+ flags &= SI_CONTEXT_INV_ICACHE |
+ SI_CONTEXT_INV_SMEM_L1 |
+ SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_INV_GLOBAL_L2 |
+ SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
+ SI_CONTEXT_INV_L2_METADATA |
+ SI_CONTEXT_CS_PARTIAL_FLUSH;
+ }
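
To illustrate the masking with a hypothetical caller (the flag names are the ones used in this hunk; the caller itself is not part of the patch):

/* Hypothetical caller on a compute-only context: */
sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB | /* graphics-only: masked out above */
               SI_CONTEXT_INV_GLOBAL_L2;     /* in the compute mask: kept */
/* After the masking, only the L2 invalidation is emitted. */
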
+
uint32_t cp_coher_cntl = 0;
uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
SI_CONTEXT_FLUSH_AND_INV_DB);
/* Make sure ME is idle (it executes most packets) before continuing.
* This prevents read-after-write hazards between PFP and ME.
*/
- if (cp_coher_cntl ||
- (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
- SI_CONTEXT_INV_VMEM_L1 |
- SI_CONTEXT_INV_GLOBAL_L2 |
- SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
+ if (sctx->has_graphics &&
+ (cp_coher_cntl ||
+ (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
+ SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_INV_GLOBAL_L2 |
+ SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
}
si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));
+ if (sctx->bo_list_add_all_gfx_resources)
+ si_gfx_resources_add_all_to_bo_list(sctx);
+
/* Set the rasterization primitive type.
*
* This must be done after si_decompress_textures, which can call
}
if (sctx->do_update_shaders && !si_update_shaders(sctx))
- return;
+ goto return_cleanup;
if (index_size) {
/* Translate or upload, if needed. */
* need_cs_space flush before we add buffers to the buffer list.
*/
if (!si_upload_vertex_buffer_descriptors(sctx))
- return;
+ goto return_cleanup;
/* Use optimal packet order based on whether we need to sync the pipeline. */
if (unlikely(sctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.render_cond);
if (!si_upload_graphics_shader_descriptors(sctx))
- return;
+ goto return_cleanup;
/* Emit all states except possibly render condition. */
si_emit_all_states(sctx, info, masked_atoms);
if (G_0286E8_WAVESIZE(sctx->spi_tmpring_size))
sctx->num_spill_draw_calls++;
}
+
+return_cleanup:
if (index_size && indexbuf != info->index.resource)
pipe_resource_reference(&indexbuf, NULL);
}
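
Why the early returns became gotos: when the indices are translated or uploaded, indexbuf is repointed at a temporary buffer and holds an extra reference, which the old early returns leaked. The unwind above relies on Gallium's standard refcounting helper; a minimal illustration of its semantics, not part of this patch:

/* pipe_resource_reference(&dst, src) references src (if non-NULL), releases
 * the reference previously held in dst (destroying the resource when its
 * count reaches zero), and stores src in dst.  Passing NULL just releases: */
pipe_resource_reference(&indexbuf, NULL); /* drop the extra ref; indexbuf = NULL */
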