assert(num_tcs_input_cp <= 32);
assert(num_tcs_output_cp <= 32);
- uint64_t ring_va = r600_resource(sctx->tess_rings)->gpu_address;
+ uint64_t ring_va = si_resource(sctx->tess_rings)->gpu_address;
assert((ring_va & u_bit_consecutive(0, 19)) == 0);
tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) |
if (info->count_from_stream_output) {
struct si_streamout_target *t =
(struct si_streamout_target*)info->count_from_stream_output;
- uint64_t va = t->buf_filled_size->gpu_address +
- t->buf_filled_size_offset;
radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
t->stride_in_dw);
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, va); /* src address lo */
- radeon_emit(cs, va >> 32); /* src address hi */
- radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
- radeon_emit(cs, 0); /* unused */
-
- radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
- t->buf_filled_size, RADEON_USAGE_READ,
- RADEON_PRIO_SO_FILLED_SIZE);
+ si_cp_copy_data(sctx,
+ COPY_DATA_REG, NULL,
+ R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2,
+ COPY_DATA_SRC_MEM, t->buf_filled_size,
+ t->buf_filled_size_offset);
}
/* draw packet */
index_max_size = (indexbuf->width0 - index_offset) /
index_size;
- index_va = r600_resource(indexbuf)->gpu_address + index_offset;
+ index_va = si_resource(indexbuf)->gpu_address + index_offset;
radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
- r600_resource(indexbuf),
+ si_resource(indexbuf),
RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
} else {
/* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE,
}
if (indirect) {
- uint64_t indirect_va = r600_resource(indirect->buffer)->gpu_address;
+ uint64_t indirect_va = si_resource(indirect->buffer)->gpu_address;
assert(indirect_va % 8 == 0);
radeon_emit(cs, indirect_va >> 32);
radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
- r600_resource(indirect->buffer),
+ si_resource(indirect->buffer),
RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
unsigned di_src_sel = index_size ? V_0287F0_DI_SRC_SEL_DMA
uint64_t count_va = 0;
if (indirect->indirect_draw_count) {
- struct r600_resource *params_buf =
- r600_resource(indirect->indirect_draw_count);
+ struct si_resource *params_buf =
+ si_resource(indirect->indirect_draw_count);
radeon_add_to_buffer_list(
sctx, sctx->gfx_cs, params_buf,
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
- if (sctx->chip_class >= GFX9) {
+ if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
/* Flush caches and wait for the caches to assert idle. */
radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
uint32_t flags = sctx->flags;
+
+ if (!sctx->has_graphics) {
+ /* Only process compute flags. */
+ flags &= SI_CONTEXT_INV_ICACHE |
+ SI_CONTEXT_INV_SMEM_L1 |
+ SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_INV_GLOBAL_L2 |
+ SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
+ SI_CONTEXT_INV_L2_METADATA |
+ SI_CONTEXT_CS_PARTIAL_FLUSH;
+ }
+
uint32_t cp_coher_cntl = 0;
uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
SI_CONTEXT_FLUSH_AND_INV_DB);
/* Make sure ME is idle (it executes most packets) before continuing.
* This prevents read-after-write hazards between PFP and ME.
*/
- if (cp_coher_cntl ||
- (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
- SI_CONTEXT_INV_VMEM_L1 |
- SI_CONTEXT_INV_GLOBAL_L2 |
- SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
+ if (sctx->has_graphics &&
+ (cp_coher_cntl ||
+ (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
+ SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_INV_GLOBAL_L2 |
+ SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
}
si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));
+ if (sctx->bo_list_add_all_gfx_resources)
+ si_gfx_resources_add_all_to_bo_list(sctx);
+
/* Set the rasterization primitive type.
*
* This must be done after si_decompress_textures, which can call
}
if (sctx->do_update_shaders && !si_update_shaders(sctx))
- return;
+ goto return_cleanup;
if (index_size) {
/* Translate or upload, if needed. */
/* info->start will be added by the drawing code */
index_offset -= start_offset;
} else if (sctx->chip_class <= CIK &&
- r600_resource(indexbuf)->TC_L2_dirty) {
+ si_resource(indexbuf)->TC_L2_dirty) {
/* VI reads index buffers through TC L2, so it doesn't
* need this. */
sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
- r600_resource(indexbuf)->TC_L2_dirty = false;
+ si_resource(indexbuf)->TC_L2_dirty = false;
}
}
/* Indirect buffers use TC L2 on GFX9, but not older hw. */
if (sctx->chip_class <= VI) {
- if (r600_resource(indirect->buffer)->TC_L2_dirty) {
+ if (si_resource(indirect->buffer)->TC_L2_dirty) {
sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
- r600_resource(indirect->buffer)->TC_L2_dirty = false;
+ si_resource(indirect->buffer)->TC_L2_dirty = false;
}
if (indirect->indirect_draw_count &&
- r600_resource(indirect->indirect_draw_count)->TC_L2_dirty) {
+ si_resource(indirect->indirect_draw_count)->TC_L2_dirty) {
sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
- r600_resource(indirect->indirect_draw_count)->TC_L2_dirty = false;
+ si_resource(indirect->indirect_draw_count)->TC_L2_dirty = false;
}
}
}
* need_cs_space flush before we add buffers to the buffer list.
*/
if (!si_upload_vertex_buffer_descriptors(sctx))
- return;
+ goto return_cleanup;
/* Use optimal packet order based on whether we need to sync the pipeline. */
if (unlikely(sctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.render_cond);
if (!si_upload_graphics_shader_descriptors(sctx))
- return;
+ goto return_cleanup;
/* Emit all states except possibly render condition. */
si_emit_all_states(sctx, info, masked_atoms);
if (G_0286E8_WAVESIZE(sctx->spi_tmpring_size))
sctx->num_spill_draw_calls++;
}
+
+return_cleanup:
if (index_size && indexbuf != info->index.resource)
pipe_resource_reference(&indexbuf, NULL);
}
void si_trace_emit(struct si_context *sctx)
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
- uint64_t va = sctx->current_saved_cs->trace_buf->gpu_address;
uint32_t trace_id = ++sctx->current_saved_cs->trace_id;
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(sctx->chip_class >= CIK ? V_370_MEM
- : V_370_MEM_GRBM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_ME));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, trace_id);
+ si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf,
+ 0, 4, V_370_MEM, V_370_ME, &trace_id);
+
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id));