rctx->cmd_buf_is_compute = true;
}
- r600_need_cs_space(rctx, 0, true);
if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI) {
r600_shader_select(&rctx->b.b, rctx->cs_shader_state.shader->sel, &compute_dirty);
current = rctx->cs_shader_state.shader->sel->current;
}
rctx->cs_block_grid_sizes[3] = rctx->cs_block_grid_sizes[7] = 0;
rctx->driver_consts[PIPE_SHADER_COMPUTE].cs_block_grid_size_dirty = true;
+
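+ /* Compute the atomic usage mask first so the CS space reservation can include the counter packets. */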
+ evergreen_emit_atomic_buffer_setup_count(rctx, current, combined_atomics, &atomic_used_mask);
+ r600_need_cs_space(rctx, 0, true, util_bitcount(atomic_used_mask));
+
if (need_buf_const) {
eg_setup_buffer_constants(rctx, PIPE_SHADER_COMPUTE);
}
r600_update_driver_const_buffers(rctx, true);
- if (evergreen_emit_atomic_buffer_setup(rctx, current, combined_atomics, &atomic_used_mask)) {
+ evergreen_emit_atomic_buffer_setup(rctx, true, combined_atomics, atomic_used_mask);
+ if (atomic_used_mask) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}
- }
+ } else
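+ /* Non-TGSI compute shaders track no atomic counters, so nothing extra to reserve. */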
+ r600_need_cs_space(rctx, 0, true, 0);
/* Initialize all the compute-related registers.
*
r600_need_cs_space(rctx,
10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
- R600_MAX_PFP_SYNC_ME_DWORDS, FALSE);
+ R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0);
/* Flush the caches for the first copy only. */
if (rctx->b.flags) {
if (!buffers || !buffers[idx].buffer) {
pipe_resource_reference(&abuf->buffer, NULL);
- astate->enabled_mask &= ~(1 << i);
continue;
}
buf = &buffers[idx];
pipe_resource_reference(&abuf->buffer, buf->buffer);
abuf->buffer_offset = buf->buffer_offset;
abuf->buffer_size = buf->buffer_size;
- astate->enabled_mask |= (1 << i);
}
}
radeon_emit(cs, reloc);
}
-bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
- struct r600_pipe_shader *cs_shader,
- struct r600_shader_atomic *combined_atomics,
- uint8_t *atomic_used_mask_p)
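+/* Scan the bound shaders and collect the atomic counters they use into
+ * *atomic_used_mask_p without emitting anything, so callers can size
+ * their CS space reservation from the mask before emission. */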
+void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
+ struct r600_pipe_shader *cs_shader,
+ struct r600_shader_atomic *combined_atomics,
+ uint8_t *atomic_used_mask_p)
{
- struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
- unsigned pkt_flags = 0;
uint8_t atomic_used_mask = 0;
int i, j, k;
bool is_compute = cs_shader ? true : false;
- if (is_compute)
- pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
-
for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) {
uint8_t num_atomic_stage;
struct r600_pipe_shader *pshader;
}
}
}
+ *atomic_used_mask_p = atomic_used_mask;
+}
+
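+/* Emit the packets programming each atomic counter selected by
+ * atomic_used_mask, as computed by a prior
+ * evergreen_emit_atomic_buffer_setup_count() call. */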
+void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
+ bool is_compute,
+ struct r600_shader_atomic *combined_atomics,
+ uint8_t atomic_used_mask)
+{
+ struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
+ unsigned pkt_flags = 0;
+ uint32_t mask;
+
+ if (is_compute)
+ pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
+
+ mask = atomic_used_mask;
+ if (!mask)
+ return;
- uint32_t mask = atomic_used_mask;
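+ /* Program each atomic counter selected by the mask. */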
while (mask) {
unsigned atomic_index = u_bit_scan(&mask);
struct r600_shader_atomic *atomic = &combined_atomics[atomic_index];
else
evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags);
}
- *atomic_used_mask_p = atomic_used_mask;
- return true;
}
void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
uint32_t pkt_flags = 0;
uint32_t event = EVENT_TYPE_PS_DONE;
- uint32_t mask = astate->enabled_mask;
+ uint32_t mask;
uint64_t dst_offset;
unsigned reloc;
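+/* num_atomics is the number of atomic counters the caller will emit;
+ * their pre/post packets are counted into the reservation below. */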
void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
- boolean count_draw_in)
+ boolean count_draw_in, unsigned num_atomics)
{
/* Flush the DMA IB if it's not empty. */
if (radeon_emitted(ctx->b.dma.cs, 0))
num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
}
+ /* Atomic counters: 8 dwords pre + 8 post per counter, plus 16 post if any counters are used at all. */
+ num_dw += (num_atomics * 16) + (num_atomics ? 16 : 0);
+
/* Count in r600_suspend_queries. */
num_dw += ctx->b.num_cs_dw_queries_suspend;
r600_need_cs_space(rctx,
10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
- 3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE);
+ 3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0);
/* Flush the caches for the first copy only. */
if (rctx->b.flags) {
};
struct r600_atomic_buffer_state {
- uint32_t enabled_mask;
- uint32_t dirty_mask;
struct pipe_shader_buffer buffer[EG_MAX_ATOMIC_BUFFERS];
};
struct pipe_fence_handle **fence);
void r600_begin_new_cs(struct r600_context *ctx);
void r600_flush_emit(struct r600_context *ctx);
-void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
+void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in, unsigned num_atomics);
void r600_emit_pfp_sync_me(struct r600_context *rctx);
void r600_cp_dma_copy_buffer(struct r600_context *rctx,
struct pipe_resource *dst, uint64_t dst_offset,
struct r600_pipe_shader_selector *sel);
struct r600_shader_atomic;
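+/* Atomic counter setup is split: _setup_count computes the used mask so
+ * callers can reserve CS space first, _setup then emits the packets. */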
-bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
- struct r600_pipe_shader *cs_shader,
+void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
+ struct r600_pipe_shader *cs_shader,
+ struct r600_shader_atomic *combined_atomics,
+ uint8_t *atomic_used_mask_p);
+void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
+ bool is_compute,
struct r600_shader_atomic *combined_atomics,
- uint8_t *atomic_used_mask_p);
+ uint8_t atomic_used_mask);
void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
bool is_compute,
struct r600_shader_atomic *combined_atomics,
: (rctx->tes_shader)? rctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]
: info->mode;
- if (rctx->b.chip_class >= EVERGREEN)
- evergreen_emit_atomic_buffer_setup(rctx, NULL, combined_atomics, &atomic_used_mask);
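+ /* Count atomic counter usage first; the CS space reservation below accounts for it. */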
+ if (rctx->b.chip_class >= EVERGREEN) {
+ evergreen_emit_atomic_buffer_setup_count(rctx, NULL, combined_atomics, &atomic_used_mask);
+ }
if (index_size) {
index_offset += info->start * index_size;
evergreen_setup_tess_constants(rctx, info, &num_patches);
/* Emit states. */
- r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE);
+ r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE, util_bitcount(atomic_used_mask));
r600_flush_emit(rctx);
mask = rctx->dirty_atoms;
r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]);
}
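+ /* With space reserved and flushes/state atoms emitted, program the atomic counters. */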
+ if (rctx->b.chip_class >= EVERGREEN) {
+ evergreen_emit_atomic_buffer_setup(rctx, false, combined_atomics, atomic_used_mask);
+ }
+
if (rctx->b.chip_class == CAYMAN) {
/* Copied from radeonsi. */
unsigned primgroup_size = 128; /* recommended without a GS */
static void r600_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
bool include_draw_vbo)
{
- r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo);
+ r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo, 0);
}
/* keep this at the end of this file, please */