From dd3630f71c2fd23e58cf589acadbcdd2a3d4c24a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 3 Nov 2017 11:11:15 +1000 Subject: [PATCH] r600/cs: add support for compute to image/buffers/atomics state This just adds the compute paths to state handling for the main objects Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 86 ++++++++++++++++---- src/gallium/drivers/r600/r600_hw_context.c | 2 + src/gallium/drivers/r600/r600_pipe.h | 6 +- src/gallium/drivers/r600/r600_state_common.c | 4 +- 4 files changed, 79 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 8825f92a8ee..e3731af64e1 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1683,14 +1683,13 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, } static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_atom *atom, - int immed_id_base, int res_id_base, int offset) + int immed_id_base, int res_id_base, int offset, uint32_t pkt_flags) { struct r600_image_state *state = (struct r600_image_state *)atom; struct pipe_framebuffer_state *fb_state = &rctx->framebuffer.state; struct radeon_winsys_cs *cs = rctx->b.gfx.cs; struct r600_texture *rtex; struct r600_resource *resource; - uint32_t pkt_flags = 0; int i; for (i = 0; i < R600_MAX_IMAGES; i++) { @@ -1698,7 +1697,8 @@ static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_at unsigned reloc, immed_reloc; int idx = i + offset; - idx += fb_state->nr_cbufs + (rctx->dual_src_blend ? 1 : 0); + if (!pkt_flags) + idx += fb_state->nr_cbufs + (rctx->dual_src_blend ? 1 : 0); if (!image->base.resource) continue; @@ -1720,7 +1720,10 @@ static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_at RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER); - radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + idx * 0x3C, 13); + if (pkt_flags) + radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + idx * 0x3C, 13); + else + radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + idx * 0x3C, 13); radeon_emit(cs, image->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ radeon_emit(cs, image->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ @@ -1748,7 +1751,11 @@ static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_at radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C84_CB_COLOR0_FMASK */ radeon_emit(cs, reloc); - radeon_set_context_reg(cs, R_028B9C_CB_IMMED0_BASE + (idx * 4), resource->immed_buffer->gpu_address >> 8); + if (pkt_flags) + radeon_compute_set_context_reg(cs, R_028B9C_CB_IMMED0_BASE + (idx * 4), resource->immed_buffer->gpu_address >> 8); + else + radeon_set_context_reg(cs, R_028B9C_CB_IMMED0_BASE + (idx * 4), resource->immed_buffer->gpu_address >> 8); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /**/ radeon_emit(cs, immed_reloc); @@ -1777,7 +1784,15 @@ static void evergreen_emit_fragment_image_state(struct r600_context *rctx, struc { evergreen_emit_image_state(rctx, atom, R600_IMAGE_IMMED_RESOURCE_OFFSET, - R600_IMAGE_REAL_RESOURCE_OFFSET, 0); + R600_IMAGE_REAL_RESOURCE_OFFSET, 0, 0); +} + +static void evergreen_emit_compute_image_state(struct r600_context *rctx, struct r600_atom *atom) +{ + evergreen_emit_image_state(rctx, atom, + EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_IMMED_RESOURCE_OFFSET, + EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_REAL_RESOURCE_OFFSET, + 0, RADEON_CP_PACKET3_COMPUTE_MODE); } static void evergreen_emit_fragment_buffer_state(struct r600_context *rctx, struct r600_atom *atom) @@ -1785,7 +1800,16 @@ static void evergreen_emit_fragment_buffer_state(struct r600_context *rctx, stru int offset = util_bitcount(rctx->fragment_images.enabled_mask); evergreen_emit_image_state(rctx, atom, R600_IMAGE_IMMED_RESOURCE_OFFSET, - R600_IMAGE_REAL_RESOURCE_OFFSET, offset); + R600_IMAGE_REAL_RESOURCE_OFFSET, offset, 0); +} + +static void evergreen_emit_compute_buffer_state(struct r600_context *rctx, struct r600_atom *atom) +{ + int offset = util_bitcount(rctx->compute_images.enabled_mask); + evergreen_emit_image_state(rctx, atom, + EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_IMMED_RESOURCE_OFFSET, + EG_FETCH_CONSTANTS_OFFSET_CS + R600_IMAGE_REAL_RESOURCE_OFFSET, + offset, RADEON_CP_PACKET3_COMPUTE_MODE); } static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom) @@ -2323,7 +2347,7 @@ static void evergreen_emit_ps_sampler_views(struct r600_context *rctx, struct r6 static void evergreen_emit_cs_sampler_views(struct r600_context *rctx, struct r600_atom *atom) { evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views, - EG_FETCH_CONSTANTS_OFFSET_CS + 2, RADEON_CP_PACKET3_COMPUTE_MODE); + EG_FETCH_CONSTANTS_OFFSET_CS + R600_MAX_CONST_BUFFERS, RADEON_CP_PACKET3_COMPUTE_MODE); } static void evergreen_emit_sampler_states(struct r600_context *rctx, @@ -3925,11 +3949,14 @@ static void evergreen_set_shader_buffers(struct pipe_context *ctx, int i, idx; unsigned old_mask; - if (shader != PIPE_SHADER_FRAGMENT && count == 0) + if (shader != PIPE_SHADER_FRAGMENT && + shader != PIPE_SHADER_COMPUTE && count == 0) return; - assert(shader == PIPE_SHADER_FRAGMENT); - istate = &rctx->fragment_buffers; + if (shader == PIPE_SHADER_FRAGMENT) + istate = &rctx->fragment_buffers; + else if (shader == PIPE_SHADER_COMPUTE) + istate = &rctx->compute_buffers; old_mask = istate->enabled_mask; for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) { @@ -4020,12 +4047,16 @@ static void evergreen_set_shader_images(struct pipe_context *ctx, unsigned old_mask; struct r600_image_state *istate = NULL; int idx; - if (shader != PIPE_SHADER_FRAGMENT && count == 0) + if (shader != PIPE_SHADER_FRAGMENT && shader != PIPE_SHADER_COMPUTE && count == 0) return; - istate = &rctx->fragment_images; + if (shader == PIPE_SHADER_FRAGMENT) + istate = &rctx->fragment_images; + else if (shader == PIPE_SHADER_COMPUTE) + istate = &rctx->compute_images; + + assert (shader == PIPE_SHADER_FRAGMENT || shader == PIPE_SHADER_COMPUTE); - assert (shader == PIPE_SHADER_FRAGMENT); old_mask = istate->enabled_mask; for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) { unsigned res_type; @@ -4193,7 +4224,9 @@ void evergreen_init_state_functions(struct r600_context *rctx) } r600_init_atom(rctx, &rctx->framebuffer.atom, id++, evergreen_emit_framebuffer_state, 0); r600_init_atom(rctx, &rctx->fragment_images.atom, id++, evergreen_emit_fragment_image_state, 0); + r600_init_atom(rctx, &rctx->compute_images.atom, id++, evergreen_emit_compute_image_state, 0); r600_init_atom(rctx, &rctx->fragment_buffers.atom, id++, evergreen_emit_fragment_buffer_state, 0); + r600_init_atom(rctx, &rctx->compute_buffers.atom, id++, evergreen_emit_compute_buffer_state, 0); /* shader const */ r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX].atom, id++, evergreen_emit_vs_constant_buffers, 0); r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY].atom, id++, evergreen_emit_gs_constant_buffers, 0); @@ -4609,6 +4642,9 @@ static void evergreen_emit_event_write_eos(struct r600_context *rctx, uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); uint32_t reg_val = (base_reg_0 + atomic->hw_idx * 4) >> 2; + if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE) + event = EVENT_TYPE_CS_DONE; + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags); radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6)); radeon_emit(cs, (dst_offset) & 0xffffffff); @@ -4631,6 +4667,9 @@ static void cayman_emit_event_write_eos(struct r600_context *rctx, RADEON_PRIO_SHADER_RW_BUFFER); uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); + if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE) + event = EVENT_TYPE_CS_DONE; + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags); radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6)); radeon_emit(cs, (dst_offset) & 0xffffffff); @@ -4664,6 +4703,7 @@ static void cayman_write_count_to_gds(struct r600_context *rctx, } bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, + struct r600_pipe_shader *cs_shader, struct r600_shader_atomic *combined_atomics, uint8_t *atomic_used_mask_p) { @@ -4672,12 +4712,19 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, unsigned pkt_flags = 0; uint8_t atomic_used_mask = 0; int i, j, k; + bool is_compute = cs_shader ? true : false; - for (i = 0; i < EG_NUM_HW_STAGES; i++) { + if (is_compute) + pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; + + for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) { uint8_t num_atomic_stage; struct r600_pipe_shader *pshader; - pshader = rctx->hw_shader_stages[i].shader; + if (is_compute) + pshader = cs_shader; + else + pshader = rctx->hw_shader_stages[i].shader; if (!pshader) continue; @@ -4720,6 +4767,7 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, } void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, + bool is_compute, struct r600_shader_atomic *combined_atomics, uint8_t *atomic_used_mask_p) { @@ -4731,6 +4779,9 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, uint64_t dst_offset; unsigned reloc; + if (is_compute) + pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; + mask = *atomic_used_mask_p; if (!mask) return; @@ -4747,6 +4798,9 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, evergreen_emit_event_write_eos(rctx, atomic, resource, pkt_flags); } + if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE) + event = EVENT_TYPE_CS_DONE; + ++rctx->append_fence_id; reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, r600_resource(rctx->append_fence), diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 4218d719207..259e1a826a3 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -351,6 +351,8 @@ void r600_begin_new_cs(struct r600_context *ctx) if (ctx->b.chip_class >= EVERGREEN) { r600_mark_atom_dirty(ctx, &ctx->fragment_images.atom); r600_mark_atom_dirty(ctx, &ctx->fragment_buffers.atom); + r600_mark_atom_dirty(ctx, &ctx->compute_images.atom); + r600_mark_atom_dirty(ctx, &ctx->compute_buffers.atom); } r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_PS].atom); r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index e54fada9a65..711acccc55c 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -38,7 +38,7 @@ #include "tgsi/tgsi_scan.h" -#define R600_NUM_ATOMS 54 +#define R600_NUM_ATOMS 56 #define R600_MAX_IMAGES 8 /* @@ -522,7 +522,9 @@ struct r600_context { struct r600_atomic_buffer_state atomic_buffer_state; /* only have images on fragment shader */ struct r600_image_state fragment_images; + struct r600_image_state compute_images; struct r600_image_state fragment_buffers; + struct r600_image_state compute_buffers; /* Shaders and shader resources. */ struct r600_cso_state vertex_fetch_shader; struct r600_shader_state hw_shader_stages[EG_NUM_HW_STAGES]; @@ -1023,9 +1025,11 @@ void eg_dump_debug_state(struct pipe_context *ctx, FILE *f, struct r600_shader_atomic; bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, + struct r600_pipe_shader *cs_shader, struct r600_shader_atomic *combined_atomics, uint8_t *atomic_used_mask_p); void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, + bool is_compute, struct r600_shader_atomic *combined_atomics, uint8_t *atomic_used_mask_p); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index ee3340f6e81..6b0045f2d52 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1893,7 +1893,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info : info->mode; if (rctx->b.chip_class >= EVERGREEN) - evergreen_emit_atomic_buffer_setup(rctx, combined_atomics, &atomic_used_mask); + evergreen_emit_atomic_buffer_setup(rctx, NULL, combined_atomics, &atomic_used_mask); if (index_size) { index_offset += info->start * index_size; @@ -2177,7 +2177,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info if (rctx->b.chip_class >= EVERGREEN) - evergreen_emit_atomic_buffer_save(rctx, combined_atomics, &atomic_used_mask); + evergreen_emit_atomic_buffer_save(rctx, false, combined_atomics, &atomic_used_mask); if (rctx->trace_buf) eg_trace_emit(rctx); -- 2.30.2