This reduces CPU overhead a little.
The idea is to translate pipe vertex buffers directly into the CS
without using any intermediate representation.
Framerate in Torcs:
before: 32.2
after: 34.6
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
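For context, the new path leans on the driver's r600_atom mechanism used
elsewhere in the diff (r600_init_atom, r600_atom_dirty, r600_emit_atom).
The sketch below shows how those pieces are assumed to fit together; the
names mirror the diff, but the bodies are illustrative only, not the
driver's actual definitions.

/* Minimal sketch of the atom flow this patch builds on (assumed shape,
 * derived from how the calls are used below). */
#include <stdbool.h>

struct r600_context;

struct r600_atom {
	/* Callback that writes this piece of state straight into the CS. */
	void (*emit)(struct r600_context *rctx, struct r600_atom *atom);
	unsigned num_dw; /* upper bound of dwords the callback may write */
	bool dirty;      /* emitted on the next draw if set */
};

static void r600_init_atom(struct r600_atom *atom,
			   void (*emit)(struct r600_context*, struct r600_atom*),
			   unsigned num_dw, unsigned flags)
{
	atom->emit = emit;
	atom->num_dw = num_dw;
	atom->dirty = false;
	(void)flags;
}

static void r600_atom_dirty(struct r600_context *rctx, struct r600_atom *atom)
{
	/* In the driver this also accounts for atom->num_dw of CS space;
	 * here it just flags the atom for the next draw. */
	(void)rctx;
	atom->dirty = true;
}

static void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
{
	/* Called while emitting a draw: the callback writes SET_RESOURCE
	 * packets (plus a NOP relocation) directly into the CS. */
	atom->emit(rctx, atom);
	atom->dirty = false;
}

With this in place, set_vertex_buffers only flips vertex_buffers_dirty,
r600_draw_vbo sizes the atom and marks it dirty, and the per-buffer
fs_resource state objects become unnecessary.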
ctx->num_ps_resources = 176;
ctx->num_vs_resources = 160;
- ctx->num_fs_resources = 16;
r = evergreen_resource_range_init(ctx, &ctx->ps_resources, 0, 176, 0x20);
if (r)
goto out_err;
r = evergreen_resource_range_init(ctx, &ctx->vs_resources, 0x1600, 160, 0x20);
- if (r)
- goto out_err;
- r = evergreen_resource_range_init(ctx, &ctx->fs_resources, 0x7C00, 16, 0x20);
if (r)
goto out_err;
r600_write_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
}
+static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = rctx->cs;
+ struct pipe_vertex_buffer *vb = rctx->vbuf_mgr->real_vertex_buffer;
+ unsigned count = rctx->vbuf_mgr->nr_real_vertex_buffers;
+ unsigned i;
+ uint64_t va;
+
+ for (i = 0; i < count; i++) {
+ struct r600_resource *rbuffer = (struct r600_resource*)vb[i].buffer;
+
+ if (!rbuffer) {
+ continue;
+ }
+
+ va = r600_resource_va(&rctx->screen->screen, &rbuffer->b.b.b);
+ va += vb[i].buffer_offset;
+
+ /* fetch resources start at index 992 */
+ r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
+ r600_write_value(cs, (992 + i) * 8);
+ r600_write_value(cs, va); /* RESOURCEi_WORD0 */
+ r600_write_value(cs, rbuffer->buf->size - vb[i].buffer_offset - 1); /* RESOURCEi_WORD1 */
+ r600_write_value(cs, /* RESOURCEi_WORD2 */
+ S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
+ S_030008_STRIDE(vb[i].stride) |
+ S_030008_BASE_ADDRESS_HI(va >> 32UL));
+ r600_write_value(cs, /* RESOURCEi_WORD3 */
+ S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
+ S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
+ S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
+ S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W));
+ r600_write_value(cs, 0); /* RESOURCEi_WORD4 */
+ r600_write_value(cs, 0); /* RESOURCEi_WORD5 */
+ r600_write_value(cs, 0); /* RESOURCEi_WORD6 */
+ r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 */
+
+ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+ r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+ }
+}
+
void evergreen_init_state_functions(struct r600_context *rctx)
{
r600_init_atom(&rctx->db_misc_state.atom, evergreen_emit_db_misc_state, 6, 0);
r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+ r600_init_atom(&rctx->vertex_buffer_state, evergreen_emit_vertex_buffers, 0, 0);
rctx->context.create_blend_state = evergreen_create_blend_state;
rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state);
void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
-void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
void r600_context_flush(struct r600_context *ctx, unsigned flags);
}
r600_free_resource_range(ctx, &ctx->ps_resources, ctx->num_ps_resources);
r600_free_resource_range(ctx, &ctx->vs_resources, ctx->num_vs_resources);
- r600_free_resource_range(ctx, &ctx->fs_resources, ctx->num_fs_resources);
free(ctx->blocks);
}
r600_add_resource_block(ctx, &ctx->ps_resources, ctx->num_ps_resources, &c);
r600_add_resource_block(ctx, &ctx->vs_resources, ctx->num_vs_resources, &c);
- r600_add_resource_block(ctx, &ctx->fs_resources, ctx->num_fs_resources, &c);
return 0;
}
ctx->num_ps_resources = 160;
ctx->num_vs_resources = 160;
- ctx->num_fs_resources = 16;
r = r600_resource_range_init(ctx, &ctx->ps_resources, 0, 160, 0x1c);
if (r)
goto out_err;
r = r600_resource_range_init(ctx, &ctx->vs_resources, 0x1180, 160, 0x1c);
- if (r)
- goto out_err;
- r = r600_resource_range_init(ctx, &ctx->fs_resources, 0x2300, 16, 0x1c);
if (r)
goto out_err;
r600_context_pipe_state_set_resource(ctx, state, block);
}
-void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid)
-{
- struct r600_block *block = ctx->fs_resources.blocks[rid];
-
- r600_context_pipe_state_set_resource(ctx, state, block);
-}
-
void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
{
struct r600_range *range;
r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
r600_atom_dirty(ctx, &ctx->db_misc_state.atom);
+ r600_atom_dirty(ctx, &ctx->vertex_buffer_state);
if (streamout_suspended) {
ctx->streamout_start = TRUE;
struct radeon_winsys *ws;
struct r600_pipe_state *states[R600_PIPE_NSTATES];
struct r600_vertex_element *vertex_elements;
- struct r600_pipe_resource_state fs_resource[PIPE_MAX_ATTRIBS];
struct pipe_framebuffer_state framebuffer;
unsigned cb_target_mask;
unsigned cb_color_control;
struct r600_surface_sync_cmd surface_sync_cmd;
struct r600_atom r6xx_flush_and_inv_cmd;
struct r600_db_misc_state db_misc_state;
+ struct r600_atom vertex_buffer_state;
/* Below are variables from the old r600_context.
*/
boolean predicate_drawing;
struct r600_range ps_resources;
struct r600_range vs_resources;
- struct r600_range fs_resources;
- int num_ps_resources, num_vs_resources, num_fs_resources;
+ int num_ps_resources, num_vs_resources;
unsigned num_so_targets;
struct r600_so_target *so_targets[PIPE_MAX_SO_BUFFERS];
/* With rasterizer discard, there doesn't have to be a pixel shader.
* In that case, we bind this one: */
void *dummy_pixel_shader;
+
+ bool vertex_buffers_dirty;
};
static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
r600_write_value(cs, db_render_override); /* R_028D10_DB_RENDER_OVERRIDE */
}
+static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = rctx->cs;
+ struct pipe_vertex_buffer *vb = rctx->vbuf_mgr->real_vertex_buffer;
+ unsigned count = rctx->vbuf_mgr->nr_real_vertex_buffers;
+ unsigned i, offset;
+
+ for (i = 0; i < count; i++) {
+ struct r600_resource *rbuffer = (struct r600_resource*)vb[i].buffer;
+
+ if (!rbuffer) {
+ continue;
+ }
+
+ offset = vb[i].buffer_offset;
+
+ /* fetch resources start at index 320 */
+ r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
+ r600_write_value(cs, (320 + i) * 7);
+ r600_write_value(cs, offset); /* RESOURCEi_WORD0 */
+ r600_write_value(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+ r600_write_value(cs, /* RESOURCEi_WORD2 */
+ S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
+ S_038008_STRIDE(vb[i].stride));
+ r600_write_value(cs, 0); /* RESOURCEi_WORD3 */
+ r600_write_value(cs, 0); /* RESOURCEi_WORD4 */
+ r600_write_value(cs, 0); /* RESOURCEi_WORD5 */
+ r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD6 */
+
+ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+ r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
+ }
+}
+
void r600_init_state_functions(struct r600_context *rctx)
{
r600_init_atom(&rctx->db_misc_state.atom, r600_emit_db_misc_state, 4, 0);
r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
+ r600_init_atom(&rctx->vertex_buffer_state, r600_emit_vertex_buffers, 0, 0);
rctx->context.create_blend_state = r600_create_blend_state;
rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state;
const struct pipe_vertex_buffer *buffers)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- int i;
-
- /* Zero states. */
- for (i = 0; i < count; i++) {
- if (!buffers[i].buffer) {
- r600_context_pipe_state_set_fs_resource(rctx, NULL, i);
- }
- }
- for (; i < rctx->vbuf_mgr->nr_real_vertex_buffers; i++) {
- r600_context_pipe_state_set_fs_resource(rctx, NULL, i);
- }
u_vbuf_set_vertex_buffers(rctx->vbuf_mgr, count, buffers);
+ rctx->vertex_buffers_dirty = true;
}
void *r600_create_vertex_elements(struct pipe_context *ctx,
rctx->streamout_append_bitmask = append_bitmask;
}
-static void r600_vertex_buffer_update(struct r600_context *rctx)
-{
- unsigned i, count;
-
- r600_inval_vertex_cache(rctx);
-
- count = rctx->vbuf_mgr->nr_real_vertex_buffers;
-
- for (i = 0 ; i < count; i++) {
- struct r600_pipe_resource_state *rstate = &rctx->fs_resource[i];
- struct pipe_vertex_buffer *vb = &rctx->vbuf_mgr->real_vertex_buffer[i];
-
- if (!vb->buffer) {
- continue;
- }
-
- if (!rstate->id) {
- if (rctx->chip_class >= EVERGREEN) {
- evergreen_pipe_init_buffer_resource(rctx, rstate);
- } else {
- r600_pipe_init_buffer_resource(rctx, rstate);
- }
- }
-
- if (rctx->chip_class >= EVERGREEN) {
- evergreen_pipe_mod_buffer_resource(&rctx->context, rstate, (struct r600_resource*)vb->buffer, vb->buffer_offset, vb->stride, RADEON_USAGE_READ);
- } else {
- r600_pipe_mod_buffer_resource(rstate, (struct r600_resource*)vb->buffer, vb->buffer_offset, vb->stride, RADEON_USAGE_READ);
- }
- r600_context_pipe_state_set_fs_resource(rctx, rstate, i);
- }
-}
-
static int r600_shader_rebuild(struct pipe_context * ctx, struct r600_pipe_shader * shader)
{
struct r600_context *rctx = (struct r600_context *)ctx;
r600_update_derived_state(rctx);
- u_vbuf_draw_begin(rctx->vbuf_mgr, &info);
- r600_vertex_buffer_update(rctx);
+ /* Update vertex buffers. */
+ if ((u_vbuf_draw_begin(rctx->vbuf_mgr, &info) & U_VBUF_BUFFERS_UPDATED) ||
+ rctx->vertex_buffers_dirty) {
+ r600_inval_vertex_cache(rctx);
+ rctx->vertex_buffer_state.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) *
+ rctx->vbuf_mgr->nr_real_vertex_buffers;
+ r600_atom_dirty(rctx, &rctx->vertex_buffer_state);
+ rctx->vertex_buffers_dirty = false;
+ }
if (info.indexed) {
/* Initialize the index buffer struct. */