From 78068a5fbfc21fb52b289a81142b4211628f845c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 7 Dec 2010 06:24:06 +0100 Subject: [PATCH] r300g: cache packet dwords of 3D_LOAD_VBPNTR in a command buffer if possible It's not always possible to preprocess the content of 3D_LOAD_VBPNTR in a command buffer, because the offset to all vertex buffers (which the packet depends on) is derived from the "start" parameter of draw_arrays and the "indexBias" parameter of draw_elements, but we can at least lazily make a command buffer for the case when offset == 0, which should occur most of the time. --- src/gallium/drivers/r300/r300_context.h | 4 ++ src/gallium/drivers/r300/r300_emit.c | 73 ++++++++++++++++++++----- src/gallium/drivers/r300/r300_state.c | 3 +- 3 files changed, 65 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index ba5e9bca406..8d50ea3a30a 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -610,6 +610,10 @@ struct r300_context { /* const tracking for VS */ int vs_const_base; + + /* AOS (PACKET3_3D_LOAD_VBPNTR) command buffer for the case offset=0. */ + uint32_t aos_cb[(16 * 3 + 1) / 2]; + boolean aos_dirty; }; #define foreach_atom(r300, atom) \ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 45814f74935..82391e11a9b 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -29,6 +29,7 @@ #include "util/u_simple_list.h" #include "r300_context.h" +#include "r300_cb.h" #include "r300_cs.h" #include "r300_emit.h" #include "r300_fs.h" @@ -806,39 +807,83 @@ void r300_emit_textures_state(struct r300_context *r300, END_CS; } -void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed) +static void r300_update_aos_cb(struct r300_context *r300, unsigned packet_size) { struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; - struct r300_buffer *buf; - int i; unsigned *hw_format_size = r300->velems->hw_format_size; unsigned size1, size2, aos_count = r300->velems->count; - unsigned packet_size = (aos_count * 3 + 1) / 2; - CS_LOCALS(r300); - - BEGIN_CS(2 + packet_size + aos_count * 2); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); - OUT_CS(aos_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); + int i; + CB_LOCALS; + BEGIN_CB(r300->aos_cb, packet_size); for (i = 0; i < aos_count - 1; i += 2) { vb1 = &vbuf[velem[i].vertex_buffer_index]; vb2 = &vbuf[velem[i+1].vertex_buffer_index]; size1 = hw_format_size[i]; size2 = hw_format_size[i+1]; - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | + OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); - OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); - OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); + OUT_CB(vb1->buffer_offset + velem[i].src_offset); + OUT_CB(vb2->buffer_offset + velem[i+1].src_offset); } if (aos_count & 1) { vb1 = &vbuf[velem[i].vertex_buffer_index]; size1 = hw_format_size[i]; - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); - OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + OUT_CB(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); + OUT_CB(vb1->buffer_offset + velem[i].src_offset); + } + END_CB; + + r300->aos_dirty = FALSE; +} + +void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed) +{ + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; + struct pipe_vertex_element *velem = r300->velems->velem; + struct r300_buffer *buf; + int i; + unsigned aos_count = r300->velems->count; + unsigned packet_size = (aos_count * 3 + 1) / 2; + CS_LOCALS(r300); + + BEGIN_CS(2 + packet_size + aos_count * 2); + OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); + OUT_CS(aos_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); + + if (!offset) { + if (r300->aos_dirty) { + r300_update_aos_cb(r300, packet_size); + } + OUT_CS_TABLE(r300->aos_cb, packet_size); + } else { + struct pipe_vertex_buffer *vb1, *vb2; + unsigned *hw_format_size = r300->velems->hw_format_size; + unsigned size1, size2; + + for (i = 0; i < aos_count - 1; i += 2) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + vb2 = &vbuf[velem[i+1].vertex_buffer_index]; + size1 = hw_format_size[i]; + size2 = hw_format_size[i+1]; + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | + R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); + } + + if (aos_count & 1) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + size1 = hw_format_size[i]; + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + } } for (i = 0; i < aos_count; i++) { diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 70df484199a..c4945fb2fd9 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1509,7 +1509,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, r300->any_user_vbs = any_user_buffer; r300->vertex_buffer_max_index = max_index; - + r300->aos_dirty = TRUE; } else { /* SW TCL. */ draw_set_vertex_buffers(r300->draw, count, buffers); @@ -1716,6 +1716,7 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe, UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state); r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2; + r300->aos_dirty = TRUE; } static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state) -- 2.30.2