From 6b6145204dd4a1112f6e1fe10162636141495b79 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 10 Sep 2014 15:41:40 -0700 Subject: [PATCH] i965: Separate gl_InstanceID and gl_VertexID uploading. We always uploaded them together, mostly out of laziness - both required an additional vertex element. However, gl_VertexID now also requires an additional vertex buffer for storing gl_BaseVertex; for non-indirect draws this also means uploading (a small amount of) data. This is extra overhead we don't need if the shader only uses gl_InstanceID. In particular, our clear shaders currently use gl_InstanceID for doing layered clears, but don't need gl_VertexID. Signed-off-by: Kenneth Graunke Cc: "10.3" Reviewed-by: Ian Romanick Tested-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_draw_upload.c | 29 ++++++++++++++----- src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +- .../drivers/dri/i965/brw_vec4_vs_visitor.cpp | 4 ++- src/mesa/drivers/dri/i965/gen8_draw_upload.c | 22 +++++++++----- 5 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 39cb8566b63..5830aa993d5 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -553,6 +553,7 @@ struct brw_vs_prog_data { GLbitfield64 inputs_read; bool uses_vertexid; + bool uses_instanceid; }; diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index d59ca8bf3e8..2162624cede 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -671,14 +671,16 @@ emit_vertex_buffer_state(struct brw_context *brw, static void brw_emit_vertices(struct brw_context *brw) { - GLuint i, nr_elements; + GLuint i; brw_prepare_vertices(brw); brw_prepare_shader_draw_parameters(brw); brw_emit_query_begin(brw); - nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid; + unsigned nr_elements = brw->vb.nr_enabled; + if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) + ++nr_elements; /* If the VS doesn't read any inputs (calculating vertex position from * a state variable for some reason, for example), emit a single pad @@ -824,13 +826,26 @@ static void brw_emit_vertices(struct brw_context *brw) (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); } - if (brw->vs.prog_data->uses_vertexid) { + if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) { uint32_t dw0 = 0, dw1 = 0; + uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0; + uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0; + uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0; + uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0; - dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_3_SHIFT); + if (brw->vs.prog_data->uses_vertexid) { + comp0 = BRW_VE1_COMPONENT_STORE_SRC; + comp2 = BRW_VE1_COMPONENT_STORE_VID; + } + + if (brw->vs.prog_data->uses_instanceid) { + comp3 = BRW_VE1_COMPONENT_STORE_IID; + } + + dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT); if (brw->gen >= 6) { dw0 |= GEN6_VE0_VALID | diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index cda097f10c3..0f13c0d19f0 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1528,7 +1528,7 @@ vec4_vs_visitor::setup_attributes(int payload_reg) * don't represent it with a flag in inputs_read, so we call it * VERT_ATTRIB_MAX. */ - if (vs_prog_data->uses_vertexid) { + if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) { attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes; nr_attributes++; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index 667ed681b51..72b6ef03b42 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -151,18 +151,20 @@ vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir) * it VERT_ATTRIB_MAX, which setup_attributes() picks up on. */ dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX); - vs_prog_data->uses_vertexid = true; switch (ir->data.location) { case SYSTEM_VALUE_BASE_VERTEX: reg->writemask = WRITEMASK_X; + vs_prog_data->uses_vertexid = true; break; case SYSTEM_VALUE_VERTEX_ID: case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: reg->writemask = WRITEMASK_Z; + vs_prog_data->uses_vertexid = true; break; case SYSTEM_VALUE_INSTANCE_ID: reg->writemask = WRITEMASK_W; + vs_prog_data->uses_instanceid = true; break; default: unreachable("not reached"); diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c index 7e4c1eb3b63..8f0e5155556 100644 --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c @@ -43,7 +43,7 @@ gen8_emit_vertices(struct brw_context *brw) brw_prepare_vertices(brw); brw_prepare_shader_draw_parameters(brw); - if (brw->vs.prog_data->uses_vertexid) { + if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) { unsigned vue = brw->vb.nr_enabled; WARN_ONCE(brw->vs.prog_data->inputs_read & VERT_BIT_EDGEFLAG, @@ -53,14 +53,22 @@ gen8_emit_vertices(struct brw_context *brw) "Trying to insert VID/IID past 33rd vertex element, " "need to reorder the vertex attrbutes."); + unsigned dw1 = 0; + if (brw->vs.prog_data->uses_vertexid) { + dw1 |= GEN8_SGVS_ENABLE_VERTEX_ID | + (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) | /* .z channel */ + (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT); + } + + if (brw->vs.prog_data->uses_instanceid) { + dw1 |= GEN8_SGVS_ENABLE_INSTANCE_ID | + (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */ + (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT); + } + BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); - OUT_BATCH(GEN8_SGVS_ENABLE_VERTEX_ID | - (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) | /* .z channel */ - (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT) | - GEN8_SGVS_ENABLE_INSTANCE_ID | - (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */ - (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT)); + OUT_BATCH(dw1); ADVANCE_BATCH(); BEGIN_BATCH(3); -- 2.30.2