X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fgen6_vs_state.c;h=6653a6d759b431d4c8dfb1d81b808e006932bf52;hb=63c313de84afa9ee65f5d98e1c843ace3a5c9f21;hp=d293482ed8e2da200851d46e7256f2d88449dbf9;hpb=ab973403e445cd8211dba4e87e057a9dc1b1af9d;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index d293482ed8e..6653a6d759b 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -32,62 +32,127 @@ #include "program/prog_parameter.h" #include "program/prog_statevars.h" #include "intel_batchbuffer.h" +#include "glsl/glsl_parser_extras.h" -static void -gen6_upload_vs_push_constants(struct brw_context *brw) +/** + * Creates a streamed BO containing the push constants for the VS or GS on + * gen6+. + * + * Push constants are constant values (such as GLSL uniforms) that are + * pre-loaded into a shader stage's register space at thread spawn time. + * + * Not all GLSL uniforms will be uploaded as push constants: The hardware has + * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be + * uploaded as push constants, while GL 4.4 requires at least 1024 components + * to be usable for the VS. Plus, currently we always use pull constants + * instead of push constants when doing variable-index array access. + * + * See brw_curbe.c for the equivalent gen4/5 code. + */ +void +gen6_upload_push_constants(struct brw_context *brw, + const struct gl_program *prog, + const struct brw_stage_prog_data *prog_data, + struct brw_stage_state *stage_state, + enum aub_state_struct_type type) { - struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &intel->ctx; - /* _BRW_NEW_VERTEX_PROGRAM */ - const struct brw_vertex_program *vp = - brw_vertex_program_const(brw->vertex_program); - unsigned int nr_params = brw->vs.prog_data->nr_params / 4; + struct gl_context *ctx = &brw->ctx; - /* Updates the ParamaterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - /* XXX: Should this happen somewhere before to get our state flag set? */ - _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); - - /* CACHE_NEW_VS_PROG | _NEW_TRANSFORM */ - if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) { - brw->vs.push_const_size = 0; + if (prog_data->nr_params == 0) { + stage_state->push_const_size = 0; } else { - int params_uploaded = 0; - float *param; - int i; + /* Updates the ParamaterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ + /* XXX: Should this happen somewhere before to get our state flag set? */ + _mesa_load_state_parameters(ctx, prog->Parameters); + + gl_constant_value *param; + unsigned i; - param = brw_state_batch(brw, AUB_TRACE_VS_CONSTANTS, - (MAX_CLIP_PLANES + nr_params) * - 4 * sizeof(float), - 32, &brw->vs.push_const_offset); + param = brw_state_batch(brw, type, + prog_data->nr_params * sizeof(gl_constant_value), + 32, &stage_state->push_const_offset); - for (i = 0; i < brw->vs.prog_data->nr_params; i++) { - param[i] = *brw->vs.prog_data->param[i]; + STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); + + /* _NEW_PROGRAM_CONSTANTS + * + * Also _NEW_TRANSFORM -- we may reference clip planes other than as a + * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS + * wouldn't be set for them. + */ + for (i = 0; i < prog_data->nr_params; i++) { + param[i] = *prog_data->param[i]; } - params_uploaded += brw->vs.prog_data->nr_params / 4; if (0) { - printf("VS constant buffer:\n"); - for (i = 0; i < params_uploaded; i++) { - float *buf = param + i * 4; - printf("%d: %f %f %f %f\n", - i, buf[0], buf[1], buf[2], buf[3]); + fprintf(stderr, "%s constants:\n", + _mesa_shader_stage_to_string(stage_state->stage)); + for (i = 0; i < prog_data->nr_params; i++) { + if ((i & 7) == 0) + fprintf(stderr, "g%d: ", + prog_data->dispatch_grf_start_reg + i / 8); + fprintf(stderr, "%8f ", param[i].f); + if ((i & 7) == 7) + fprintf(stderr, "\n"); } + if ((i & 7) != 0) + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); } - brw->vs.push_const_size = (params_uploaded + 1) / 2; + stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8; /* We can only push 32 registers of constants at a time. */ - assert(brw->vs.push_const_size <= 32); + + /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to + * 32" + * + * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS: + * + * "The sum of all four read length fields must be less than or + * equal to the size of 64" + * + * The other shader stages all match the VS's limits. + */ + assert(stage_state->push_const_size <= 32); + } +} + +static void +gen6_upload_vs_push_constants(struct brw_context *brw) +{ + struct brw_stage_state *stage_state = &brw->vs.base; + + /* _BRW_NEW_VERTEX_PROGRAM */ + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); + /* BRW_NEW_VS_PROG_DATA */ + const struct brw_stage_prog_data *prog_data = &brw->vs.prog_data->base.base; + + gen6_upload_push_constants(brw, &vp->program.Base, prog_data, + stage_state, AUB_TRACE_VS_CONSTANTS); + + if (brw->gen >= 7) { + if (brw->gen == 7 && !brw->is_haswell && !brw->is_baytrail) + gen7_emit_vs_workaround_flush(brw); + + gen7_upload_constant_state(brw, stage_state, true /* active */, + _3DSTATE_CONSTANT_VS); } } const struct brw_tracked_state gen6_vs_push_constants = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, - .brw = (BRW_NEW_BATCH | - BRW_NEW_VERTEX_PROGRAM), - .cache = CACHE_NEW_VS_PROG, + .mesa = _NEW_PROGRAM_CONSTANTS | + _NEW_TRANSFORM, + .brw = BRW_NEW_BATCH | + BRW_NEW_PUSH_CONSTANT_ALLOCATION | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_VS_PROG_DATA, }, .emit = gen6_upload_vs_push_constants, }; @@ -95,19 +160,22 @@ const struct brw_tracked_state gen6_vs_push_constants = { static void upload_vs_state(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; + const struct brw_stage_state *stage_state = &brw->vs.base; uint32_t floating_point_mode = 0; - /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section + /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, * 3DSTATE_VS, Dword 5.0 "VS Function Enable": + * * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS * command that causes the VS Function Enable to toggle. Pipeline * flush can be executed by sending a PIPE_CONTROL command with CS * stall bit set and a post sync operation. + * + * We've already done such a flush at the start of state upload, so we + * don't need to do another one here. */ - intel_emit_post_sync_nonzero_flush(intel); - if (brw->vs.push_const_size == 0) { + if (stage_state->push_const_size == 0) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2)); @@ -124,36 +192,36 @@ upload_vs_state(struct brw_context *brw) /* Pointer to the VS constant buffer. Covered by the set of * state flags from gen6_upload_vs_constants */ - OUT_BATCH(brw->vs.push_const_offset + - brw->vs.push_const_size - 1); + OUT_BATCH(stage_state->push_const_offset + + stage_state->push_const_size - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } - /* Use ALT floating point mode for ARB vertex programs, because they - * require 0^0 == 1. - */ - if (intel->ctx.Shader.CurrentVertexProgram == NULL) + if (brw->vs.prog_data->base.base.use_alt_mode) floating_point_mode = GEN6_VS_FLOATING_POINT_MODE_ALT; BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); - OUT_BATCH(brw->vs.prog_offset); + OUT_BATCH(stage_state->prog_offset); OUT_BATCH(floating_point_mode | - ((ALIGN(brw->sampler.count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT)); + ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT) | + ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) << + GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - if (brw->vs.prog_data->total_scratch) { - OUT_RELOC(brw->vs.scratch_bo, + if (brw->vs.prog_data->base.base.total_scratch) { + OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->vs.prog_data->total_scratch) - 11); + ffs(brw->vs.prog_data->base.base.total_scratch) - 11); } else { OUT_BATCH(0); } - OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | - (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | + OUT_BATCH((brw->vs.prog_data->base.base.dispatch_grf_start_reg << + GEN6_VS_DISPATCH_START_GRF_SHIFT) | + (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); OUT_BATCH(((brw->max_vs_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) | @@ -178,25 +246,21 @@ upload_vs_state(struct brw_context *brw) * bug reports that led to this workaround, and may be more than * what is strictly required to avoid the issue. */ - intel_emit_post_sync_nonzero_flush(intel); - - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); - OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_INSTRUCTION_FLUSH | - PIPE_CONTROL_STATE_CACHE_INVALIDATE); - OUT_BATCH(0); /* address */ - OUT_BATCH(0); /* write data */ - ADVANCE_BATCH(); + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_INSTRUCTION_INVALIDATE | + PIPE_CONTROL_STATE_CACHE_INVALIDATE); } const struct brw_tracked_state gen6_vs_state = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, - .brw = (BRW_NEW_CONTEXT | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_BATCH), - .cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER + .mesa = _NEW_PROGRAM_CONSTANTS | + _NEW_TRANSFORM, + .brw = BRW_NEW_BATCH | + BRW_NEW_CONTEXT | + BRW_NEW_PUSH_CONSTANT_ALLOCATION | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_VS_PROG_DATA, }, .emit = upload_vs_state, };