X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fgen6_vs_state.c;h=f1123af6c0b07058e6814b0eb2b01f9270f32957;hb=483f5b348b0f3c0ca7082fd2047c354e8af285e7;hp=0299dc6768c12f0d3d2073ec84360fe557ec9cd9;hpb=70be48dff6bb68c61285641e4d976bfd53e0f00c;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 0299dc6768c..f1123af6c0b 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -29,93 +29,189 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_util.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/prog_parameter.h" -#include "shader/prog_statevars.h" +#include "program/prog_parameter.h" +#include "program/prog_statevars.h" #include "intel_batchbuffer.h" static void -upload_vs_state(struct brw_context *brw) +gen6_prepare_vs_push_constants(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - GLcontext *ctx = &intel->ctx; + struct gl_context *ctx = &intel->ctx; + /* _BRW_NEW_VERTEX_PROGRAM */ const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); - unsigned int nr_params = vp->program.Base.Parameters->NumParameters; - drm_intel_bo *constant_bo; - int i; + unsigned int nr_params = brw->vs.prog_data->nr_params / 4; - BEGIN_BATCH(6); - OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2)); - OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | - (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ - OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | - (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | - (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); - OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) | - GEN6_VS_STATISTICS_ENABLE| - GEN6_VS_ENABLE); - ADVANCE_BATCH(); + if (brw->vertex_program->IsNVProgram) + _mesa_load_tracked_matrices(ctx); - intel_batchbuffer_emit_mi_flush(intel->batch); + /* Updates the ParamaterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ + /* XXX: Should this happen somewhere before to get our state flag set? */ + _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + + /* CACHE_NEW_VS_PROG | _NEW_TRANSFORM */ + if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) { + brw->vs.push_const_size = 0; + } else { + int params_uploaded = 0; + float *param; + int i; - if (vp->use_const_buffer || nr_params == 0) { + param = brw_state_batch(brw, AUB_TRACE_VS_CONSTANTS, + (MAX_CLIP_PLANES + nr_params) * + 4 * sizeof(float), + 32, &brw->vs.push_const_offset); + + /* This should be loaded like any other param, but it's ad-hoc + * until we redo the VS backend. + */ + for (i = 0; i < MAX_CLIP_PLANES; i++) { + if (ctx->Transform.ClipPlanesEnabled & (1 << i)) { + memcpy(param, ctx->Transform._ClipUserPlane[i], 4 * sizeof(float)); + param += 4; + params_uploaded++; + } + } + /* Align to a reg for convenience for brw_vs_emit.c */ + if (params_uploaded & 1) { + param += 4; + params_uploaded++; + } + + if (brw->vs.prog_data->uses_new_param_layout) { + for (i = 0; i < brw->vs.prog_data->nr_params; i++) { + *param = *brw->vs.prog_data->param[i]; + param++; + } + params_uploaded += brw->vs.prog_data->nr_params / 4; + } else { + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + memcpy(param + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + params_uploaded++; + } + } + } + + if (0) { + printf("VS constant buffer:\n"); + for (i = 0; i < params_uploaded; i++) { + float *buf = param + i * 4; + printf("%d: %f %f %f %f\n", + i, buf[0], buf[1], buf[2], buf[3]); + } + } + + brw->vs.push_const_size = (params_uploaded + 1) / 2; + /* We can only push 32 registers of constants at a time. */ + assert(brw->vs.push_const_size <= 32); + } +} + +const struct brw_tracked_state gen6_vs_constants = { + .dirty = { + .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, + .brw = (BRW_NEW_BATCH | + BRW_NEW_VERTEX_PROGRAM), + .cache = CACHE_NEW_VS_PROG, + }, + .prepare = gen6_prepare_vs_push_constants, +}; + +static void +upload_vs_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + if (brw->vs.push_const_size == 0) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); - OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2)); + OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } else { - if (brw->vertex_program->IsNVProgram) - _mesa_load_tracked_matrices(ctx); - - /* Updates the ParamaterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); - - constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo", - nr_params * 4 * sizeof(float), - 4096); - intel_bo_map_gtt_preferred(intel, constant_bo, GL_TRUE); - for (i = 0; i < nr_params; i++) { - memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float), - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - } - intel_bo_unmap_gtt_preferred(intel, constant_bo); - BEGIN_BATCH(5); - OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | + OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); - OUT_RELOC(constant_bo, - I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - ALIGN(nr_params, 2) / 2 - 1); + /* Pointer to the VS constant buffer. Covered by the set of + * state flags from gen6_prepare_wm_constants + */ + OUT_BATCH(brw->vs.push_const_offset + + brw->vs.push_const_size - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); + } + + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); + OUT_BATCH(brw->vs.prog_offset); + OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | + GEN6_VS_FLOATING_POINT_MODE_ALT | + (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - drm_intel_bo_unreference(constant_bo); + if (brw->vs.prog_data->total_scratch) { + OUT_RELOC(brw->vs.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->vs.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); } - intel_batchbuffer_emit_mi_flush(intel->batch); + OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | + (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | + (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); + + OUT_BATCH(((brw->vs_max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) | + GEN6_VS_STATISTICS_ENABLE | + GEN6_VS_ENABLE); + ADVANCE_BATCH(); + + /* Based on my reading of the simulator, the VS constants don't get + * pulled into the VS FF unit until an appropriate pipeline flush + * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds + * references to them into a little FIFO. The flushes are common, + * but don't reliably happen between this and a 3DPRIMITIVE, causing + * the primitive to use the wrong constants. Then the FIFO + * containing the constant setup gets added to again on the next + * constants change, and eventually when a flush does happen the + * unit is overwhelmed by constant changes and dies. + * + * To avoid this, send a PIPE_CONTROL down the line that will + * update the unit immediately loading the constants. The flush + * type bits here were those set by the STATE_BASE_ADDRESS whose + * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the + * bug reports that led to this workaround, and may be more than + * what is strictly required to avoid the issue. + */ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_STATE_CACHE_INVALIDATE); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); } const struct brw_tracked_state gen6_vs_state = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_NR_VS_SURFACES | + .brw = (BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE | - BRW_NEW_CONTEXT), + BRW_NEW_CONTEXT | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_BATCH), .cache = CACHE_NEW_VS_PROG }, .emit = upload_vs_state,