static void
brw_upload_vs_unit(struct brw_context *brw)
{
- struct intel_context *intel = &brw->intel;
- struct gl_context *ctx = &intel->ctx;
struct brw_vs_unit_state *vs;
vs = brw_state_batch(brw, AUB_TRACE_VS_STATE,
memset(vs, 0, sizeof(*vs));
/* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */
- vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1;
+ vs->thread0.grf_reg_count =
+ ALIGN(brw->vs.prog_data->base.total_grf, 16) / 16 - 1;
vs->thread0.kernel_start_pointer =
brw_program_reloc(brw,
brw->vs.state_offset +
brw->vs.prog_offset +
(vs->thread0.grf_reg_count << 1)) >> 6;
- vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ /* Use ALT floating point mode for ARB vertex programs, because they
+ * require 0^0 == 1.
+ */
+ if (brw->ctx.Shader.CurrentVertexProgram == NULL)
+ vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ else
+ vs->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
+
/* Choosing multiple program flow means that we may get 2-vertex threads,
* which will have the channel mask for dwords 4-7 enabled in the thread,
* and those dwords will be written to the second URB handle when we
* brw_urb_WRITE() results.
*/
- /* Disable single program flow on Ironlake. We cannot reliably get
+ /* Force single program flow on Ironlake. We cannot reliably get
* all applications working without it. See:
* https://bugs.freedesktop.org/show_bug.cgi?id=29172
*
* The most notable and reliably failing application is the Humus
* demo "CelShading"
*/
- vs->thread1.single_program_flow = (intel->gen == 5);
+ vs->thread1.single_program_flow = (brw->gen == 5);
vs->thread1.binding_table_entry_count = 0;
- if (brw->vs.prog_data->total_scratch != 0) {
+ if (brw->vs.prog_data->base.total_scratch != 0) {
vs->thread2.scratch_space_base_pointer =
brw->vs.scratch_bo->offset >> 10; /* reloc */
vs->thread2.per_thread_scratch_space =
- ffs(brw->vs.prog_data->total_scratch) - 11;
+ ffs(brw->vs.prog_data->base.total_scratch) - 11;
} else {
vs->thread2.scratch_space_base_pointer = 0;
vs->thread2.per_thread_scratch_space = 0;
}
- vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
- vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
- vs->thread3.dispatch_grf_start_reg = 1;
+ vs->thread3.urb_entry_read_length = brw->vs.prog_data->base.urb_read_length;
+ vs->thread3.const_urb_entry_read_length
+ = brw->vs.prog_data->base.curb_read_length;
+ vs->thread3.dispatch_grf_start_reg =
+ brw->vs.prog_data->base.dispatch_grf_start_reg;
vs->thread3.urb_entry_read_offset = 0;
/* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM, BRW_NEW_VERTEX_PROGRAM */
- if (ctx->Transform.ClipPlanesEnabled && !brw->vs.prog_data->uses_new_param_layout) {
- /* Note that we read in the userclip planes as well, hence
- * clip_start:
- */
- vs->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
- }
- else {
- vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
- }
-
+ vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
/* BRW_NEW_URB_FENCE */
- if (intel->gen == 5) {
+ if (brw->gen == 5) {
switch (brw->urb.nr_vs_entries) {
case 8:
case 12:
case 32:
break;
case 64:
- assert(intel->is_g4x);
+ assert(brw->is_g4x);
break;
default:
assert(0);
vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2,
1, brw->max_vs_threads) - 1;
- /* No samplers for ARB_vp programs:
- */
- /* It has to be set to 0 for Ironlake
- */
- vs->vs5.sampler_count = 0;
+ if (brw->gen == 5)
+ vs->vs5.sampler_count = 0; /* hardware requirement */
+ else {
+ /* CACHE_NEW_SAMPLER */
+ vs->vs5.sampler_count = (brw->vs.sampler_count + 3) / 4;
+ }
+
if (unlikely(INTEL_DEBUG & DEBUG_STATS))
vs->thread4.stats_enable = 1;
*/
vs->vs6.vs_enable = 1;
+ /* Set the sampler state pointer, and its reloc
+ */
+ if (brw->vs.sampler_count) {
+ vs->vs5.sampler_state_pointer =
+ (brw->batch.bo->offset + brw->vs.sampler_offset) >> 5;
+ drm_intel_bo_emit_reloc(brw->batch.bo,
+ brw->vs.state_offset +
+ offsetof(struct brw_vs_unit_state, vs5),
+ brw->batch.bo,
+ brw->vs.sampler_offset | vs->vs5.sampler_count,
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
+ }
+
/* Emit scratch space relocation */
- if (brw->vs.prog_data->total_scratch != 0) {
- drm_intel_bo_emit_reloc(intel->batch.bo,
+ if (brw->vs.prog_data->base.total_scratch != 0) {
+ drm_intel_bo_emit_reloc(brw->batch.bo,
brw->vs.state_offset +
offsetof(struct brw_vs_unit_state, thread2),
brw->vs.scratch_bo,
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE |
BRW_NEW_VERTEX_PROGRAM),
- .cache = CACHE_NEW_VS_PROG
+ .cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER
},
.emit = brw_upload_vs_unit,
};