53ac335752bbfdd00efaa5646ed4fa43884d1ce0
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vs_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
34 #include "brw_context.h"
35 #include "brw_state.h"
36 #include "brw_defines.h"
37 #include "main/macros.h"
38
39 static void
40 brw_upload_vs_unit(struct brw_context *brw)
41 {
42 struct brw_stage_state *stage_state = &brw->vs.base;
43
44 struct brw_vs_unit_state *vs;
45
46 vs = brw_state_batch(brw, AUB_TRACE_VS_STATE,
47 sizeof(*vs), 32, &stage_state->state_offset);
48 memset(vs, 0, sizeof(*vs));
49
50 /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */
51 vs->thread0.grf_reg_count =
52 ALIGN(brw->vs.prog_data->base.total_grf, 16) / 16 - 1;
53 vs->thread0.kernel_start_pointer =
54 brw_program_reloc(brw,
55 stage_state->state_offset +
56 offsetof(struct brw_vs_unit_state, thread0),
57 stage_state->prog_offset +
58 (vs->thread0.grf_reg_count << 1)) >> 6;
59
60 /* Use ALT floating point mode for ARB vertex programs, because they
61 * require 0^0 == 1.
62 */
63 if (brw->ctx.Shader.CurrentProgram[MESA_SHADER_VERTEX] == NULL)
64 vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
65 else
66 vs->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
67
68 /* Choosing multiple program flow means that we may get 2-vertex threads,
69 * which will have the channel mask for dwords 4-7 enabled in the thread,
70 * and those dwords will be written to the second URB handle when we
71 * brw_urb_WRITE() results.
72 */
73 /* Force single program flow on Ironlake. We cannot reliably get
74 * all applications working without it. See:
75 * https://bugs.freedesktop.org/show_bug.cgi?id=29172
76 *
77 * The most notable and reliably failing application is the Humus
78 * demo "CelShading"
79 */
80 vs->thread1.single_program_flow = (brw->gen == 5);
81
82 vs->thread1.binding_table_entry_count =
83 brw->vs.prog_data->base.base.binding_table.size_bytes / 4;
84
85 if (brw->vs.prog_data->base.base.total_scratch != 0) {
86 vs->thread2.scratch_space_base_pointer =
87 stage_state->scratch_bo->offset64 >> 10; /* reloc */
88 vs->thread2.per_thread_scratch_space =
89 ffs(brw->vs.prog_data->base.base.total_scratch) - 11;
90 } else {
91 vs->thread2.scratch_space_base_pointer = 0;
92 vs->thread2.per_thread_scratch_space = 0;
93 }
94
95 vs->thread3.urb_entry_read_length = brw->vs.prog_data->base.urb_read_length;
96 vs->thread3.const_urb_entry_read_length
97 = brw->vs.prog_data->base.base.curb_read_length;
98 vs->thread3.dispatch_grf_start_reg =
99 brw->vs.prog_data->base.base.dispatch_grf_start_reg;
100 vs->thread3.urb_entry_read_offset = 0;
101
102 /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM, BRW_NEW_VERTEX_PROGRAM */
103 vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
104
105 /* BRW_NEW_URB_FENCE */
106 if (brw->gen == 5) {
107 switch (brw->urb.nr_vs_entries) {
108 case 8:
109 case 12:
110 case 16:
111 case 32:
112 case 64:
113 case 96:
114 case 128:
115 case 168:
116 case 192:
117 case 224:
118 case 256:
119 vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries >> 2;
120 break;
121 default:
122 unreachable("not reached");
123 }
124 } else {
125 switch (brw->urb.nr_vs_entries) {
126 case 8:
127 case 12:
128 case 16:
129 case 32:
130 break;
131 case 64:
132 assert(brw->is_g4x);
133 break;
134 default:
135 unreachable("not reached");
136 }
137 vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries;
138 }
139
140 vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
141
142 vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2,
143 1, brw->max_vs_threads) - 1;
144
145 if (brw->gen == 5)
146 vs->vs5.sampler_count = 0; /* hardware requirement */
147 else {
148 /* CACHE_NEW_SAMPLER */
149 vs->vs5.sampler_count = (stage_state->sampler_count + 3) / 4;
150 }
151
152
153 if (unlikely(INTEL_DEBUG & DEBUG_STATS))
154 vs->thread4.stats_enable = 1;
155
156 /* Vertex program always enabled:
157 */
158 vs->vs6.vs_enable = 1;
159
160 /* Set the sampler state pointer, and its reloc
161 */
162 if (stage_state->sampler_count) {
163 vs->vs5.sampler_state_pointer =
164 (brw->batch.bo->offset64 + stage_state->sampler_offset) >> 5;
165 drm_intel_bo_emit_reloc(brw->batch.bo,
166 stage_state->state_offset +
167 offsetof(struct brw_vs_unit_state, vs5),
168 brw->batch.bo,
169 (stage_state->sampler_offset |
170 vs->vs5.sampler_count),
171 I915_GEM_DOMAIN_INSTRUCTION, 0);
172 }
173
174 /* Emit scratch space relocation */
175 if (brw->vs.prog_data->base.base.total_scratch != 0) {
176 drm_intel_bo_emit_reloc(brw->batch.bo,
177 stage_state->state_offset +
178 offsetof(struct brw_vs_unit_state, thread2),
179 stage_state->scratch_bo,
180 vs->thread2.per_thread_scratch_space,
181 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
182 }
183
184 SET_DIRTY_BIT(cache, CACHE_NEW_VS_UNIT);
185 }
186
187 const struct brw_tracked_state brw_vs_unit = {
188 .dirty = {
189 .mesa = _NEW_TRANSFORM,
190 .brw = (BRW_NEW_BATCH |
191 BRW_NEW_PROGRAM_CACHE |
192 BRW_NEW_CURBE_OFFSETS |
193 BRW_NEW_URB_FENCE |
194 BRW_NEW_VERTEX_PROGRAM),
195 .cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER
196 },
197 .emit = brw_upload_vs_unit,
198 };