i965/vs: Drop hack that created NIR for fixed function vertex programs.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vs.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/compiler.h"
34 #include "brw_context.h"
35 #include "brw_vs.h"
36 #include "brw_util.h"
37 #include "brw_state.h"
38 #include "program/prog_print.h"
39 #include "program/prog_parameter.h"
40 #include "brw_nir.h"
41
42 #include "util/ralloc.h"
43
44 bool
45 brw_codegen_vs_prog(struct brw_context *brw,
46 struct gl_shader_program *prog,
47 struct brw_vertex_program *vp,
48 struct brw_vs_prog_key *key)
49 {
50 GLuint program_size;
51 const GLuint *program;
52 struct brw_vs_prog_data prog_data;
53 struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
54 void *mem_ctx;
55 int i;
56 struct brw_shader *vs = NULL;
57 bool start_busy = false;
58 double start_time = 0;
59
60 if (prog)
61 vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
62
63 memset(&prog_data, 0, sizeof(prog_data));
64
65 /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
66 if (!prog)
67 stage_prog_data->use_alt_mode = true;
68
69 mem_ctx = ralloc_context(NULL);
70
71 brw_assign_common_binding_table_offsets(MESA_SHADER_VERTEX,
72 brw->intelScreen->devinfo,
73 prog, &vp->program.Base,
74 &prog_data.base.base, 0);
75
76 /* Allocate the references to the uniforms that will end up in the
77 * prog_data associated with the compiled program, and which will be freed
78 * by the state cache.
79 */
80 int param_count = vp->program.Base.nir->num_uniforms;
81 if (!brw->intelScreen->compiler->scalar_vs)
82 param_count *= 4;
83
84 if (vs)
85 prog_data.base.base.nr_image_params = vs->base.NumImages;
86
87 /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
88 * planes as uniforms.
89 */
90 param_count += key->nr_userclip_plane_consts * 4;
91
92 stage_prog_data->param =
93 rzalloc_array(NULL, const gl_constant_value *, param_count);
94 stage_prog_data->pull_param =
95 rzalloc_array(NULL, const gl_constant_value *, param_count);
96 stage_prog_data->image_param =
97 rzalloc_array(NULL, struct brw_image_param,
98 stage_prog_data->nr_image_params);
99 stage_prog_data->nr_params = param_count;
100
101 if (prog) {
102 brw_nir_setup_glsl_uniforms(vp->program.Base.nir, prog, &vp->program.Base,
103 &prog_data.base.base,
104 brw->intelScreen->compiler->scalar_vs);
105 } else {
106 brw_nir_setup_arb_uniforms(vp->program.Base.nir, &vp->program.Base,
107 &prog_data.base.base);
108 }
109
110 GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
111 prog_data.inputs_read = vp->program.Base.InputsRead;
112
113 if (key->copy_edgeflag) {
114 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
115 prog_data.inputs_read |= VERT_BIT_EDGEFLAG;
116 }
117
118 if (brw->gen < 6) {
119 /* Put dummy slots into the VUE for the SF to put the replaced
120 * point sprite coords in. We shouldn't need these dummy slots,
121 * which take up precious URB space, but it would mean that the SF
122 * doesn't get nice aligned pairs of input coords into output
123 * coords, which would be a pain to handle.
124 */
125 for (i = 0; i < 8; i++) {
126 if (key->point_coord_replace & (1 << i))
127 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
128 }
129
130 /* if back colors are written, allocate slots for front colors too */
131 if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
132 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
133 if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
134 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
135 }
136
137 /* In order for legacy clipping to work, we need to populate the clip
138 * distance varying slots whenever clipping is enabled, even if the vertex
139 * shader doesn't write to gl_ClipDistance.
140 */
141 if (key->nr_userclip_plane_consts > 0) {
142 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
143 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
144 }
145
146 brw_compute_vue_map(brw->intelScreen->devinfo,
147 &prog_data.base.vue_map, outputs_written,
148 prog ? prog->SeparateShader : false);
149
150 unsigned nr_attributes = _mesa_bitcount_64(prog_data.inputs_read);
151
152 /* gl_VertexID and gl_InstanceID are system values, but arrive via an
153 * incoming vertex attribute. So, add an extra slot.
154 */
155 if (vp->program.Base.SystemValuesRead &
156 (BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
157 BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
158 nr_attributes++;
159 }
160
161 /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry
162 * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in
163 * vec4 mode, the hardware appears to wedge unless we read something.
164 */
165 if (brw->intelScreen->compiler->scalar_vs)
166 prog_data.base.urb_read_length = DIV_ROUND_UP(nr_attributes, 2);
167 else
168 prog_data.base.urb_read_length = DIV_ROUND_UP(MAX2(nr_attributes, 1), 2);
169
170 prog_data.nr_attributes = nr_attributes;
171
172 /* Since vertex shaders reuse the same VUE entry for inputs and outputs
173 * (overwriting the original contents), we need to make sure the size is
174 * the larger of the two.
175 */
176 const unsigned vue_entries =
177 MAX2(nr_attributes, prog_data.base.vue_map.num_slots);
178
179 if (brw->gen == 6)
180 prog_data.base.urb_entry_size = DIV_ROUND_UP(vue_entries, 8);
181 else
182 prog_data.base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4);
183
184 if (0) {
185 _mesa_fprint_program_opt(stderr, &vp->program.Base, PROG_PRINT_DEBUG,
186 true);
187 }
188
189 if (unlikely(brw->perf_debug)) {
190 start_busy = (brw->batch.last_bo &&
191 drm_intel_bo_busy(brw->batch.last_bo));
192 start_time = get_time();
193 }
194
195 if (unlikely(INTEL_DEBUG & DEBUG_VS))
196 brw_dump_ir("vertex", prog, vs ? &vs->base : NULL, &vp->program.Base);
197
198 int st_index = -1;
199 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
200 st_index = brw_get_shader_time_index(brw, prog, &vp->program.Base, ST_VS);
201
202 /* Emit GEN4 code.
203 */
204 program = brw_vs_emit(brw, mem_ctx, key, &prog_data,
205 &vp->program, prog, st_index, &program_size);
206 if (program == NULL) {
207 ralloc_free(mem_ctx);
208 return false;
209 }
210
211 if (unlikely(brw->perf_debug) && vs) {
212 if (vs->compiled_once) {
213 brw_vs_debug_recompile(brw, prog, key);
214 }
215 if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
216 perf_debug("VS compile took %.03f ms and stalled the GPU\n",
217 (get_time() - start_time) * 1000);
218 }
219 vs->compiled_once = true;
220 }
221
222 /* Scratch space is used for register spilling */
223 if (prog_data.base.base.total_scratch) {
224 brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo,
225 prog_data.base.base.total_scratch *
226 brw->max_vs_threads);
227 }
228
229 brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
230 key, sizeof(struct brw_vs_prog_key),
231 program, program_size,
232 &prog_data, sizeof(prog_data),
233 &brw->vs.base.prog_offset, &brw->vs.prog_data);
234 ralloc_free(mem_ctx);
235
236 return true;
237 }
238
/**
 * Report one integer program-key field if it differs between two keys.
 *
 * When \p a and \p b are not equal, a perf_debug() line containing
 * \p name and both values is emitted.
 *
 * \return true if the two values differ, false if they are equal.
 */
static bool
key_debug(struct brw_context *brw, const char *name, int a, int b)
{
   const bool changed = (a != b);

   if (changed) {
      perf_debug(" %s %d->%d\n", name, a, b);
   }

   return changed;
}
248
249 void
250 brw_vs_debug_recompile(struct brw_context *brw,
251 struct gl_shader_program *prog,
252 const struct brw_vs_prog_key *key)
253 {
254 struct brw_cache_item *c = NULL;
255 const struct brw_vs_prog_key *old_key = NULL;
256 bool found = false;
257
258 perf_debug("Recompiling vertex shader for program %d\n", prog->Name);
259
260 for (unsigned int i = 0; i < brw->cache.size; i++) {
261 for (c = brw->cache.items[i]; c; c = c->next) {
262 if (c->cache_id == BRW_CACHE_VS_PROG) {
263 old_key = c->key;
264
265 if (old_key->program_string_id == key->program_string_id)
266 break;
267 }
268 }
269 if (c)
270 break;
271 }
272
273 if (!c) {
274 perf_debug(" Didn't find previous compile in the shader cache for "
275 "debug\n");
276 return;
277 }
278
279 for (unsigned int i = 0; i < VERT_ATTRIB_MAX; i++) {
280 found |= key_debug(brw, "Vertex attrib w/a flags",
281 old_key->gl_attrib_wa_flags[i],
282 key->gl_attrib_wa_flags[i]);
283 }
284
285 found |= key_debug(brw, "legacy user clipping",
286 old_key->nr_userclip_plane_consts,
287 key->nr_userclip_plane_consts);
288
289 found |= key_debug(brw, "copy edgeflag",
290 old_key->copy_edgeflag, key->copy_edgeflag);
291 found |= key_debug(brw, "PointCoord replace",
292 old_key->point_coord_replace, key->point_coord_replace);
293 found |= key_debug(brw, "vertex color clamping",
294 old_key->clamp_vertex_color, key->clamp_vertex_color);
295
296 found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
297
298 if (!found) {
299 perf_debug(" Something else\n");
300 }
301 }
302
303 static bool
304 brw_vs_state_dirty(struct brw_context *brw)
305 {
306 return brw_state_dirty(brw,
307 _NEW_BUFFERS |
308 _NEW_LIGHT |
309 _NEW_POINT |
310 _NEW_POLYGON |
311 _NEW_TEXTURE |
312 _NEW_TRANSFORM,
313 BRW_NEW_VERTEX_PROGRAM |
314 BRW_NEW_VS_ATTRIB_WORKAROUNDS);
315 }
316
317 static void
318 brw_vs_populate_key(struct brw_context *brw,
319 struct brw_vs_prog_key *key)
320 {
321 struct gl_context *ctx = &brw->ctx;
322 /* BRW_NEW_VERTEX_PROGRAM */
323 struct brw_vertex_program *vp =
324 (struct brw_vertex_program *)brw->vertex_program;
325 struct gl_program *prog = (struct gl_program *) brw->vertex_program;
326 int i;
327
328 memset(key, 0, sizeof(*key));
329
330 /* Just upload the program verbatim for now. Always send it all
331 * the inputs it asks for, whether they are varying or not.
332 */
333 key->program_string_id = vp->id;
334
335 if (ctx->Transform.ClipPlanesEnabled != 0 &&
336 ctx->API == API_OPENGL_COMPAT &&
337 !vp->program.Base.UsesClipDistanceOut) {
338 key->nr_userclip_plane_consts =
339 _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
340 }
341
342 /* _NEW_POLYGON */
343 if (brw->gen < 6) {
344 key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
345 ctx->Polygon.BackMode != GL_FILL);
346 }
347
348 if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
349 VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) {
350 /* _NEW_LIGHT | _NEW_BUFFERS */
351 key->clamp_vertex_color = ctx->Light._ClampVertexColor;
352 }
353
354 /* _NEW_POINT */
355 if (brw->gen < 6 && ctx->Point.PointSprite) {
356 for (i = 0; i < 8; i++) {
357 if (ctx->Point.CoordReplace[i])
358 key->point_coord_replace |= (1 << i);
359 }
360 }
361
362 /* _NEW_TEXTURE */
363 brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
364 &key->tex);
365
366 /* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
367 memcpy(key->gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
368 sizeof(brw->vb.attrib_wa_flags));
369 }
370
371 void
372 brw_upload_vs_prog(struct brw_context *brw)
373 {
374 struct gl_context *ctx = &brw->ctx;
375 struct gl_shader_program **current = ctx->_Shader->CurrentProgram;
376 struct brw_vs_prog_key key;
377 /* BRW_NEW_VERTEX_PROGRAM */
378 struct brw_vertex_program *vp =
379 (struct brw_vertex_program *)brw->vertex_program;
380
381 if (!brw_vs_state_dirty(brw))
382 return;
383
384 brw_vs_populate_key(brw, &key);
385
386 if (!brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG,
387 &key, sizeof(key),
388 &brw->vs.base.prog_offset, &brw->vs.prog_data)) {
389 bool success = brw_codegen_vs_prog(brw, current[MESA_SHADER_VERTEX],
390 vp, &key);
391 (void) success;
392 assert(success);
393 }
394 brw->vs.base.prog_data = &brw->vs.prog_data->base.base;
395 }
396
397 bool
398 brw_vs_precompile(struct gl_context *ctx,
399 struct gl_shader_program *shader_prog,
400 struct gl_program *prog)
401 {
402 struct brw_context *brw = brw_context(ctx);
403 struct brw_vs_prog_key key;
404 uint32_t old_prog_offset = brw->vs.base.prog_offset;
405 struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data;
406 bool success;
407
408 struct gl_vertex_program *vp = (struct gl_vertex_program *) prog;
409 struct brw_vertex_program *bvp = brw_vertex_program(vp);
410
411 memset(&key, 0, sizeof(key));
412
413 brw_setup_tex_for_precompile(brw, &key.tex, prog);
414 key.program_string_id = bvp->id;
415 key.clamp_vertex_color =
416 (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
417 VARYING_BIT_BFC0 | VARYING_BIT_BFC1));
418
419 success = brw_codegen_vs_prog(brw, shader_prog, bvp, &key);
420
421 brw->vs.base.prog_offset = old_prog_offset;
422 brw->vs.prog_data = old_prog_data;
423
424 return success;
425 }