7c783f66864abccf82b5988d148f4fe851947503
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vs.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/compiler.h"
34 #include "main/context.h"
35 #include "brw_context.h"
36 #include "brw_vs.h"
37 #include "brw_util.h"
38 #include "brw_state.h"
39 #include "program/prog_print.h"
40 #include "program/prog_parameter.h"
41 #include "brw_nir.h"
42
43 #include "util/ralloc.h"
44
45 bool
46 brw_codegen_vs_prog(struct brw_context *brw,
47 struct gl_shader_program *prog,
48 struct brw_vertex_program *vp,
49 struct brw_vs_prog_key *key)
50 {
51 const struct brw_compiler *compiler = brw->intelScreen->compiler;
52 GLuint program_size;
53 const GLuint *program;
54 struct brw_vs_prog_data prog_data;
55 struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
56 void *mem_ctx;
57 int i;
58 struct brw_shader *vs = NULL;
59 bool start_busy = false;
60 double start_time = 0;
61
62 if (prog)
63 vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
64
65 memset(&prog_data, 0, sizeof(prog_data));
66
67 /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
68 if (!prog)
69 stage_prog_data->use_alt_mode = true;
70
71 mem_ctx = ralloc_context(NULL);
72
73 brw_assign_common_binding_table_offsets(MESA_SHADER_VERTEX,
74 brw->intelScreen->devinfo,
75 prog, &vp->program.Base,
76 &prog_data.base.base, 0);
77
78 /* Allocate the references to the uniforms that will end up in the
79 * prog_data associated with the compiled program, and which will be freed
80 * by the state cache.
81 */
82 int param_count = vp->program.Base.nir->num_uniforms;
83 if (!compiler->scalar_stage[MESA_SHADER_VERTEX])
84 param_count *= 4;
85
86 if (vs)
87 prog_data.base.base.nr_image_params = vs->base.NumImages;
88
89 /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
90 * planes as uniforms.
91 */
92 param_count += key->nr_userclip_plane_consts * 4;
93
94 stage_prog_data->param =
95 rzalloc_array(NULL, const gl_constant_value *, param_count);
96 stage_prog_data->pull_param =
97 rzalloc_array(NULL, const gl_constant_value *, param_count);
98 stage_prog_data->image_param =
99 rzalloc_array(NULL, struct brw_image_param,
100 stage_prog_data->nr_image_params);
101 stage_prog_data->nr_params = param_count;
102
103 if (prog) {
104 brw_nir_setup_glsl_uniforms(vp->program.Base.nir, prog, &vp->program.Base,
105 &prog_data.base.base,
106 compiler->scalar_stage[MESA_SHADER_VERTEX]);
107 } else {
108 brw_nir_setup_arb_uniforms(vp->program.Base.nir, &vp->program.Base,
109 &prog_data.base.base);
110 }
111
112 GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
113 prog_data.inputs_read = vp->program.Base.InputsRead;
114
115 if (key->copy_edgeflag) {
116 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
117 prog_data.inputs_read |= VERT_BIT_EDGEFLAG;
118 }
119
120 if (brw->gen < 6) {
121 /* Put dummy slots into the VUE for the SF to put the replaced
122 * point sprite coords in. We shouldn't need these dummy slots,
123 * which take up precious URB space, but it would mean that the SF
124 * doesn't get nice aligned pairs of input coords into output
125 * coords, which would be a pain to handle.
126 */
127 for (i = 0; i < 8; i++) {
128 if (key->point_coord_replace & (1 << i))
129 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
130 }
131
132 /* if back colors are written, allocate slots for front colors too */
133 if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
134 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
135 if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
136 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
137 }
138
139 /* In order for legacy clipping to work, we need to populate the clip
140 * distance varying slots whenever clipping is enabled, even if the vertex
141 * shader doesn't write to gl_ClipDistance.
142 */
143 if (key->nr_userclip_plane_consts > 0) {
144 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
145 outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
146 }
147
148 brw_compute_vue_map(brw->intelScreen->devinfo,
149 &prog_data.base.vue_map, outputs_written,
150 prog ? prog->SeparateShader : false);
151
152 if (0) {
153 _mesa_fprint_program_opt(stderr, &vp->program.Base, PROG_PRINT_DEBUG,
154 true);
155 }
156
157 if (unlikely(brw->perf_debug)) {
158 start_busy = (brw->batch.last_bo &&
159 drm_intel_bo_busy(brw->batch.last_bo));
160 start_time = get_time();
161 }
162
163 if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
164 brw_dump_ir("vertex", prog, vs ? &vs->base : NULL, &vp->program.Base);
165
166 fprintf(stderr, "VS Output ");
167 brw_print_vue_map(stderr, &prog_data.base.vue_map);
168 }
169
170 int st_index = -1;
171 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
172 st_index = brw_get_shader_time_index(brw, prog, &vp->program.Base, ST_VS);
173
174 /* Emit GEN4 code.
175 */
176 char *error_str;
177 program = brw_compile_vs(compiler, brw, mem_ctx, key,
178 &prog_data, vp->program.Base.nir,
179 brw_select_clip_planes(&brw->ctx),
180 !_mesa_is_gles3(&brw->ctx),
181 st_index, &program_size, &error_str);
182 if (program == NULL) {
183 if (prog) {
184 prog->LinkStatus = false;
185 ralloc_strcat(&prog->InfoLog, error_str);
186 }
187
188 _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", error_str);
189
190 ralloc_free(mem_ctx);
191 return false;
192 }
193
194 if (unlikely(brw->perf_debug) && vs) {
195 if (vs->compiled_once) {
196 brw_vs_debug_recompile(brw, prog, key);
197 }
198 if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
199 perf_debug("VS compile took %.03f ms and stalled the GPU\n",
200 (get_time() - start_time) * 1000);
201 }
202 vs->compiled_once = true;
203 }
204
205 /* Scratch space is used for register spilling */
206 if (prog_data.base.base.total_scratch) {
207 brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo,
208 prog_data.base.base.total_scratch *
209 brw->max_vs_threads);
210 }
211
212 brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
213 key, sizeof(struct brw_vs_prog_key),
214 program, program_size,
215 &prog_data, sizeof(prog_data),
216 &brw->vs.base.prog_offset, &brw->vs.prog_data);
217 ralloc_free(mem_ctx);
218
219 return true;
220 }
221
/**
 * Report a difference between the old and new value of one program key field.
 *
 * \param brw   driver context (referenced by the perf_debug() macro)
 * \param name  human-readable label for the field
 * \param a     value from the previous key
 * \param b     value from the new key
 *
 * \return true if the values differ (a message is emitted), false otherwise.
 */
static bool
key_debug(struct brw_context *brw, const char *name, int a, int b)
{
   if (a == b)
      return false;

   perf_debug(" %s %d->%d\n", name, a, b);
   return true;
}
231
232 void
233 brw_vs_debug_recompile(struct brw_context *brw,
234 struct gl_shader_program *prog,
235 const struct brw_vs_prog_key *key)
236 {
237 struct brw_cache_item *c = NULL;
238 const struct brw_vs_prog_key *old_key = NULL;
239 bool found = false;
240
241 perf_debug("Recompiling vertex shader for program %d\n", prog->Name);
242
243 for (unsigned int i = 0; i < brw->cache.size; i++) {
244 for (c = brw->cache.items[i]; c; c = c->next) {
245 if (c->cache_id == BRW_CACHE_VS_PROG) {
246 old_key = c->key;
247
248 if (old_key->program_string_id == key->program_string_id)
249 break;
250 }
251 }
252 if (c)
253 break;
254 }
255
256 if (!c) {
257 perf_debug(" Didn't find previous compile in the shader cache for "
258 "debug\n");
259 return;
260 }
261
262 for (unsigned int i = 0; i < VERT_ATTRIB_MAX; i++) {
263 found |= key_debug(brw, "Vertex attrib w/a flags",
264 old_key->gl_attrib_wa_flags[i],
265 key->gl_attrib_wa_flags[i]);
266 }
267
268 found |= key_debug(brw, "legacy user clipping",
269 old_key->nr_userclip_plane_consts,
270 key->nr_userclip_plane_consts);
271
272 found |= key_debug(brw, "copy edgeflag",
273 old_key->copy_edgeflag, key->copy_edgeflag);
274 found |= key_debug(brw, "PointCoord replace",
275 old_key->point_coord_replace, key->point_coord_replace);
276 found |= key_debug(brw, "vertex color clamping",
277 old_key->clamp_vertex_color, key->clamp_vertex_color);
278
279 found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
280
281 if (!found) {
282 perf_debug(" Something else\n");
283 }
284 }
285
286 static bool
287 brw_vs_state_dirty(struct brw_context *brw)
288 {
289 return brw_state_dirty(brw,
290 _NEW_BUFFERS |
291 _NEW_LIGHT |
292 _NEW_POINT |
293 _NEW_POLYGON |
294 _NEW_TEXTURE |
295 _NEW_TRANSFORM,
296 BRW_NEW_VERTEX_PROGRAM |
297 BRW_NEW_VS_ATTRIB_WORKAROUNDS);
298 }
299
300 static void
301 brw_vs_populate_key(struct brw_context *brw,
302 struct brw_vs_prog_key *key)
303 {
304 struct gl_context *ctx = &brw->ctx;
305 /* BRW_NEW_VERTEX_PROGRAM */
306 struct brw_vertex_program *vp =
307 (struct brw_vertex_program *)brw->vertex_program;
308 struct gl_program *prog = (struct gl_program *) brw->vertex_program;
309 int i;
310
311 memset(key, 0, sizeof(*key));
312
313 /* Just upload the program verbatim for now. Always send it all
314 * the inputs it asks for, whether they are varying or not.
315 */
316 key->program_string_id = vp->id;
317
318 if (ctx->Transform.ClipPlanesEnabled != 0 &&
319 (ctx->API == API_OPENGL_COMPAT ||
320 ctx->API == API_OPENGLES) &&
321 vp->program.Base.ClipDistanceArraySize == 0) {
322 key->nr_userclip_plane_consts =
323 _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
324 }
325
326 /* _NEW_POLYGON */
327 if (brw->gen < 6) {
328 key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
329 ctx->Polygon.BackMode != GL_FILL);
330 }
331
332 if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
333 VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) {
334 /* _NEW_LIGHT | _NEW_BUFFERS */
335 key->clamp_vertex_color = ctx->Light._ClampVertexColor;
336 }
337
338 /* _NEW_POINT */
339 if (brw->gen < 6 && ctx->Point.PointSprite) {
340 for (i = 0; i < 8; i++) {
341 if (ctx->Point.CoordReplace[i])
342 key->point_coord_replace |= (1 << i);
343 }
344 }
345
346 /* _NEW_TEXTURE */
347 brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
348 &key->tex);
349
350 /* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
351 memcpy(key->gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
352 sizeof(brw->vb.attrib_wa_flags));
353 }
354
355 void
356 brw_upload_vs_prog(struct brw_context *brw)
357 {
358 struct gl_context *ctx = &brw->ctx;
359 struct gl_shader_program **current = ctx->_Shader->CurrentProgram;
360 struct brw_vs_prog_key key;
361 /* BRW_NEW_VERTEX_PROGRAM */
362 struct brw_vertex_program *vp =
363 (struct brw_vertex_program *)brw->vertex_program;
364
365 if (!brw_vs_state_dirty(brw))
366 return;
367
368 brw_vs_populate_key(brw, &key);
369
370 if (!brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG,
371 &key, sizeof(key),
372 &brw->vs.base.prog_offset, &brw->vs.prog_data)) {
373 bool success = brw_codegen_vs_prog(brw, current[MESA_SHADER_VERTEX],
374 vp, &key);
375 (void) success;
376 assert(success);
377 }
378 brw->vs.base.prog_data = &brw->vs.prog_data->base.base;
379 }
380
381 bool
382 brw_vs_precompile(struct gl_context *ctx,
383 struct gl_shader_program *shader_prog,
384 struct gl_program *prog)
385 {
386 struct brw_context *brw = brw_context(ctx);
387 struct brw_vs_prog_key key;
388 uint32_t old_prog_offset = brw->vs.base.prog_offset;
389 struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data;
390 bool success;
391
392 struct gl_vertex_program *vp = (struct gl_vertex_program *) prog;
393 struct brw_vertex_program *bvp = brw_vertex_program(vp);
394
395 memset(&key, 0, sizeof(key));
396
397 brw_setup_tex_for_precompile(brw, &key.tex, prog);
398 key.program_string_id = bvp->id;
399 key.clamp_vertex_color =
400 (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
401 VARYING_BIT_BFC0 | VARYING_BIT_BFC1));
402
403 success = brw_codegen_vs_prog(brw, shader_prog, bvp, &key);
404
405 brw->vs.base.prog_offset = old_prog_offset;
406 brw->vs.prog_data = old_prog_data;
407
408 return success;
409 }