2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "main/compiler.h"
34 #include "brw_context.h"
37 #include "brw_state.h"
38 #include "program/prog_print.h"
39 #include "program/prog_parameter.h"
42 #include "util/ralloc.h"
45 brw_codegen_vs_prog(struct brw_context
*brw
,
46 struct gl_shader_program
*prog
,
47 struct brw_vertex_program
*vp
,
48 struct brw_vs_prog_key
*key
)
51 const GLuint
*program
;
52 struct brw_vs_prog_data prog_data
;
53 struct brw_stage_prog_data
*stage_prog_data
= &prog_data
.base
.base
;
56 struct brw_shader
*vs
= NULL
;
57 bool start_busy
= false;
58 double start_time
= 0;
60 if (!vp
->program
.Base
.nir
) {
61 /* Normally we generate NIR in LinkShader() or
62 * ProgramStringNotify(), but Mesa's fixed-function vertex program
63 * handling doesn't notify the driver at all. Just do it here, at
64 * the last minute, even though it's lame.
66 assert(vp
->program
.Base
.Id
== 0 && prog
== NULL
);
67 vp
->program
.Base
.nir
=
68 brw_create_nir(brw
, NULL
, &vp
->program
.Base
, MESA_SHADER_VERTEX
,
69 brw
->intelScreen
->compiler
->scalar_vs
);
73 vs
= (struct brw_shader
*) prog
->_LinkedShaders
[MESA_SHADER_VERTEX
];
75 memset(&prog_data
, 0, sizeof(prog_data
));
77 /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
79 stage_prog_data
->use_alt_mode
= true;
81 mem_ctx
= ralloc_context(NULL
);
83 brw_assign_common_binding_table_offsets(MESA_SHADER_VERTEX
,
84 brw
->intelScreen
->devinfo
,
85 prog
, &vp
->program
.Base
,
86 &prog_data
.base
.base
, 0);
88 /* Allocate the references to the uniforms that will end up in the
89 * prog_data associated with the compiled program, and which will be freed
92 int param_count
= vp
->program
.Base
.nir
->num_uniforms
;
93 if (!brw
->intelScreen
->compiler
->scalar_vs
)
97 prog_data
.base
.base
.nr_image_params
= vs
->base
.NumImages
;
99 /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
100 * planes as uniforms.
102 param_count
+= key
->nr_userclip_plane_consts
* 4;
104 stage_prog_data
->param
=
105 rzalloc_array(NULL
, const gl_constant_value
*, param_count
);
106 stage_prog_data
->pull_param
=
107 rzalloc_array(NULL
, const gl_constant_value
*, param_count
);
108 stage_prog_data
->image_param
=
109 rzalloc_array(NULL
, struct brw_image_param
,
110 stage_prog_data
->nr_image_params
);
111 stage_prog_data
->nr_params
= param_count
;
114 brw_nir_setup_glsl_uniforms(vp
->program
.Base
.nir
, prog
, &vp
->program
.Base
,
115 &prog_data
.base
.base
,
116 brw
->intelScreen
->compiler
->scalar_vs
);
118 brw_nir_setup_arb_uniforms(vp
->program
.Base
.nir
, &vp
->program
.Base
,
119 &prog_data
.base
.base
);
122 GLbitfield64 outputs_written
= vp
->program
.Base
.OutputsWritten
;
123 prog_data
.inputs_read
= vp
->program
.Base
.InputsRead
;
125 if (key
->copy_edgeflag
) {
126 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_EDGE
);
127 prog_data
.inputs_read
|= VERT_BIT_EDGEFLAG
;
131 /* Put dummy slots into the VUE for the SF to put the replaced
132 * point sprite coords in. We shouldn't need these dummy slots,
133 * which take up precious URB space, but it would mean that the SF
134 * doesn't get nice aligned pairs of input coords into output
135 * coords, which would be a pain to handle.
137 for (i
= 0; i
< 8; i
++) {
138 if (key
->point_coord_replace
& (1 << i
))
139 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_TEX0
+ i
);
142 /* if back colors are written, allocate slots for front colors too */
143 if (outputs_written
& BITFIELD64_BIT(VARYING_SLOT_BFC0
))
144 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_COL0
);
145 if (outputs_written
& BITFIELD64_BIT(VARYING_SLOT_BFC1
))
146 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_COL1
);
149 /* In order for legacy clipping to work, we need to populate the clip
150 * distance varying slots whenever clipping is enabled, even if the vertex
151 * shader doesn't write to gl_ClipDistance.
153 if (key
->nr_userclip_plane_consts
> 0) {
154 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0
);
155 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1
);
158 brw_compute_vue_map(brw
->intelScreen
->devinfo
,
159 &prog_data
.base
.vue_map
, outputs_written
,
160 prog
? prog
->SeparateShader
: false);
162 unsigned nr_attributes
= _mesa_bitcount_64(prog_data
.inputs_read
);
164 /* gl_VertexID and gl_InstanceID are system values, but arrive via an
165 * incoming vertex attribute. So, add an extra slot.
167 if (vp
->program
.Base
.SystemValuesRead
&
168 (BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
) |
169 BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID
))) {
173 /* The BSpec says we always have to read at least one thing from the VF,
174 * and it appears that the hardware wedges otherwise.
176 if (nr_attributes
== 0 && !brw
->intelScreen
->compiler
->scalar_vs
)
179 prog_data
.nr_attributes
= nr_attributes
;
180 prog_data
.base
.urb_read_length
= DIV_ROUND_UP(nr_attributes
, 2);
182 /* Since vertex shaders reuse the same VUE entry for inputs and outputs
183 * (overwriting the original contents), we need to make sure the size is
184 * the larger of the two.
186 const unsigned vue_entries
=
187 MAX2(nr_attributes
, prog_data
.base
.vue_map
.num_slots
);
190 prog_data
.base
.urb_entry_size
= DIV_ROUND_UP(vue_entries
, 8);
192 prog_data
.base
.urb_entry_size
= DIV_ROUND_UP(vue_entries
, 4);
195 _mesa_fprint_program_opt(stderr
, &vp
->program
.Base
, PROG_PRINT_DEBUG
,
199 if (unlikely(brw
->perf_debug
)) {
200 start_busy
= (brw
->batch
.last_bo
&&
201 drm_intel_bo_busy(brw
->batch
.last_bo
));
202 start_time
= get_time();
205 if (unlikely(INTEL_DEBUG
& DEBUG_VS
))
206 brw_dump_ir("vertex", prog
, &vs
->base
, &vp
->program
.Base
);
209 if (INTEL_DEBUG
& DEBUG_SHADER_TIME
)
210 st_index
= brw_get_shader_time_index(brw
, prog
, &vp
->program
.Base
, ST_VS
);
214 program
= brw_vs_emit(brw
, mem_ctx
, key
, &prog_data
,
215 &vp
->program
, prog
, st_index
, &program_size
);
216 if (program
== NULL
) {
217 ralloc_free(mem_ctx
);
221 if (unlikely(brw
->perf_debug
) && vs
) {
222 if (vs
->compiled_once
) {
223 brw_vs_debug_recompile(brw
, prog
, key
);
225 if (start_busy
&& !drm_intel_bo_busy(brw
->batch
.last_bo
)) {
226 perf_debug("VS compile took %.03f ms and stalled the GPU\n",
227 (get_time() - start_time
) * 1000);
229 vs
->compiled_once
= true;
232 /* Scratch space is used for register spilling */
233 if (prog_data
.base
.base
.total_scratch
) {
234 brw_get_scratch_bo(brw
, &brw
->vs
.base
.scratch_bo
,
235 prog_data
.base
.base
.total_scratch
*
236 brw
->max_vs_threads
);
239 brw_upload_cache(&brw
->cache
, BRW_CACHE_VS_PROG
,
240 key
, sizeof(struct brw_vs_prog_key
),
241 program
, program_size
,
242 &prog_data
, sizeof(prog_data
),
243 &brw
->vs
.base
.prog_offset
, &brw
->vs
.prog_data
);
244 ralloc_free(mem_ctx
);
/**
 * Report one program-key field if it differs between two compiles.
 *
 * \param brw   driver context (used by the perf_debug() macro)
 * \param name  human-readable field name for the message
 * \param a     value of the field in the previous key
 * \param b     value of the field in the current key
 *
 * \return true if the values differ (and a message was emitted).
 */
static bool
key_debug(struct brw_context *brw, const char *name, int a, int b)
{
   const bool changed = (a != b);

   if (changed)
      perf_debug(" %s %d->%d\n", name, a, b);

   return changed;
}
260 brw_vs_debug_recompile(struct brw_context
*brw
,
261 struct gl_shader_program
*prog
,
262 const struct brw_vs_prog_key
*key
)
264 struct brw_cache_item
*c
= NULL
;
265 const struct brw_vs_prog_key
*old_key
= NULL
;
268 perf_debug("Recompiling vertex shader for program %d\n", prog
->Name
);
270 for (unsigned int i
= 0; i
< brw
->cache
.size
; i
++) {
271 for (c
= brw
->cache
.items
[i
]; c
; c
= c
->next
) {
272 if (c
->cache_id
== BRW_CACHE_VS_PROG
) {
275 if (old_key
->program_string_id
== key
->program_string_id
)
284 perf_debug(" Didn't find previous compile in the shader cache for "
289 for (unsigned int i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
290 found
|= key_debug(brw
, "Vertex attrib w/a flags",
291 old_key
->gl_attrib_wa_flags
[i
],
292 key
->gl_attrib_wa_flags
[i
]);
295 found
|= key_debug(brw
, "legacy user clipping",
296 old_key
->nr_userclip_plane_consts
,
297 key
->nr_userclip_plane_consts
);
299 found
|= key_debug(brw
, "copy edgeflag",
300 old_key
->copy_edgeflag
, key
->copy_edgeflag
);
301 found
|= key_debug(brw
, "PointCoord replace",
302 old_key
->point_coord_replace
, key
->point_coord_replace
);
303 found
|= key_debug(brw
, "vertex color clamping",
304 old_key
->clamp_vertex_color
, key
->clamp_vertex_color
);
306 found
|= brw_debug_recompile_sampler_key(brw
, &old_key
->tex
, &key
->tex
);
309 perf_debug(" Something else\n");
314 brw_vs_state_dirty(struct brw_context
*brw
)
316 return brw_state_dirty(brw
,
323 BRW_NEW_VERTEX_PROGRAM
|
324 BRW_NEW_VS_ATTRIB_WORKAROUNDS
);
328 brw_vs_populate_key(struct brw_context
*brw
,
329 struct brw_vs_prog_key
*key
)
331 struct gl_context
*ctx
= &brw
->ctx
;
332 /* BRW_NEW_VERTEX_PROGRAM */
333 struct brw_vertex_program
*vp
=
334 (struct brw_vertex_program
*)brw
->vertex_program
;
335 struct gl_program
*prog
= (struct gl_program
*) brw
->vertex_program
;
338 memset(key
, 0, sizeof(*key
));
340 /* Just upload the program verbatim for now. Always send it all
341 * the inputs it asks for, whether they are varying or not.
343 key
->program_string_id
= vp
->id
;
345 if (ctx
->Transform
.ClipPlanesEnabled
!= 0 &&
346 ctx
->API
== API_OPENGL_COMPAT
&&
347 !vp
->program
.Base
.UsesClipDistanceOut
) {
348 key
->nr_userclip_plane_consts
=
349 _mesa_logbase2(ctx
->Transform
.ClipPlanesEnabled
) + 1;
354 key
->copy_edgeflag
= (ctx
->Polygon
.FrontMode
!= GL_FILL
||
355 ctx
->Polygon
.BackMode
!= GL_FILL
);
358 if (prog
->OutputsWritten
& (VARYING_BIT_COL0
| VARYING_BIT_COL1
|
359 VARYING_BIT_BFC0
| VARYING_BIT_BFC1
)) {
360 /* _NEW_LIGHT | _NEW_BUFFERS */
361 key
->clamp_vertex_color
= ctx
->Light
._ClampVertexColor
;
365 if (brw
->gen
< 6 && ctx
->Point
.PointSprite
) {
366 for (i
= 0; i
< 8; i
++) {
367 if (ctx
->Point
.CoordReplace
[i
])
368 key
->point_coord_replace
|= (1 << i
);
373 brw_populate_sampler_prog_key_data(ctx
, prog
, brw
->vs
.base
.sampler_count
,
376 /* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
377 memcpy(key
->gl_attrib_wa_flags
, brw
->vb
.attrib_wa_flags
,
378 sizeof(brw
->vb
.attrib_wa_flags
));
382 brw_upload_vs_prog(struct brw_context
*brw
)
384 struct gl_context
*ctx
= &brw
->ctx
;
385 struct gl_shader_program
**current
= ctx
->_Shader
->CurrentProgram
;
386 struct brw_vs_prog_key key
;
387 /* BRW_NEW_VERTEX_PROGRAM */
388 struct brw_vertex_program
*vp
=
389 (struct brw_vertex_program
*)brw
->vertex_program
;
391 if (!brw_vs_state_dirty(brw
))
394 brw_vs_populate_key(brw
, &key
);
396 if (!brw_search_cache(&brw
->cache
, BRW_CACHE_VS_PROG
,
398 &brw
->vs
.base
.prog_offset
, &brw
->vs
.prog_data
)) {
399 bool success
= brw_codegen_vs_prog(brw
, current
[MESA_SHADER_VERTEX
],
404 brw
->vs
.base
.prog_data
= &brw
->vs
.prog_data
->base
.base
;
408 brw_vs_precompile(struct gl_context
*ctx
,
409 struct gl_shader_program
*shader_prog
,
410 struct gl_program
*prog
)
412 struct brw_context
*brw
= brw_context(ctx
);
413 struct brw_vs_prog_key key
;
414 uint32_t old_prog_offset
= brw
->vs
.base
.prog_offset
;
415 struct brw_vs_prog_data
*old_prog_data
= brw
->vs
.prog_data
;
418 struct gl_vertex_program
*vp
= (struct gl_vertex_program
*) prog
;
419 struct brw_vertex_program
*bvp
= brw_vertex_program(vp
);
421 memset(&key
, 0, sizeof(key
));
423 brw_setup_tex_for_precompile(brw
, &key
.tex
, prog
);
424 key
.program_string_id
= bvp
->id
;
425 key
.clamp_vertex_color
=
426 (prog
->OutputsWritten
& (VARYING_BIT_COL0
| VARYING_BIT_COL1
|
427 VARYING_BIT_BFC0
| VARYING_BIT_BFC1
));
429 success
= brw_codegen_vs_prog(brw
, shader_prog
, bvp
, &key
);
431 brw
->vs
.base
.prog_offset
= old_prog_offset
;
432 brw
->vs
.prog_data
= old_prog_data
;