2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "main/compiler.h"
34 #include "brw_context.h"
37 #include "brw_state.h"
38 #include "program/prog_print.h"
39 #include "program/prog_parameter.h"
42 #include "util/ralloc.h"
45 brw_codegen_vs_prog(struct brw_context
*brw
,
46 struct gl_shader_program
*prog
,
47 struct brw_vertex_program
*vp
,
48 struct brw_vs_prog_key
*key
)
51 const GLuint
*program
;
52 struct brw_vs_prog_data prog_data
;
53 struct brw_stage_prog_data
*stage_prog_data
= &prog_data
.base
.base
;
56 struct brw_shader
*vs
= NULL
;
57 bool start_busy
= false;
58 double start_time
= 0;
61 vs
= (struct brw_shader
*) prog
->_LinkedShaders
[MESA_SHADER_VERTEX
];
63 memset(&prog_data
, 0, sizeof(prog_data
));
65 /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
67 stage_prog_data
->use_alt_mode
= true;
69 mem_ctx
= ralloc_context(NULL
);
71 brw_assign_common_binding_table_offsets(MESA_SHADER_VERTEX
,
72 brw
->intelScreen
->devinfo
,
73 prog
, &vp
->program
.Base
,
74 &prog_data
.base
.base
, 0);
76 /* Allocate the references to the uniforms that will end up in the
77 * prog_data associated with the compiled program, and which will be freed
80 int param_count
= vp
->program
.Base
.nir
->num_uniforms
;
81 if (!brw
->intelScreen
->compiler
->scalar_vs
)
85 prog_data
.base
.base
.nr_image_params
= vs
->base
.NumImages
;
87 /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
90 param_count
+= key
->nr_userclip_plane_consts
* 4;
92 stage_prog_data
->param
=
93 rzalloc_array(NULL
, const gl_constant_value
*, param_count
);
94 stage_prog_data
->pull_param
=
95 rzalloc_array(NULL
, const gl_constant_value
*, param_count
);
96 stage_prog_data
->image_param
=
97 rzalloc_array(NULL
, struct brw_image_param
,
98 stage_prog_data
->nr_image_params
);
99 stage_prog_data
->nr_params
= param_count
;
102 brw_nir_setup_glsl_uniforms(vp
->program
.Base
.nir
, prog
, &vp
->program
.Base
,
103 &prog_data
.base
.base
,
104 brw
->intelScreen
->compiler
->scalar_vs
);
106 brw_nir_setup_arb_uniforms(vp
->program
.Base
.nir
, &vp
->program
.Base
,
107 &prog_data
.base
.base
);
110 GLbitfield64 outputs_written
= vp
->program
.Base
.OutputsWritten
;
111 prog_data
.inputs_read
= vp
->program
.Base
.InputsRead
;
113 if (key
->copy_edgeflag
) {
114 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_EDGE
);
115 prog_data
.inputs_read
|= VERT_BIT_EDGEFLAG
;
119 /* Put dummy slots into the VUE for the SF to put the replaced
120 * point sprite coords in. We shouldn't need these dummy slots,
121 * which take up precious URB space, but it would mean that the SF
122 * doesn't get nice aligned pairs of input coords into output
123 * coords, which would be a pain to handle.
125 for (i
= 0; i
< 8; i
++) {
126 if (key
->point_coord_replace
& (1 << i
))
127 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_TEX0
+ i
);
130 /* if back colors are written, allocate slots for front colors too */
131 if (outputs_written
& BITFIELD64_BIT(VARYING_SLOT_BFC0
))
132 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_COL0
);
133 if (outputs_written
& BITFIELD64_BIT(VARYING_SLOT_BFC1
))
134 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_COL1
);
137 /* In order for legacy clipping to work, we need to populate the clip
138 * distance varying slots whenever clipping is enabled, even if the vertex
139 * shader doesn't write to gl_ClipDistance.
141 if (key
->nr_userclip_plane_consts
> 0) {
142 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0
);
143 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1
);
146 brw_compute_vue_map(brw
->intelScreen
->devinfo
,
147 &prog_data
.base
.vue_map
, outputs_written
,
148 prog
? prog
->SeparateShader
: false);
150 unsigned nr_attributes
= _mesa_bitcount_64(prog_data
.inputs_read
);
152 /* gl_VertexID and gl_InstanceID are system values, but arrive via an
153 * incoming vertex attribute. So, add an extra slot.
155 if (vp
->program
.Base
.SystemValuesRead
&
156 (BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
) |
157 BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID
))) {
161 /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry
162 * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in
163 * vec4 mode, the hardware appears to wedge unless we read something.
165 if (brw
->intelScreen
->compiler
->scalar_vs
)
166 prog_data
.base
.urb_read_length
= DIV_ROUND_UP(nr_attributes
, 2);
168 prog_data
.base
.urb_read_length
= DIV_ROUND_UP(MAX2(nr_attributes
, 1), 2);
170 prog_data
.nr_attributes
= nr_attributes
;
172 /* Since vertex shaders reuse the same VUE entry for inputs and outputs
173 * (overwriting the original contents), we need to make sure the size is
174 * the larger of the two.
176 const unsigned vue_entries
=
177 MAX2(nr_attributes
, prog_data
.base
.vue_map
.num_slots
);
180 prog_data
.base
.urb_entry_size
= DIV_ROUND_UP(vue_entries
, 8);
182 prog_data
.base
.urb_entry_size
= DIV_ROUND_UP(vue_entries
, 4);
185 _mesa_fprint_program_opt(stderr
, &vp
->program
.Base
, PROG_PRINT_DEBUG
,
189 if (unlikely(brw
->perf_debug
)) {
190 start_busy
= (brw
->batch
.last_bo
&&
191 drm_intel_bo_busy(brw
->batch
.last_bo
));
192 start_time
= get_time();
195 if (unlikely(INTEL_DEBUG
& DEBUG_VS
))
196 brw_dump_ir("vertex", prog
, vs
? &vs
->base
: NULL
, &vp
->program
.Base
);
199 if (INTEL_DEBUG
& DEBUG_SHADER_TIME
)
200 st_index
= brw_get_shader_time_index(brw
, prog
, &vp
->program
.Base
, ST_VS
);
204 program
= brw_vs_emit(brw
, mem_ctx
, key
, &prog_data
,
205 &vp
->program
, prog
, st_index
, &program_size
);
206 if (program
== NULL
) {
207 ralloc_free(mem_ctx
);
211 if (unlikely(brw
->perf_debug
) && vs
) {
212 if (vs
->compiled_once
) {
213 brw_vs_debug_recompile(brw
, prog
, key
);
215 if (start_busy
&& !drm_intel_bo_busy(brw
->batch
.last_bo
)) {
216 perf_debug("VS compile took %.03f ms and stalled the GPU\n",
217 (get_time() - start_time
) * 1000);
219 vs
->compiled_once
= true;
222 /* Scratch space is used for register spilling */
223 if (prog_data
.base
.base
.total_scratch
) {
224 brw_get_scratch_bo(brw
, &brw
->vs
.base
.scratch_bo
,
225 prog_data
.base
.base
.total_scratch
*
226 brw
->max_vs_threads
);
229 brw_upload_cache(&brw
->cache
, BRW_CACHE_VS_PROG
,
230 key
, sizeof(struct brw_vs_prog_key
),
231 program
, program_size
,
232 &prog_data
, sizeof(prog_data
),
233 &brw
->vs
.base
.prog_offset
, &brw
->vs
.prog_data
);
234 ralloc_free(mem_ctx
);
240 key_debug(struct brw_context
*brw
, const char *name
, int a
, int b
)
243 perf_debug(" %s %d->%d\n", name
, a
, b
);
250 brw_vs_debug_recompile(struct brw_context
*brw
,
251 struct gl_shader_program
*prog
,
252 const struct brw_vs_prog_key
*key
)
254 struct brw_cache_item
*c
= NULL
;
255 const struct brw_vs_prog_key
*old_key
= NULL
;
258 perf_debug("Recompiling vertex shader for program %d\n", prog
->Name
);
260 for (unsigned int i
= 0; i
< brw
->cache
.size
; i
++) {
261 for (c
= brw
->cache
.items
[i
]; c
; c
= c
->next
) {
262 if (c
->cache_id
== BRW_CACHE_VS_PROG
) {
265 if (old_key
->program_string_id
== key
->program_string_id
)
274 perf_debug(" Didn't find previous compile in the shader cache for "
279 for (unsigned int i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
280 found
|= key_debug(brw
, "Vertex attrib w/a flags",
281 old_key
->gl_attrib_wa_flags
[i
],
282 key
->gl_attrib_wa_flags
[i
]);
285 found
|= key_debug(brw
, "legacy user clipping",
286 old_key
->nr_userclip_plane_consts
,
287 key
->nr_userclip_plane_consts
);
289 found
|= key_debug(brw
, "copy edgeflag",
290 old_key
->copy_edgeflag
, key
->copy_edgeflag
);
291 found
|= key_debug(brw
, "PointCoord replace",
292 old_key
->point_coord_replace
, key
->point_coord_replace
);
293 found
|= key_debug(brw
, "vertex color clamping",
294 old_key
->clamp_vertex_color
, key
->clamp_vertex_color
);
296 found
|= brw_debug_recompile_sampler_key(brw
, &old_key
->tex
, &key
->tex
);
299 perf_debug(" Something else\n");
304 brw_vs_state_dirty(struct brw_context
*brw
)
306 return brw_state_dirty(brw
,
313 BRW_NEW_VERTEX_PROGRAM
|
314 BRW_NEW_VS_ATTRIB_WORKAROUNDS
);
318 brw_vs_populate_key(struct brw_context
*brw
,
319 struct brw_vs_prog_key
*key
)
321 struct gl_context
*ctx
= &brw
->ctx
;
322 /* BRW_NEW_VERTEX_PROGRAM */
323 struct brw_vertex_program
*vp
=
324 (struct brw_vertex_program
*)brw
->vertex_program
;
325 struct gl_program
*prog
= (struct gl_program
*) brw
->vertex_program
;
328 memset(key
, 0, sizeof(*key
));
330 /* Just upload the program verbatim for now. Always send it all
331 * the inputs it asks for, whether they are varying or not.
333 key
->program_string_id
= vp
->id
;
335 if (ctx
->Transform
.ClipPlanesEnabled
!= 0 &&
336 ctx
->API
== API_OPENGL_COMPAT
&&
337 !vp
->program
.Base
.UsesClipDistanceOut
) {
338 key
->nr_userclip_plane_consts
=
339 _mesa_logbase2(ctx
->Transform
.ClipPlanesEnabled
) + 1;
344 key
->copy_edgeflag
= (ctx
->Polygon
.FrontMode
!= GL_FILL
||
345 ctx
->Polygon
.BackMode
!= GL_FILL
);
348 if (prog
->OutputsWritten
& (VARYING_BIT_COL0
| VARYING_BIT_COL1
|
349 VARYING_BIT_BFC0
| VARYING_BIT_BFC1
)) {
350 /* _NEW_LIGHT | _NEW_BUFFERS */
351 key
->clamp_vertex_color
= ctx
->Light
._ClampVertexColor
;
355 if (brw
->gen
< 6 && ctx
->Point
.PointSprite
) {
356 for (i
= 0; i
< 8; i
++) {
357 if (ctx
->Point
.CoordReplace
[i
])
358 key
->point_coord_replace
|= (1 << i
);
363 brw_populate_sampler_prog_key_data(ctx
, prog
, brw
->vs
.base
.sampler_count
,
366 /* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
367 memcpy(key
->gl_attrib_wa_flags
, brw
->vb
.attrib_wa_flags
,
368 sizeof(brw
->vb
.attrib_wa_flags
));
372 brw_upload_vs_prog(struct brw_context
*brw
)
374 struct gl_context
*ctx
= &brw
->ctx
;
375 struct gl_shader_program
**current
= ctx
->_Shader
->CurrentProgram
;
376 struct brw_vs_prog_key key
;
377 /* BRW_NEW_VERTEX_PROGRAM */
378 struct brw_vertex_program
*vp
=
379 (struct brw_vertex_program
*)brw
->vertex_program
;
381 if (!brw_vs_state_dirty(brw
))
384 brw_vs_populate_key(brw
, &key
);
386 if (!brw_search_cache(&brw
->cache
, BRW_CACHE_VS_PROG
,
388 &brw
->vs
.base
.prog_offset
, &brw
->vs
.prog_data
)) {
389 bool success
= brw_codegen_vs_prog(brw
, current
[MESA_SHADER_VERTEX
],
394 brw
->vs
.base
.prog_data
= &brw
->vs
.prog_data
->base
.base
;
398 brw_vs_precompile(struct gl_context
*ctx
,
399 struct gl_shader_program
*shader_prog
,
400 struct gl_program
*prog
)
402 struct brw_context
*brw
= brw_context(ctx
);
403 struct brw_vs_prog_key key
;
404 uint32_t old_prog_offset
= brw
->vs
.base
.prog_offset
;
405 struct brw_vs_prog_data
*old_prog_data
= brw
->vs
.prog_data
;
408 struct gl_vertex_program
*vp
= (struct gl_vertex_program
*) prog
;
409 struct brw_vertex_program
*bvp
= brw_vertex_program(vp
);
411 memset(&key
, 0, sizeof(key
));
413 brw_setup_tex_for_precompile(brw
, &key
.tex
, prog
);
414 key
.program_string_id
= bvp
->id
;
415 key
.clamp_vertex_color
=
416 (prog
->OutputsWritten
& (VARYING_BIT_COL0
| VARYING_BIT_COL1
|
417 VARYING_BIT_BFC0
| VARYING_BIT_BFC1
));
419 success
= brw_codegen_vs_prog(brw
, shader_prog
, bvp
, &key
);
421 brw
->vs
.base
.prog_offset
= old_prog_offset
;
422 brw
->vs
.prog_data
= old_prog_data
;