2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "main/compiler.h"
34 #include "main/context.h"
35 #include "brw_context.h"
38 #include "brw_state.h"
39 #include "program/prog_print.h"
40 #include "program/prog_parameter.h"
43 #include "util/ralloc.h"
/*
 * brw_codegen_vs_prog - compile a vertex program for one program key and
 * upload the result to the program cache.
 *
 * NOTE(review): this listing is incomplete (some declarations, braces,
 * guards and the return statements are not visible), so the notes below
 * describe only what can be read here.
 *
 * brw:  driver context; supplies intelScreen->compiler and
 *       intelScreen->devinfo, the batch, the program cache and the
 *       vs.base / vs.prog_data slots handed to brw_upload_cache().
 * prog: linked shader program.  It is tested against NULL when the
 *       separate-shader flag for the VUE map is chosen, so a NULL prog
 *       appears to be supported (presumably for ARB programs - confirm).
 * vp:   vertex program whose program.Base.nir is passed to brw_compile_vs().
 * key:  program key; also used as the cache key in brw_upload_cache().
 *
 * Visible steps, in order:
 *   - zero prog_data and assign the common binding table offsets;
 *   - size and allocate the param, pull_param and image_param arrays;
 *   - set up uniforms from the NIR shader;
 *   - build outputs_written (edge flag, point-sprite texcoord slots, front
 *     colors paired with written back colors, user clip distances) and
 *     compute the VUE map from it;
 *   - call brw_compile_vs(); where a NULL program is handled,
 *     prog->LinkStatus is cleared, the error string is appended to
 *     prog->InfoLog and reported through _mesa_problem(), and mem_ctx is
 *     freed;
 *   - under perf_debug, report recompiles and compiles that stalled the GPU;
 *   - request a scratch BO when total_scratch is non-zero, upload the
 *     program to the cache and free mem_ctx.
 *
 * The return statements are not visible in this listing; callers store the
 * result in a bool named "success".
 */
46 brw_codegen_vs_prog(struct brw_context
*brw
,
47 struct gl_shader_program
*prog
,
48 struct brw_vertex_program
*vp
,
49 struct brw_vs_prog_key
*key
)
51 const struct brw_compiler
*compiler
= brw
->intelScreen
->compiler
;
53 const GLuint
*program
;
54 struct brw_vs_prog_data prog_data
;
55 struct brw_stage_prog_data
*stage_prog_data
= &prog_data
.base
.base
;
58 struct brw_shader
*vs
= NULL
;
59 bool start_busy
= false;
60 double start_time
= 0;
/* vs is initialised to NULL above and later uses test it ("vs ? ..." and
 * "&& vs"), so it can evidently stay NULL on some path; the guard around
 * this assignment is not visible in this listing.
 */
63 vs
= (struct brw_shader
*) prog
->_LinkedShaders
[MESA_SHADER_VERTEX
];
65 memset(&prog_data
, 0, sizeof(prog_data
));
67 /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
69 stage_prog_data
->use_alt_mode
= true;
/* Temporary ralloc context passed to brw_compile_vs(); it is released with
 * ralloc_free() both where a NULL program is handled and after the cache
 * upload at the end.
 */
71 mem_ctx
= ralloc_context(NULL
);
73 brw_assign_common_binding_table_offsets(MESA_SHADER_VERTEX
,
74 brw
->intelScreen
->devinfo
,
75 prog
, &vp
->program
.Base
,
76 &prog_data
.base
.base
, 0);
78 /* Allocate the references to the uniforms that will end up in the
79 * prog_data associated with the compiled program, and which will be freed
82 int param_count
= vp
->program
.Base
.nir
->num_uniforms
;
83 if (!compiler
->scalar_stage
[MESA_SHADER_VERTEX
])
87 prog_data
.base
.base
.nr_image_params
= vs
->base
.NumImages
;
89 /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
92 param_count
+= key
->nr_userclip_plane_consts
* 4;
94 stage_prog_data
->param
=
95 rzalloc_array(NULL
, const gl_constant_value
*, param_count
);
96 stage_prog_data
->pull_param
=
97 rzalloc_array(NULL
, const gl_constant_value
*, param_count
);
98 stage_prog_data
->image_param
=
99 rzalloc_array(NULL
, struct brw_image_param
,
100 stage_prog_data
->nr_image_params
);
101 stage_prog_data
->nr_params
= param_count
;
104 brw_nir_setup_glsl_uniforms(vp
->program
.Base
.nir
, prog
, &vp
->program
.Base
,
105 &prog_data
.base
.base
,
106 compiler
->scalar_stage
[MESA_SHADER_VERTEX
]);
108 brw_nir_setup_arb_uniforms(vp
->program
.Base
.nir
, &vp
->program
.Base
,
109 &prog_data
.base
.base
);
112 GLbitfield64 outputs_written
= vp
->program
.Base
.OutputsWritten
;
113 prog_data
.inputs_read
= vp
->program
.Base
.InputsRead
;
115 if (key
->copy_edgeflag
) {
116 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_EDGE
);
117 prog_data
.inputs_read
|= VERT_BIT_EDGEFLAG
;
121 /* Put dummy slots into the VUE for the SF to put the replaced
122 * point sprite coords in. We shouldn't need these dummy slots,
123 * which take up precious URB space, but it would mean that the SF
124 * doesn't get nice aligned pairs of input coords into output
125 * coords, which would be a pain to handle.
127 for (i
= 0; i
< 8; i
++) {
128 if (key
->point_coord_replace
& (1 << i
))
129 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_TEX0
+ i
);
132 /* if back colors are written, allocate slots for front colors too */
133 if (outputs_written
& BITFIELD64_BIT(VARYING_SLOT_BFC0
))
134 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_COL0
);
135 if (outputs_written
& BITFIELD64_BIT(VARYING_SLOT_BFC1
))
136 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_COL1
);
139 /* In order for legacy clipping to work, we need to populate the clip
140 * distance varying slots whenever clipping is enabled, even if the vertex
141 * shader doesn't write to gl_ClipDistance.
143 if (key
->nr_userclip_plane_consts
> 0) {
144 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0
);
145 outputs_written
|= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1
);
148 brw_compute_vue_map(brw
->intelScreen
->devinfo
,
149 &prog_data
.base
.vue_map
, outputs_written
,
150 prog
? prog
->SeparateShader
: false);
153 _mesa_fprint_program_opt(stderr
, &vp
->program
.Base
, PROG_PRINT_DEBUG
,
157 if (unlikely(brw
->perf_debug
)) {
158 start_busy
= (brw
->batch
.last_bo
&&
159 drm_intel_bo_busy(brw
->batch
.last_bo
));
160 start_time
= get_time();
163 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
164 brw_dump_ir("vertex", prog
, vs
? &vs
->base
: NULL
, &vp
->program
.Base
);
166 fprintf(stderr
, "VS Output ");
167 brw_print_vue_map(stderr
, &prog_data
.base
.vue_map
);
171 if (INTEL_DEBUG
& DEBUG_SHADER_TIME
)
172 st_index
= brw_get_shader_time_index(brw
, prog
, &vp
->program
.Base
, ST_VS
);
177 program
= brw_compile_vs(compiler
, brw
, mem_ctx
, key
,
178 &prog_data
, vp
->program
.Base
.nir
,
179 brw_select_clip_planes(&brw
->ctx
),
180 !_mesa_is_gles3(&brw
->ctx
),
181 st_index
, &program_size
, &error_str
);
182 if (program
== NULL
) {
184 prog
->LinkStatus
= false;
185 ralloc_strcat(&prog
->InfoLog
, error_str
);
188 _mesa_problem(NULL
, "Failed to compile vertex shader: %s\n", error_str
);
190 ralloc_free(mem_ctx
);
194 if (unlikely(brw
->perf_debug
) && vs
) {
195 if (vs
->compiled_once
) {
196 brw_vs_debug_recompile(brw
, prog
, key
);
198 if (start_busy
&& !drm_intel_bo_busy(brw
->batch
.last_bo
)) {
199 perf_debug("VS compile took %.03f ms and stalled the GPU\n",
200 (get_time() - start_time
) * 1000);
202 vs
->compiled_once
= true;
205 /* Scratch space is used for register spilling */
206 if (prog_data
.base
.base
.total_scratch
) {
207 brw_get_scratch_bo(brw
, &brw
->vs
.base
.scratch_bo
,
208 prog_data
.base
.base
.total_scratch
*
209 brw
->max_vs_threads
);
/* Hand the compiled program and prog_data to the cache under this key.
 * &brw->vs.base.prog_offset and &brw->vs.prog_data are passed as
 * out-parameters (presumably filled in by the cache - confirm).
 */
212 brw_upload_cache(&brw
->cache
, BRW_CACHE_VS_PROG
,
213 key
, sizeof(struct brw_vs_prog_key
),
214 program
, program_size
,
215 &prog_data
, sizeof(prog_data
),
216 &brw
->vs
.base
.prog_offset
, &brw
->vs
.prog_data
);
217 ralloc_free(mem_ctx
);
/*
 * key_debug - report one program-key field through perf_debug(), printing
 * the field name followed by "a->b".
 *
 * brw:  driver context; not referenced by name in the visible body
 *       (perf_debug may use it implicitly - TODO confirm).
 * name: label printed for the field.
 * a, b: the two values printed (callers pass the old key's value first and
 *       the new key's value second).
 *
 * Callers OR the result into a "found" flag.  The condition guarding the
 * message and the return statements are not visible in this listing.
 */
223 key_debug(struct brw_context
*brw
, const char *name
, int a
, int b
)
226 perf_debug(" %s %d->%d\n", name
, a
, b
);
/*
 * brw_vs_debug_recompile - explain, through perf_debug(), why a vertex
 * shader is being recompiled.
 *
 * brw:  driver context whose program cache (brw->cache) is searched.
 * prog: shader program; prog->Name is printed in the first message.
 * key:  the key now being compiled.
 *
 * The cache is searched for a BRW_CACHE_VS_PROG item and its key's
 * program_string_id is compared with key->program_string_id.  The old and
 * new keys are then compared field by field (attribute workaround flags,
 * user clip plane count, copy_edgeflag, point_coord_replace,
 * clamp_vertex_color and the sampler key), OR-ing each result into "found".
 *
 * NOTE(review): the assignment to old_key, the declaration of "found" and
 * the conditions around the "Didn't find previous compile" and
 * "Something else" messages are not visible in this listing.
 */
233 brw_vs_debug_recompile(struct brw_context
*brw
,
234 struct gl_shader_program
*prog
,
235 const struct brw_vs_prog_key
*key
)
237 struct brw_cache_item
*c
= NULL
;
238 const struct brw_vs_prog_key
*old_key
= NULL
;
241 perf_debug("Recompiling vertex shader for program %d\n", prog
->Name
);
/* Visit each of the brw->cache.size entries of cache.items and follow its
 * chain of items through c->next.
 */
243 for (unsigned int i
= 0; i
< brw
->cache
.size
; i
++) {
244 for (c
= brw
->cache
.items
[i
]; c
; c
= c
->next
) {
245 if (c
->cache_id
== BRW_CACHE_VS_PROG
) {
248 if (old_key
->program_string_id
== key
->program_string_id
)
257 perf_debug(" Didn't find previous compile in the shader cache for "
/* One key_debug() call per vertex attribute slot for the workaround flags. */
262 for (unsigned int i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
263 found
|= key_debug(brw
, "Vertex attrib w/a flags",
264 old_key
->gl_attrib_wa_flags
[i
],
265 key
->gl_attrib_wa_flags
[i
]);
268 found
|= key_debug(brw
, "legacy user clipping",
269 old_key
->nr_userclip_plane_consts
,
270 key
->nr_userclip_plane_consts
);
272 found
|= key_debug(brw
, "copy edgeflag",
273 old_key
->copy_edgeflag
, key
->copy_edgeflag
);
274 found
|= key_debug(brw
, "PointCoord replace",
275 old_key
->point_coord_replace
, key
->point_coord_replace
);
276 found
|= key_debug(brw
, "vertex color clamping",
277 old_key
->clamp_vertex_color
, key
->clamp_vertex_color
);
279 found
|= brw_debug_recompile_sampler_key(brw
, &old_key
->tex
, &key
->tex
);
282 perf_debug(" Something else\n");
/*
 * brw_vs_state_dirty - return the result of brw_state_dirty() for the state
 * flags this file's vertex-shader upload depends on.
 *
 * NOTE(review): only the last two flags (BRW_NEW_VERTEX_PROGRAM and
 * BRW_NEW_VS_ATTRIB_WORKAROUNDS) are visible; the arguments between "brw"
 * and those flags are missing from this listing.
 */
287 brw_vs_state_dirty(struct brw_context
*brw
)
289 return brw_state_dirty(brw
,
296 BRW_NEW_VERTEX_PROGRAM
|
297 BRW_NEW_VS_ATTRIB_WORKAROUNDS
);
/*
 * brw_vs_populate_key - fill *key from the current GL and driver state.
 *
 * brw: driver context (source of ctx, vertex_program, gen, vs.base and
 *      vb.attrib_wa_flags).
 * key: output; zeroed first and then filled in.
 *
 * Fields set in the visible code:
 *   - program_string_id: vp->id.
 *   - nr_userclip_plane_consts: _mesa_logbase2(ClipPlanesEnabled) + 1, only
 *     when clip planes are enabled, the API is API_OPENGL_COMPAT or
 *     API_OPENGLES, and the program's ClipDistanceArraySize is 0.
 *   - copy_edgeflag: true when the front or back polygon mode is not GL_FILL.
 *   - clamp_vertex_color: ctx->Light._ClampVertexColor, when the program
 *     writes any of COL0/COL1/BFC0/BFC1.
 *   - point_coord_replace: one bit per entry of ctx->Point.CoordReplace[0..7],
 *     only when brw->gen < 6 and point sprites are enabled.
 *   - sampler key data through brw_populate_sampler_prog_key_data().
 *   - gl_attrib_wa_flags: copied from brw->vb.attrib_wa_flags.
 *
 * NOTE(review): the declaration of "i", closing braces and the last argument
 * of the sampler call are not visible in this listing.
 */
301 brw_vs_populate_key(struct brw_context
*brw
,
302 struct brw_vs_prog_key
*key
)
304 struct gl_context
*ctx
= &brw
->ctx
;
305 /* BRW_NEW_VERTEX_PROGRAM */
306 struct brw_vertex_program
*vp
=
307 (struct brw_vertex_program
*)brw
->vertex_program
;
308 struct gl_program
*prog
= (struct gl_program
*) brw
->vertex_program
;
/* Start from an all-zero key.  The key is later handed to the program cache
 * together with sizeof(struct brw_vs_prog_key), so unset bytes presumably
 * need to be deterministic - confirm against the cache implementation.
 */
311 memset(key
, 0, sizeof(*key
));
313 /* Just upload the program verbatim for now. Always send it all
314 * the inputs it asks for, whether they are varying or not.
316 key
->program_string_id
= vp
->id
;
318 if (ctx
->Transform
.ClipPlanesEnabled
!= 0 &&
319 (ctx
->API
== API_OPENGL_COMPAT
||
320 ctx
->API
== API_OPENGLES
) &&
321 vp
->program
.Base
.ClipDistanceArraySize
== 0) {
322 key
->nr_userclip_plane_consts
=
323 _mesa_logbase2(ctx
->Transform
.ClipPlanesEnabled
) + 1;
328 key
->copy_edgeflag
= (ctx
->Polygon
.FrontMode
!= GL_FILL
||
329 ctx
->Polygon
.BackMode
!= GL_FILL
);
332 if (prog
->OutputsWritten
& (VARYING_BIT_COL0
| VARYING_BIT_COL1
|
333 VARYING_BIT_BFC0
| VARYING_BIT_BFC1
)) {
334 /* _NEW_LIGHT | _NEW_BUFFERS */
335 key
->clamp_vertex_color
= ctx
->Light
._ClampVertexColor
;
339 if (brw
->gen
< 6 && ctx
->Point
.PointSprite
) {
340 for (i
= 0; i
< 8; i
++) {
341 if (ctx
->Point
.CoordReplace
[i
])
342 key
->point_coord_replace
|= (1 << i
);
347 brw_populate_sampler_prog_key_data(ctx
, prog
, brw
->vs
.base
.sampler_count
,
350 /* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
351 memcpy(key
->gl_attrib_wa_flags
, brw
->vb
.attrib_wa_flags
,
352 sizeof(brw
->vb
.attrib_wa_flags
));
/*
 * brw_upload_vs_prog - make the vertex shader program that matches the
 * current state current in brw->vs.
 *
 * Visible steps: test brw_vs_state_dirty(); build the key with
 * brw_vs_populate_key(); look it up with brw_search_cache(); on a miss,
 * compile with brw_codegen_vs_prog() using the current program's
 * MESA_SHADER_VERTEX entry; finally point brw->vs.base.prog_data at the
 * base.base member of brw->vs.prog_data.
 *
 * NOTE(review): the statement executed when the state is not dirty, the
 * remaining arguments of the search and codegen calls, and any handling of
 * "success" are not visible in this listing.
 */
356 brw_upload_vs_prog(struct brw_context
*brw
)
358 struct gl_context
*ctx
= &brw
->ctx
;
359 struct gl_shader_program
**current
= ctx
->_Shader
->CurrentProgram
;
360 struct brw_vs_prog_key key
;
361 /* BRW_NEW_VERTEX_PROGRAM */
362 struct brw_vertex_program
*vp
=
363 (struct brw_vertex_program
*)brw
->vertex_program
;
365 if (!brw_vs_state_dirty(brw
))
368 brw_vs_populate_key(brw
, &key
);
370 if (!brw_search_cache(&brw
->cache
, BRW_CACHE_VS_PROG
,
372 &brw
->vs
.base
.prog_offset
, &brw
->vs
.prog_data
)) {
373 bool success
= brw_codegen_vs_prog(brw
, current
[MESA_SHADER_VERTEX
],
/* Publish the stage-generic view (base.base) of the VS prog_data. */
378 brw
->vs
.base
.prog_data
= &brw
->vs
.prog_data
->base
.base
;
/*
 * brw_vs_precompile - compile a vertex program with a default key.
 *
 * ctx:         GL context; converted to the driver context with
 *              brw_context().
 * shader_prog: shader program passed through to brw_codegen_vs_prog().
 * prog:        the program to compile; cast to struct gl_vertex_program and
 *              wrapped with brw_vertex_program().
 *
 * The key is zeroed and then given texture state from
 * brw_setup_tex_for_precompile(), the program's id, and clamp_vertex_color
 * set from whether the program writes any of COL0/COL1/BFC0/BFC1.
 * brw->vs.base.prog_offset and brw->vs.prog_data are saved before the
 * compile and restored afterwards, so the currently bound program is left
 * as it was.
 *
 * NOTE(review): the declaration of "success" and the return statement are
 * not visible in this listing.
 */
382 brw_vs_precompile(struct gl_context
*ctx
,
383 struct gl_shader_program
*shader_prog
,
384 struct gl_program
*prog
)
386 struct brw_context
*brw
= brw_context(ctx
);
387 struct brw_vs_prog_key key
;
388 uint32_t old_prog_offset
= brw
->vs
.base
.prog_offset
;
389 struct brw_vs_prog_data
*old_prog_data
= brw
->vs
.prog_data
;
392 struct gl_vertex_program
*vp
= (struct gl_vertex_program
*) prog
;
393 struct brw_vertex_program
*bvp
= brw_vertex_program(vp
);
395 memset(&key
, 0, sizeof(key
));
397 brw_setup_tex_for_precompile(brw
, &key
.tex
, prog
);
398 key
.program_string_id
= bvp
->id
;
399 key
.clamp_vertex_color
=
400 (prog
->OutputsWritten
& (VARYING_BIT_COL0
| VARYING_BIT_COL1
|
401 VARYING_BIT_BFC0
| VARYING_BIT_BFC1
));
403 success
= brw_codegen_vs_prog(brw
, shader_prog
, bvp
, &key
);
/* Put back the program offset and prog_data that were current before the
 * precompile.
 */
405 brw
->vs
.base
.prog_offset
= old_prog_offset
;
406 brw
->vs
.prog_data
= old_prog_data
;