/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 *   Keith Whitwell <keith@tungstengraphics.com>
 */
32 #include "brw_context.h"
34 #include "brw_state.h"
35 #include "main/formats.h"
36 #include "main/fbobject.h"
37 #include "main/samplerobj.h"
38 #include "program/prog_parameter.h"
40 #include "glsl/ralloc.h"
42 /** Return number of src args for given instruction.
 *
 * The path visible here asserts that opcode is below MAX_OPCODE and then
 * returns _mesa_num_inst_src_regs(opcode).
 *
 * NOTE(review): the embedded source numbering jumps from 43 to 60, so part
 * of this definition is absent from this extract; confirm against the
 * complete file before changing it.
 */
43 GLuint
brw_wm_nr_args( GLuint opcode
)
60 assert(opcode
< MAX_OPCODE
);
61 return _mesa_num_inst_src_regs(opcode
);
/* Takes an opcode and returns a GLuint.
 *
 * NOTE(review): only the signature is present in this extract; the body is
 * not visible, so the returned value is presumably a flag for opcodes with
 * scalar results (going by the name only) -- confirm against the full file.
 */
66 GLuint
brw_wm_is_scalar_result( GLuint opcode
)
89 * Return a bitfield where bit n is set if barycentric interpolation mode n
90 * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
93 brw_compute_barycentric_interp_modes(struct brw_context
*brw
,
94 bool shade_model_flat
,
95 const struct gl_fragment_program
*fprog
)
97 unsigned barycentric_interp_modes
= 0;
100 /* Loop through all fragment shader inputs to figure out what interpolation
101 * modes are in use, and set the appropriate bits in
102 * barycentric_interp_modes.
104 for (attr
= 0; attr
< FRAG_ATTRIB_MAX
; ++attr
) {
105 enum glsl_interp_qualifier interp_qualifier
=
106 fprog
->InterpQualifier
[attr
];
107 bool is_centroid
= fprog
->IsCentroid
& BITFIELD64_BIT(attr
);
108 bool is_gl_Color
= attr
== FRAG_ATTRIB_COL0
|| attr
== FRAG_ATTRIB_COL1
;
110 /* Ignore unused inputs. */
111 if (!(fprog
->Base
.InputsRead
& BITFIELD64_BIT(attr
)))
114 /* Ignore WPOS and FACE, because they don't require interpolation. */
115 if (attr
== FRAG_ATTRIB_WPOS
|| attr
== FRAG_ATTRIB_FACE
)
118 /* Determine the set (or sets) of barycentric coordinates needed to
119 * interpolate this variable. Note that when
120 * brw->needs_unlit_centroid_workaround is set, centroid interpolation
121 * uses PIXEL interpolation for unlit pixels and CENTROID interpolation
122 * for lit pixels, so we need both sets of barycentric coordinates.
124 if (interp_qualifier
== INTERP_QUALIFIER_NOPERSPECTIVE
) {
126 barycentric_interp_modes
|=
127 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC
;
129 if (!is_centroid
|| brw
->needs_unlit_centroid_workaround
) {
130 barycentric_interp_modes
|=
131 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC
;
133 } else if (interp_qualifier
== INTERP_QUALIFIER_SMOOTH
||
134 (!(shade_model_flat
&& is_gl_Color
) &&
135 interp_qualifier
== INTERP_QUALIFIER_NONE
)) {
137 barycentric_interp_modes
|=
138 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC
;
140 if (!is_centroid
|| brw
->needs_unlit_centroid_workaround
) {
141 barycentric_interp_modes
|=
142 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC
;
147 return barycentric_interp_modes
;
152 brw_wm_payload_setup(struct brw_context
*brw
,
153 struct brw_wm_compile
*c
)
155 struct intel_context
*intel
= &brw
->intel
;
156 bool uses_depth
= (c
->fp
->program
.Base
.InputsRead
&
157 (1 << FRAG_ATTRIB_WPOS
)) != 0;
158 unsigned barycentric_interp_modes
= c
->prog_data
.barycentric_interp_modes
;
161 if (intel
->gen
>= 6) {
162 /* R0-1: masks, pixel X/Y coordinates. */
163 c
->nr_payload_regs
= 2;
164 /* R2: only for 32-pixel dispatch.*/
166 /* R3-26: barycentric interpolation coordinates. These appear in the
167 * same order that they appear in the brw_wm_barycentric_interp_mode
168 * enum. Each set of coordinates occupies 2 registers if dispatch width
169 * == 8 and 4 registers if dispatch width == 16. Coordinates only
170 * appear if they were enabled using the "Barycentric Interpolation
171 * Mode" bits in WM_STATE.
173 for (i
= 0; i
< BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT
; ++i
) {
174 if (barycentric_interp_modes
& (1 << i
)) {
175 c
->barycentric_coord_reg
[i
] = c
->nr_payload_regs
;
176 c
->nr_payload_regs
+= 2;
177 if (c
->dispatch_width
== 16) {
178 c
->nr_payload_regs
+= 2;
183 /* R27: interpolated depth if uses source depth */
185 c
->source_depth_reg
= c
->nr_payload_regs
;
186 c
->nr_payload_regs
++;
187 if (c
->dispatch_width
== 16) {
188 /* R28: interpolated depth if not 8-wide. */
189 c
->nr_payload_regs
++;
192 /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W.
195 c
->source_w_reg
= c
->nr_payload_regs
;
196 c
->nr_payload_regs
++;
197 if (c
->dispatch_width
== 16) {
198 /* R30: interpolated W if not 8-wide. */
199 c
->nr_payload_regs
++;
202 /* R31: MSAA position offsets. */
203 /* R32-: bary for 32-pixel. */
204 /* R58-59: interp W for 32-pixel. */
206 if (c
->fp
->program
.Base
.OutputsWritten
&
207 BITFIELD64_BIT(FRAG_RESULT_DEPTH
)) {
208 c
->source_depth_to_render_target
= true;
209 c
->computes_depth
= true;
212 brw_wm_lookup_iz(intel
, c
);
217 brw_wm_prog_data_compare(const void *in_a
, const void *in_b
,
218 int aux_size
, const void *in_key
)
220 const struct brw_wm_prog_data
*a
= in_a
;
221 const struct brw_wm_prog_data
*b
= in_b
;
223 /* Compare all the struct up to the pointers. */
224 if (memcmp(a
, b
, offsetof(struct brw_wm_prog_data
, param
)))
227 if (memcmp(a
->param
, b
->param
, a
->nr_params
* sizeof(void *)))
230 if (memcmp(a
->pull_param
, b
->pull_param
, a
->nr_pull_params
* sizeof(void *)))
237 brw_wm_prog_data_free(const void *in_prog_data
)
239 const struct brw_wm_prog_data
*prog_data
= in_prog_data
;
241 ralloc_free((void *)prog_data
->param
);
242 ralloc_free((void *)prog_data
->pull_param
);
246 * All Mesa program -> GPU code generation goes through this function.
247 * Depending on the instructions used (i.e. flow control instructions)
248 * we'll use one of two code generators.
250 bool do_wm_prog(struct brw_context
*brw
,
251 struct gl_shader_program
*prog
,
252 struct brw_fragment_program
*fp
,
253 struct brw_wm_prog_key
*key
)
255 struct intel_context
*intel
= &brw
->intel
;
256 struct brw_wm_compile
*c
;
257 const GLuint
*program
;
258 struct gl_shader
*fs
= NULL
;
262 fs
= prog
->_LinkedShaders
[MESA_SHADER_FRAGMENT
];
264 c
= brw
->wm
.compile_data
;
266 brw
->wm
.compile_data
= rzalloc(NULL
, struct brw_wm_compile
);
267 c
= brw
->wm
.compile_data
;
269 /* Ouch - big out of memory problem. Can't continue
270 * without triggering a segfault, no way to signal,
276 void *instruction
= c
->instruction
;
277 void *prog_instructions
= c
->prog_instructions
;
278 void *vreg
= c
->vreg
;
279 void *refs
= c
->refs
;
280 memset(c
, 0, sizeof(*brw
->wm
.compile_data
));
281 c
->instruction
= instruction
;
282 c
->prog_instructions
= prog_instructions
;
287 /* Allocate the references to the uniforms that will end up in the
288 * prog_data associated with the compiled program, and which will be freed
289 * by the state cache.
292 int param_count
= fs
->num_uniform_components
;
293 /* The backend also sometimes adds params for texture size. */
294 param_count
+= 2 * BRW_MAX_TEX_UNIT
;
296 c
->prog_data
.param
= rzalloc_array(c
, const float *, param_count
);
297 c
->prog_data
.pull_param
= rzalloc_array(c
, const float *, param_count
);
299 /* brw_wm_pass0.c will also add references to 0.0 and 1.0 which are
300 * uploaded as push parameters.
302 int param_count
= (fp
->program
.Base
.Parameters
->NumParameters
+ 2) * 4;
303 c
->prog_data
.param
= rzalloc_array(c
, const float *, param_count
);
304 /* The old backend never does pull constants. */
305 c
->prog_data
.pull_param
= NULL
;
308 memcpy(&c
->key
, key
, sizeof(*key
));
311 c
->env_param
= brw
->intel
.ctx
.FragmentProgram
.Parameters
;
313 brw_init_compile(brw
, &c
->func
, c
);
315 c
->prog_data
.barycentric_interp_modes
=
316 brw_compute_barycentric_interp_modes(brw
, c
->key
.flat_shade
,
319 brw_wm_fs_emit(brw
, c
, prog
);
321 /* Scratch space is used for register spilling */
322 if (c
->last_scratch
) {
323 perf_debug("Fragment shader triggered register spilling. "
324 "Try reducing the number of live scalar values to "
325 "improve performance.\n");
327 c
->prog_data
.total_scratch
= brw_get_scratch_size(c
->last_scratch
);
329 brw_get_scratch_bo(intel
, &brw
->wm
.scratch_bo
,
330 c
->prog_data
.total_scratch
* brw
->max_wm_threads
);
333 if (unlikely(INTEL_DEBUG
& DEBUG_WM
))
334 fprintf(stderr
, "\n");
338 program
= brw_get_program(&c
->func
, &program_size
);
340 brw_upload_cache(&brw
->cache
, BRW_WM_PROG
,
341 &c
->key
, sizeof(c
->key
),
342 program
, program_size
,
343 &c
->prog_data
, sizeof(c
->prog_data
),
344 &brw
->wm
.prog_offset
, &brw
->wm
.prog_data
);
/**
 * Report one program-key field if its value changed between two keys.
 *
 * \param name  label printed for the field
 * \param a     value in the old key
 * \param b     value in the new key
 * \return true if a line was printed
 *
 * NOTE(review): the return type and the comparison are missing from the
 * extract; they are inferred from callers accumulating the result with
 * "found |=" and from the "%d->%d" message -- confirm.
 */
static bool
key_debug(const char *name, int a, int b)
{
   if (a == b)
      return false;

   perf_debug(" %s %d->%d\n", name, a, b);
   return true;
}
361 brw_debug_recompile_sampler_key(const struct brw_sampler_prog_key_data
*old_key
,
362 const struct brw_sampler_prog_key_data
*key
)
366 for (unsigned int i
= 0; i
< MAX_SAMPLERS
; i
++) {
367 found
|= key_debug("EXT_texture_swizzle or DEPTH_TEXTURE_MODE",
368 old_key
->swizzles
[i
], key
->swizzles
[i
]);
370 found
|= key_debug("GL_CLAMP enabled on any texture unit's 1st coordinate",
371 old_key
->gl_clamp_mask
[0], key
->gl_clamp_mask
[0]);
372 found
|= key_debug("GL_CLAMP enabled on any texture unit's 2nd coordinate",
373 old_key
->gl_clamp_mask
[1], key
->gl_clamp_mask
[1]);
374 found
|= key_debug("GL_CLAMP enabled on any texture unit's 3rd coordinate",
375 old_key
->gl_clamp_mask
[2], key
->gl_clamp_mask
[2]);
376 found
|= key_debug("GL_MESA_ycbcr texturing\n",
377 old_key
->yuvtex_mask
, key
->yuvtex_mask
);
378 found
|= key_debug("GL_MESA_ycbcr UV swapping\n",
379 old_key
->yuvtex_swap_mask
, key
->yuvtex_swap_mask
);
385 brw_wm_debug_recompile(struct brw_context
*brw
,
386 struct gl_shader_program
*prog
,
387 const struct brw_wm_prog_key
*key
)
389 struct brw_cache_item
*c
= NULL
;
390 const struct brw_wm_prog_key
*old_key
= NULL
;
393 perf_debug("Recompiling fragment shader for program %d\n", prog
->Name
);
395 for (unsigned int i
= 0; i
< brw
->cache
.size
; i
++) {
396 for (c
= brw
->cache
.items
[i
]; c
; c
= c
->next
) {
397 if (c
->cache_id
== BRW_WM_PROG
) {
400 if (old_key
->program_string_id
== key
->program_string_id
)
409 perf_debug(" Didn't find previous compile in the shader cache for "
414 found
|= key_debug("alphatest, computed depth, depth test, or depth write",
415 old_key
->iz_lookup
, key
->iz_lookup
);
416 found
|= key_debug("depth statistics", old_key
->stats_wm
, key
->stats_wm
);
417 found
|= key_debug("flat shading", old_key
->flat_shade
, key
->flat_shade
);
418 found
|= key_debug("number of color buffers", old_key
->nr_color_regions
, key
->nr_color_regions
);
419 found
|= key_debug("rendering to FBO", old_key
->render_to_fbo
, key
->render_to_fbo
);
420 found
|= key_debug("fragment color clamping", old_key
->clamp_fragment_color
, key
->clamp_fragment_color
);
421 found
|= key_debug("line smoothing", old_key
->line_aa
, key
->line_aa
);
422 found
|= key_debug("proj_attrib_mask", old_key
->proj_attrib_mask
, key
->proj_attrib_mask
);
423 found
|= key_debug("renderbuffer height", old_key
->drawable_height
, key
->drawable_height
);
424 found
|= key_debug("vertex shader outputs", old_key
->vp_outputs_written
, key
->vp_outputs_written
);
426 found
|= brw_debug_recompile_sampler_key(&old_key
->tex
, &key
->tex
);
429 perf_debug(" Something else\n");
434 brw_populate_sampler_prog_key_data(struct gl_context
*ctx
,
435 const struct gl_program
*prog
,
436 struct brw_sampler_prog_key_data
*key
)
438 struct intel_context
*intel
= intel_context(ctx
);
440 for (int s
= 0; s
< MAX_SAMPLERS
; s
++) {
441 key
->swizzles
[s
] = SWIZZLE_NOOP
;
443 if (!(prog
->SamplersUsed
& (1 << s
)))
446 int unit_id
= prog
->SamplerUnits
[s
];
447 const struct gl_texture_unit
*unit
= &ctx
->Texture
.Unit
[unit_id
];
449 if (unit
->_ReallyEnabled
&& unit
->_Current
->Target
!= GL_TEXTURE_BUFFER
) {
450 const struct gl_texture_object
*t
= unit
->_Current
;
451 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
452 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit_id
);
454 const bool alpha_depth
= t
->DepthMode
== GL_ALPHA
&&
455 (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
456 img
->_BaseFormat
== GL_DEPTH_STENCIL
);
458 /* Haswell handles texture swizzling as surface format overrides
459 * (except for GL_ALPHA); all other platforms need MOVs in the shader.
461 if (!intel
->is_haswell
|| alpha_depth
)
462 key
->swizzles
[s
] = brw_get_texture_swizzle(t
);
464 if (img
->InternalFormat
== GL_YCBCR_MESA
) {
465 key
->yuvtex_mask
|= 1 << s
;
466 if (img
->TexFormat
== MESA_FORMAT_YCBCR
)
467 key
->yuvtex_swap_mask
|= 1 << s
;
470 if (sampler
->MinFilter
!= GL_NEAREST
&&
471 sampler
->MagFilter
!= GL_NEAREST
) {
472 if (sampler
->WrapS
== GL_CLAMP
)
473 key
->gl_clamp_mask
[0] |= 1 << s
;
474 if (sampler
->WrapT
== GL_CLAMP
)
475 key
->gl_clamp_mask
[1] |= 1 << s
;
476 if (sampler
->WrapR
== GL_CLAMP
)
477 key
->gl_clamp_mask
[2] |= 1 << s
;
483 static void brw_wm_populate_key( struct brw_context
*brw
,
484 struct brw_wm_prog_key
*key
)
486 struct gl_context
*ctx
= &brw
->intel
.ctx
;
487 struct intel_context
*intel
= &brw
->intel
;
488 /* BRW_NEW_FRAGMENT_PROGRAM */
489 const struct brw_fragment_program
*fp
=
490 (struct brw_fragment_program
*)brw
->fragment_program
;
491 const struct gl_program
*prog
= (struct gl_program
*) brw
->fragment_program
;
494 bool program_uses_dfdy
= fp
->program
.UsesDFdy
;
496 memset(key
, 0, sizeof(*key
));
498 /* Build the index for table lookup
500 if (intel
->gen
< 6) {
502 if (fp
->program
.UsesKill
|| ctx
->Color
.AlphaEnabled
)
503 lookup
|= IZ_PS_KILL_ALPHATEST_BIT
;
505 if (fp
->program
.Base
.OutputsWritten
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
))
506 lookup
|= IZ_PS_COMPUTES_DEPTH_BIT
;
510 lookup
|= IZ_DEPTH_TEST_ENABLE_BIT
;
512 if (ctx
->Depth
.Test
&& ctx
->Depth
.Mask
) /* ?? */
513 lookup
|= IZ_DEPTH_WRITE_ENABLE_BIT
;
516 if (ctx
->Stencil
._Enabled
) {
517 lookup
|= IZ_STENCIL_TEST_ENABLE_BIT
;
519 if (ctx
->Stencil
.WriteMask
[0] ||
520 ctx
->Stencil
.WriteMask
[ctx
->Stencil
._BackFace
])
521 lookup
|= IZ_STENCIL_WRITE_ENABLE_BIT
;
523 key
->iz_lookup
= lookup
;
528 /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
529 if (ctx
->Line
.SmoothFlag
) {
530 if (brw
->intel
.reduced_primitive
== GL_LINES
) {
533 else if (brw
->intel
.reduced_primitive
== GL_TRIANGLES
) {
534 if (ctx
->Polygon
.FrontMode
== GL_LINE
) {
535 line_aa
= AA_SOMETIMES
;
537 if (ctx
->Polygon
.BackMode
== GL_LINE
||
538 (ctx
->Polygon
.CullFlag
&&
539 ctx
->Polygon
.CullFaceMode
== GL_BACK
))
542 else if (ctx
->Polygon
.BackMode
== GL_LINE
) {
543 line_aa
= AA_SOMETIMES
;
545 if ((ctx
->Polygon
.CullFlag
&&
546 ctx
->Polygon
.CullFaceMode
== GL_FRONT
))
552 key
->line_aa
= line_aa
;
555 key
->stats_wm
= brw
->intel
.stats_wm
;
557 /* BRW_NEW_WM_INPUT_DIMENSIONS */
558 /* Only set this for fixed function. The optimization it enables isn't
559 * useful for programs using shaders.
561 if (ctx
->Shader
.CurrentFragmentProgram
)
562 key
->proj_attrib_mask
= 0xffffffff;
564 key
->proj_attrib_mask
= brw
->wm
.input_size_masks
[4-1];
567 key
->flat_shade
= (ctx
->Light
.ShadeModel
== GL_FLAT
);
569 /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
570 key
->clamp_fragment_color
= ctx
->Color
._ClampFragmentColor
;
573 brw_populate_sampler_prog_key_data(ctx
, prog
, &key
->tex
);
577 * Include the draw buffer origin and height so that we can calculate
578 * fragment position values relative to the bottom left of the drawable,
579 * from the incoming screen origin relative position we get as part of our
582 * This is only needed for the WM_WPOSXY opcode when the fragment program
583 * uses the gl_FragCoord input.
585 * We could avoid recompiling by including this as a constant referenced by
586 * our program, but if we were to do that it would also be nice to handle
587 * getting that constant updated at batchbuffer submit time (when we
588 * hold the lock and know where the buffer really is) rather than at emit
589 * time when we don't hold the lock and are just guessing. We could also
590 * just avoid using this as key data if the program doesn't use
593 * For DRI2 the origin_x/y will always be (0,0) but we still need the
594 * drawable height in order to invert the Y axis.
596 if (fp
->program
.Base
.InputsRead
& FRAG_BIT_WPOS
) {
597 key
->drawable_height
= ctx
->DrawBuffer
->Height
;
600 if ((fp
->program
.Base
.InputsRead
& FRAG_BIT_WPOS
) || program_uses_dfdy
) {
601 key
->render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
);
605 key
->nr_color_regions
= ctx
->DrawBuffer
->_NumColorDrawBuffers
;
606 /* _NEW_MULTISAMPLE */
607 key
->sample_alpha_to_coverage
= ctx
->Multisample
.SampleAlphaToCoverage
;
609 /* CACHE_NEW_VS_PROG */
611 key
->vp_outputs_written
= brw
->vs
.prog_data
->outputs_written
;
613 /* The unique fragment program ID */
614 key
->program_string_id
= fp
->id
;
619 brw_upload_wm_prog(struct brw_context
*brw
)
621 struct intel_context
*intel
= &brw
->intel
;
622 struct gl_context
*ctx
= &intel
->ctx
;
623 struct brw_wm_prog_key key
;
624 struct brw_fragment_program
*fp
= (struct brw_fragment_program
*)
625 brw
->fragment_program
;
627 brw_wm_populate_key(brw
, &key
);
629 if (!brw_search_cache(&brw
->cache
, BRW_WM_PROG
,
631 &brw
->wm
.prog_offset
, &brw
->wm
.prog_data
)) {
632 bool success
= do_wm_prog(brw
, ctx
->Shader
._CurrentFragmentProgram
, fp
,
640 const struct brw_tracked_state brw_wm_prog
= {
642 .mesa
= (_NEW_COLOR
|
652 .brw
= (BRW_NEW_FRAGMENT_PROGRAM
|
653 BRW_NEW_WM_INPUT_DIMENSIONS
|
654 BRW_NEW_REDUCED_PRIMITIVE
),
655 .cache
= CACHE_NEW_VS_PROG
,
657 .emit
= brw_upload_wm_prog