/*
 * Copyright (C) Intel Corp. 2006. All Rights Reserved.
 * Intel funded Tungsten Graphics to
 * develop this 3D driver.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
26 #include "brw_context.h"
28 #include "brw_state.h"
29 #include "brw_shader.h"
30 #include "main/enums.h"
31 #include "main/formats.h"
32 #include "main/fbobject.h"
33 #include "main/samplerobj.h"
34 #include "main/framebuffer.h"
35 #include "program/prog_parameter.h"
36 #include "program/program.h"
37 #include "intel_mipmap_tree.h"
39 #include "util/ralloc.h"
42 * Return a bitfield where bit n is set if barycentric interpolation mode n
43 * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
46 brw_compute_barycentric_interp_modes(struct brw_context
*brw
,
47 bool shade_model_flat
,
48 bool persample_shading
,
49 const struct gl_fragment_program
*fprog
)
51 unsigned barycentric_interp_modes
= 0;
54 /* Loop through all fragment shader inputs to figure out what interpolation
55 * modes are in use, and set the appropriate bits in
56 * barycentric_interp_modes.
58 for (attr
= 0; attr
< VARYING_SLOT_MAX
; ++attr
) {
59 enum glsl_interp_qualifier interp_qualifier
=
60 fprog
->InterpQualifier
[attr
];
61 bool is_centroid
= (fprog
->IsCentroid
& BITFIELD64_BIT(attr
)) &&
63 bool is_sample
= (fprog
->IsSample
& BITFIELD64_BIT(attr
)) ||
65 bool is_gl_Color
= attr
== VARYING_SLOT_COL0
|| attr
== VARYING_SLOT_COL1
;
67 /* Ignore unused inputs. */
68 if (!(fprog
->Base
.InputsRead
& BITFIELD64_BIT(attr
)))
71 /* Ignore WPOS and FACE, because they don't require interpolation. */
72 if (attr
== VARYING_SLOT_POS
|| attr
== VARYING_SLOT_FACE
)
75 /* Determine the set (or sets) of barycentric coordinates needed to
76 * interpolate this variable. Note that when
77 * brw->needs_unlit_centroid_workaround is set, centroid interpolation
78 * uses PIXEL interpolation for unlit pixels and CENTROID interpolation
79 * for lit pixels, so we need both sets of barycentric coordinates.
81 if (interp_qualifier
== INTERP_QUALIFIER_NOPERSPECTIVE
) {
83 barycentric_interp_modes
|=
84 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC
;
85 } else if (is_sample
) {
86 barycentric_interp_modes
|=
87 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC
;
89 if ((!is_centroid
&& !is_sample
) ||
90 brw
->needs_unlit_centroid_workaround
) {
91 barycentric_interp_modes
|=
92 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC
;
94 } else if (interp_qualifier
== INTERP_QUALIFIER_SMOOTH
||
95 (!(shade_model_flat
&& is_gl_Color
) &&
96 interp_qualifier
== INTERP_QUALIFIER_NONE
)) {
98 barycentric_interp_modes
|=
99 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC
;
100 } else if (is_sample
) {
101 barycentric_interp_modes
|=
102 1 << BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC
;
104 if ((!is_centroid
&& !is_sample
) ||
105 brw
->needs_unlit_centroid_workaround
) {
106 barycentric_interp_modes
|=
107 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC
;
112 return barycentric_interp_modes
;
116 computed_depth_mode(struct gl_fragment_program
*fp
)
118 if (fp
->Base
.OutputsWritten
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
)) {
119 switch (fp
->FragDepthLayout
) {
120 case FRAG_DEPTH_LAYOUT_NONE
:
121 case FRAG_DEPTH_LAYOUT_ANY
:
122 return BRW_PSCDEPTH_ON
;
123 case FRAG_DEPTH_LAYOUT_GREATER
:
124 return BRW_PSCDEPTH_ON_GE
;
125 case FRAG_DEPTH_LAYOUT_LESS
:
126 return BRW_PSCDEPTH_ON_LE
;
127 case FRAG_DEPTH_LAYOUT_UNCHANGED
:
128 return BRW_PSCDEPTH_OFF
;
131 return BRW_PSCDEPTH_OFF
;
135 brw_wm_prog_data_compare(const void *in_a
, const void *in_b
)
137 const struct brw_wm_prog_data
*a
= in_a
;
138 const struct brw_wm_prog_data
*b
= in_b
;
140 /* Compare the base structure. */
141 if (!brw_stage_prog_data_compare(&a
->base
, &b
->base
))
144 /* Compare the rest of the structure. */
145 const unsigned offset
= sizeof(struct brw_stage_prog_data
);
146 if (memcmp(((char *) a
) + offset
, ((char *) b
) + offset
,
147 sizeof(struct brw_wm_prog_data
) - offset
))
154 * All Mesa program -> GPU code generation goes through this function.
155 * Depending on the instructions used (i.e. flow control instructions)
156 * we'll use one of two code generators.
159 brw_codegen_wm_prog(struct brw_context
*brw
,
160 struct gl_shader_program
*prog
,
161 struct brw_fragment_program
*fp
,
162 struct brw_wm_prog_key
*key
)
164 struct gl_context
*ctx
= &brw
->ctx
;
165 void *mem_ctx
= ralloc_context(NULL
);
166 struct brw_wm_prog_data prog_data
;
167 const GLuint
*program
;
168 struct gl_shader
*fs
= NULL
;
172 fs
= prog
->_LinkedShaders
[MESA_SHADER_FRAGMENT
];
174 memset(&prog_data
, 0, sizeof(prog_data
));
175 /* key->alpha_test_func means simulating alpha testing via discards,
176 * so the shader definitely kills pixels.
178 prog_data
.uses_kill
= fp
->program
.UsesKill
|| key
->alpha_test_func
;
179 prog_data
.uses_omask
=
180 fp
->program
.Base
.OutputsWritten
& BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK
);
181 prog_data
.computed_depth_mode
= computed_depth_mode(&fp
->program
);
183 prog_data
.early_fragment_tests
= fs
&& fs
->EarlyFragmentTests
;
185 /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
187 prog_data
.base
.use_alt_mode
= true;
189 /* Allocate the references to the uniforms that will end up in the
190 * prog_data associated with the compiled program, and which will be freed
191 * by the state cache.
195 param_count
= fs
->num_uniform_components
+
196 fs
->NumImages
* BRW_IMAGE_PARAM_SIZE
;
197 prog_data
.base
.nr_image_params
= fs
->NumImages
;
199 param_count
= fp
->program
.Base
.Parameters
->NumParameters
* 4;
201 /* The backend also sometimes adds params for texture size. */
202 param_count
+= 2 * ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxTextureImageUnits
;
203 prog_data
.base
.param
=
204 rzalloc_array(NULL
, const gl_constant_value
*, param_count
);
205 prog_data
.base
.pull_param
=
206 rzalloc_array(NULL
, const gl_constant_value
*, param_count
);
207 prog_data
.base
.image_param
=
208 rzalloc_array(NULL
, struct brw_image_param
,
209 prog_data
.base
.nr_image_params
);
210 prog_data
.base
.nr_params
= param_count
;
212 prog_data
.barycentric_interp_modes
=
213 brw_compute_barycentric_interp_modes(brw
, key
->flat_shade
,
214 key
->persample_shading
,
217 program
= brw_wm_fs_emit(brw
, mem_ctx
, key
, &prog_data
,
218 &fp
->program
, prog
, &program_size
);
219 if (program
== NULL
) {
220 ralloc_free(mem_ctx
);
224 if (prog_data
.base
.total_scratch
) {
225 brw_get_scratch_bo(brw
, &brw
->wm
.base
.scratch_bo
,
226 prog_data
.base
.total_scratch
* brw
->max_wm_threads
);
229 if (unlikely(INTEL_DEBUG
& DEBUG_WM
))
230 fprintf(stderr
, "\n");
232 brw_upload_cache(&brw
->cache
, BRW_CACHE_FS_PROG
,
233 key
, sizeof(struct brw_wm_prog_key
),
234 program
, program_size
,
235 &prog_data
, sizeof(prog_data
),
236 &brw
->wm
.base
.prog_offset
, &brw
->wm
.prog_data
);
238 ralloc_free(mem_ctx
);
/**
 * Report one program-key field that differs between two compiles.
 *
 * Emits a perf_debug() line of the form "<name> <a>-><b>" when the two
 * values differ.
 *
 * \return true if a != b (and a message was emitted), false otherwise.
 */
static bool
key_debug(struct brw_context *brw, const char *name, int a, int b)
{
   if (a != b) {
      perf_debug(" %s %d->%d\n", name, a, b);
      return true;
   } else {
      return false;
   }
}
255 brw_debug_recompile_sampler_key(struct brw_context
*brw
,
256 const struct brw_sampler_prog_key_data
*old_key
,
257 const struct brw_sampler_prog_key_data
*key
)
261 for (unsigned int i
= 0; i
< MAX_SAMPLERS
; i
++) {
262 found
|= key_debug(brw
, "EXT_texture_swizzle or DEPTH_TEXTURE_MODE",
263 old_key
->swizzles
[i
], key
->swizzles
[i
]);
265 found
|= key_debug(brw
, "GL_CLAMP enabled on any texture unit's 1st coordinate",
266 old_key
->gl_clamp_mask
[0], key
->gl_clamp_mask
[0]);
267 found
|= key_debug(brw
, "GL_CLAMP enabled on any texture unit's 2nd coordinate",
268 old_key
->gl_clamp_mask
[1], key
->gl_clamp_mask
[1]);
269 found
|= key_debug(brw
, "GL_CLAMP enabled on any texture unit's 3rd coordinate",
270 old_key
->gl_clamp_mask
[2], key
->gl_clamp_mask
[2]);
271 found
|= key_debug(brw
, "gather channel quirk on any texture unit",
272 old_key
->gather_channel_quirk_mask
, key
->gather_channel_quirk_mask
);
273 found
|= key_debug(brw
, "compressed multisample layout",
274 old_key
->compressed_multisample_layout_mask
,
275 key
->compressed_multisample_layout_mask
);
277 for (unsigned int i
= 0; i
< MAX_SAMPLERS
; i
++) {
278 found
|= key_debug(brw
, "textureGather workarounds",
279 old_key
->gen6_gather_wa
[i
], key
->gen6_gather_wa
[i
]);
286 brw_wm_debug_recompile(struct brw_context
*brw
,
287 struct gl_shader_program
*prog
,
288 const struct brw_wm_prog_key
*key
)
290 struct brw_cache_item
*c
= NULL
;
291 const struct brw_wm_prog_key
*old_key
= NULL
;
294 perf_debug("Recompiling fragment shader for program %d\n", prog
->Name
);
296 for (unsigned int i
= 0; i
< brw
->cache
.size
; i
++) {
297 for (c
= brw
->cache
.items
[i
]; c
; c
= c
->next
) {
298 if (c
->cache_id
== BRW_CACHE_FS_PROG
) {
301 if (old_key
->program_string_id
== key
->program_string_id
)
310 perf_debug(" Didn't find previous compile in the shader cache for debug\n");
314 found
|= key_debug(brw
, "alphatest, computed depth, depth test, or "
316 old_key
->iz_lookup
, key
->iz_lookup
);
317 found
|= key_debug(brw
, "depth statistics",
318 old_key
->stats_wm
, key
->stats_wm
);
319 found
|= key_debug(brw
, "flat shading",
320 old_key
->flat_shade
, key
->flat_shade
);
321 found
|= key_debug(brw
, "per-sample shading",
322 old_key
->persample_shading
, key
->persample_shading
);
323 found
|= key_debug(brw
, "per-sample shading and 2x MSAA",
324 old_key
->persample_2x
, key
->persample_2x
);
325 found
|= key_debug(brw
, "number of color buffers",
326 old_key
->nr_color_regions
, key
->nr_color_regions
);
327 found
|= key_debug(brw
, "MRT alpha test or alpha-to-coverage",
328 old_key
->replicate_alpha
, key
->replicate_alpha
);
329 found
|= key_debug(brw
, "rendering to FBO",
330 old_key
->render_to_fbo
, key
->render_to_fbo
);
331 found
|= key_debug(brw
, "fragment color clamping",
332 old_key
->clamp_fragment_color
, key
->clamp_fragment_color
);
333 found
|= key_debug(brw
, "line smoothing",
334 old_key
->line_aa
, key
->line_aa
);
335 found
|= key_debug(brw
, "renderbuffer height",
336 old_key
->drawable_height
, key
->drawable_height
);
337 found
|= key_debug(brw
, "input slots valid",
338 old_key
->input_slots_valid
, key
->input_slots_valid
);
339 found
|= key_debug(brw
, "mrt alpha test function",
340 old_key
->alpha_test_func
, key
->alpha_test_func
);
341 found
|= key_debug(brw
, "mrt alpha test reference value",
342 old_key
->alpha_test_ref
, key
->alpha_test_ref
);
344 found
|= brw_debug_recompile_sampler_key(brw
, &old_key
->tex
, &key
->tex
);
347 perf_debug(" Something else\n");
352 gen6_gather_workaround(GLenum internalformat
)
354 switch (internalformat
) {
355 case GL_R8I
: return WA_SIGN
| WA_8BIT
;
356 case GL_R8UI
: return WA_8BIT
;
357 case GL_R16I
: return WA_SIGN
| WA_16BIT
;
358 case GL_R16UI
: return WA_16BIT
;
360 /* Note that even though GL_R32I and GL_R32UI have format overrides in
361 * the surface state, there is no shader w/a required.
368 brw_populate_sampler_prog_key_data(struct gl_context
*ctx
,
369 const struct gl_program
*prog
,
370 unsigned sampler_count
,
371 struct brw_sampler_prog_key_data
*key
)
373 struct brw_context
*brw
= brw_context(ctx
);
375 for (int s
= 0; s
< sampler_count
; s
++) {
376 key
->swizzles
[s
] = SWIZZLE_NOOP
;
378 if (!(prog
->SamplersUsed
& (1 << s
)))
381 int unit_id
= prog
->SamplerUnits
[s
];
382 const struct gl_texture_unit
*unit
= &ctx
->Texture
.Unit
[unit_id
];
384 if (unit
->_Current
&& unit
->_Current
->Target
!= GL_TEXTURE_BUFFER
) {
385 const struct gl_texture_object
*t
= unit
->_Current
;
386 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
387 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit_id
);
389 const bool alpha_depth
= t
->DepthMode
== GL_ALPHA
&&
390 (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
391 img
->_BaseFormat
== GL_DEPTH_STENCIL
);
393 /* Haswell handles texture swizzling as surface format overrides
394 * (except for GL_ALPHA); all other platforms need MOVs in the shader.
396 if (alpha_depth
|| (brw
->gen
< 8 && !brw
->is_haswell
))
397 key
->swizzles
[s
] = brw_get_texture_swizzle(ctx
, t
);
400 sampler
->MinFilter
!= GL_NEAREST
&&
401 sampler
->MagFilter
!= GL_NEAREST
) {
402 if (sampler
->WrapS
== GL_CLAMP
)
403 key
->gl_clamp_mask
[0] |= 1 << s
;
404 if (sampler
->WrapT
== GL_CLAMP
)
405 key
->gl_clamp_mask
[1] |= 1 << s
;
406 if (sampler
->WrapR
== GL_CLAMP
)
407 key
->gl_clamp_mask
[2] |= 1 << s
;
410 /* gather4's channel select for green from RG32F is broken; requires
411 * a shader w/a on IVB; fixable with just SCS on HSW.
413 if (brw
->gen
== 7 && !brw
->is_haswell
&& prog
->UsesGather
) {
414 if (img
->InternalFormat
== GL_RG32F
)
415 key
->gather_channel_quirk_mask
|= 1 << s
;
418 /* Gen6's gather4 is broken for UINT/SINT; we treat them as
419 * UNORM/FLOAT instead and fix it in the shader.
421 if (brw
->gen
== 6 && prog
->UsesGather
) {
422 key
->gen6_gather_wa
[s
] = gen6_gather_workaround(img
->InternalFormat
);
425 /* If this is a multisample sampler, and uses the CMS MSAA layout,
426 * then we need to emit slightly different code to first sample the
429 struct intel_texture_object
*intel_tex
=
430 intel_texture_object((struct gl_texture_object
*)t
);
433 intel_tex
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
) {
434 key
->compressed_multisample_layout_mask
|= 1 << s
;
441 brw_wm_state_dirty (struct brw_context
*brw
)
443 return brw_state_dirty(brw
,
455 BRW_NEW_FRAGMENT_PROGRAM
|
456 BRW_NEW_REDUCED_PRIMITIVE
|
458 BRW_NEW_VUE_MAP_GEOM_OUT
);
462 brw_wm_populate_key(struct brw_context
*brw
, struct brw_wm_prog_key
*key
)
464 struct gl_context
*ctx
= &brw
->ctx
;
465 /* BRW_NEW_FRAGMENT_PROGRAM */
466 const struct brw_fragment_program
*fp
=
467 (struct brw_fragment_program
*) brw
->fragment_program
;
468 const struct gl_program
*prog
= (struct gl_program
*) brw
->fragment_program
;
471 bool program_uses_dfdy
= fp
->program
.UsesDFdy
;
472 const bool multisample_fbo
= _mesa_geometric_samples(ctx
->DrawBuffer
) > 1;
474 memset(key
, 0, sizeof(*key
));
476 /* Build the index for table lookup
480 if (fp
->program
.UsesKill
|| ctx
->Color
.AlphaEnabled
)
481 lookup
|= IZ_PS_KILL_ALPHATEST_BIT
;
483 if (fp
->program
.Base
.OutputsWritten
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
))
484 lookup
|= IZ_PS_COMPUTES_DEPTH_BIT
;
488 lookup
|= IZ_DEPTH_TEST_ENABLE_BIT
;
490 if (ctx
->Depth
.Test
&& ctx
->Depth
.Mask
) /* ?? */
491 lookup
|= IZ_DEPTH_WRITE_ENABLE_BIT
;
493 /* _NEW_STENCIL | _NEW_BUFFERS */
494 if (ctx
->Stencil
._Enabled
) {
495 lookup
|= IZ_STENCIL_TEST_ENABLE_BIT
;
497 if (ctx
->Stencil
.WriteMask
[0] ||
498 ctx
->Stencil
.WriteMask
[ctx
->Stencil
._BackFace
])
499 lookup
|= IZ_STENCIL_WRITE_ENABLE_BIT
;
501 key
->iz_lookup
= lookup
;
506 /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
507 if (ctx
->Line
.SmoothFlag
) {
508 if (brw
->reduced_primitive
== GL_LINES
) {
511 else if (brw
->reduced_primitive
== GL_TRIANGLES
) {
512 if (ctx
->Polygon
.FrontMode
== GL_LINE
) {
513 line_aa
= AA_SOMETIMES
;
515 if (ctx
->Polygon
.BackMode
== GL_LINE
||
516 (ctx
->Polygon
.CullFlag
&&
517 ctx
->Polygon
.CullFaceMode
== GL_BACK
))
520 else if (ctx
->Polygon
.BackMode
== GL_LINE
) {
521 line_aa
= AA_SOMETIMES
;
523 if ((ctx
->Polygon
.CullFlag
&&
524 ctx
->Polygon
.CullFaceMode
== GL_FRONT
))
530 key
->line_aa
= line_aa
;
533 key
->high_quality_derivatives
=
534 ctx
->Hint
.FragmentShaderDerivative
== GL_NICEST
;
537 key
->stats_wm
= brw
->stats_wm
;
540 key
->flat_shade
= (ctx
->Light
.ShadeModel
== GL_FLAT
);
542 /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
543 key
->clamp_fragment_color
= ctx
->Color
._ClampFragmentColor
;
546 brw_populate_sampler_prog_key_data(ctx
, prog
, brw
->wm
.base
.sampler_count
,
551 * Include the draw buffer origin and height so that we can calculate
552 * fragment position values relative to the bottom left of the drawable,
553 * from the incoming screen origin relative position we get as part of our
556 * This is only needed for the WM_WPOSXY opcode when the fragment program
557 * uses the gl_FragCoord input.
559 * We could avoid recompiling by including this as a constant referenced by
560 * our program, but if we were to do that it would also be nice to handle
561 * getting that constant updated at batchbuffer submit time (when we
562 * hold the lock and know where the buffer really is) rather than at emit
563 * time when we don't hold the lock and are just guessing. We could also
564 * just avoid using this as key data if the program doesn't use
567 * For DRI2 the origin_x/y will always be (0,0) but we still need the
568 * drawable height in order to invert the Y axis.
570 if (fp
->program
.Base
.InputsRead
& VARYING_BIT_POS
) {
571 key
->drawable_height
= _mesa_geometric_height(ctx
->DrawBuffer
);
574 if ((fp
->program
.Base
.InputsRead
& VARYING_BIT_POS
) || program_uses_dfdy
) {
575 key
->render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
);
579 key
->nr_color_regions
= ctx
->DrawBuffer
->_NumColorDrawBuffers
;
581 /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
582 key
->replicate_alpha
= ctx
->DrawBuffer
->_NumColorDrawBuffers
> 1 &&
583 (ctx
->Multisample
.SampleAlphaToCoverage
|| ctx
->Color
.AlphaEnabled
);
585 /* _NEW_BUFFERS _NEW_MULTISAMPLE */
586 /* Ignore sample qualifier while computing this flag. */
587 key
->persample_shading
=
588 _mesa_get_min_invocations_per_fragment(ctx
, &fp
->program
, true) > 1;
589 if (key
->persample_shading
)
590 key
->persample_2x
= _mesa_geometric_samples(ctx
->DrawBuffer
) == 2;
592 key
->compute_pos_offset
=
593 _mesa_get_min_invocations_per_fragment(ctx
, &fp
->program
, false) > 1 &&
594 fp
->program
.Base
.SystemValuesRead
& SYSTEM_BIT_SAMPLE_POS
;
596 key
->compute_sample_id
=
598 ctx
->Multisample
.Enabled
&&
599 (fp
->program
.Base
.SystemValuesRead
& SYSTEM_BIT_SAMPLE_ID
);
601 /* BRW_NEW_VUE_MAP_GEOM_OUT */
602 if (brw
->gen
< 6 || _mesa_bitcount_64(fp
->program
.Base
.InputsRead
&
603 BRW_FS_VARYING_INPUT_MASK
) > 16)
604 key
->input_slots_valid
= brw
->vue_map_geom_out
.slots_valid
;
607 /* _NEW_COLOR | _NEW_BUFFERS */
608 /* Pre-gen6, the hardware alpha test always used each render
609 * target's alpha to do alpha test, as opposed to render target 0's alpha
610 * like GL requires. Fix that by building the alpha test into the
611 * shader, and we'll skip enabling the fixed function alpha test.
613 if (brw
->gen
< 6 && ctx
->DrawBuffer
->_NumColorDrawBuffers
> 1 &&
614 ctx
->Color
.AlphaEnabled
) {
615 key
->alpha_test_func
= ctx
->Color
.AlphaFunc
;
616 key
->alpha_test_ref
= ctx
->Color
.AlphaRef
;
619 /* The unique fragment program ID */
620 key
->program_string_id
= fp
->id
;
624 brw_upload_wm_prog(struct brw_context
*brw
)
626 struct gl_context
*ctx
= &brw
->ctx
;
627 struct gl_shader_program
*current
= ctx
->_Shader
->_CurrentFragmentProgram
;
628 struct brw_wm_prog_key key
;
629 struct brw_fragment_program
*fp
= (struct brw_fragment_program
*)
630 brw
->fragment_program
;
632 if (!brw_wm_state_dirty(brw
))
635 brw_wm_populate_key(brw
, &key
);
637 if (!brw_search_cache(&brw
->cache
, BRW_CACHE_FS_PROG
,
639 &brw
->wm
.base
.prog_offset
, &brw
->wm
.prog_data
)) {
640 bool success
= brw_codegen_wm_prog(brw
, current
, fp
, &key
);
644 brw
->wm
.base
.prog_data
= &brw
->wm
.prog_data
->base
;
648 brw_fs_precompile(struct gl_context
*ctx
,
649 struct gl_shader_program
*shader_prog
,
650 struct gl_program
*prog
)
652 struct brw_context
*brw
= brw_context(ctx
);
653 struct brw_wm_prog_key key
;
655 struct gl_fragment_program
*fp
= (struct gl_fragment_program
*) prog
;
656 struct brw_fragment_program
*bfp
= brw_fragment_program(fp
);
657 bool program_uses_dfdy
= fp
->UsesDFdy
;
659 memset(&key
, 0, sizeof(key
));
663 key
.iz_lookup
|= IZ_PS_KILL_ALPHATEST_BIT
;
665 if (fp
->Base
.OutputsWritten
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
))
666 key
.iz_lookup
|= IZ_PS_COMPUTES_DEPTH_BIT
;
668 /* Just assume depth testing. */
669 key
.iz_lookup
|= IZ_DEPTH_TEST_ENABLE_BIT
;
670 key
.iz_lookup
|= IZ_DEPTH_WRITE_ENABLE_BIT
;
673 if (brw
->gen
< 6 || _mesa_bitcount_64(fp
->Base
.InputsRead
&
674 BRW_FS_VARYING_INPUT_MASK
) > 16)
675 key
.input_slots_valid
= fp
->Base
.InputsRead
| VARYING_BIT_POS
;
677 brw_setup_tex_for_precompile(brw
, &key
.tex
, &fp
->Base
);
679 if (fp
->Base
.InputsRead
& VARYING_BIT_POS
) {
680 key
.drawable_height
= ctx
->DrawBuffer
->Height
;
683 key
.nr_color_regions
= _mesa_bitcount_64(fp
->Base
.OutputsWritten
&
684 ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH
) |
685 BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK
)));
687 if ((fp
->Base
.InputsRead
& VARYING_BIT_POS
) || program_uses_dfdy
) {
688 key
.render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
) ||
689 key
.nr_color_regions
> 1;
692 key
.program_string_id
= bfp
->id
;
694 uint32_t old_prog_offset
= brw
->wm
.base
.prog_offset
;
695 struct brw_wm_prog_data
*old_prog_data
= brw
->wm
.prog_data
;
697 bool success
= brw_codegen_wm_prog(brw
, shader_prog
, bfp
, &key
);
699 brw
->wm
.base
.prog_offset
= old_prog_offset
;
700 brw
->wm
.prog_data
= old_prog_data
;