2 * Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 * Intel funded Tungsten Graphics to
4 * develop this 3D driver.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 #include "brw_context.h"
28 #include "brw_state.h"
29 #include "main/enums.h"
30 #include "main/formats.h"
31 #include "main/fbobject.h"
32 #include "main/samplerobj.h"
33 #include "main/framebuffer.h"
34 #include "program/prog_parameter.h"
35 #include "program/program.h"
36 #include "intel_mipmap_tree.h"
37 #include "intel_image.h"
38 #include "intel_fbo.h"
39 #include "compiler/brw_nir.h"
40 #include "brw_program.h"
42 #include "util/ralloc.h"
43 #include "util/u_math.h"
46 assign_fs_binding_table_offsets(const struct gen_device_info
*devinfo
,
47 const struct gl_program
*prog
,
48 const struct brw_wm_prog_key
*key
,
49 struct brw_wm_prog_data
*prog_data
)
51 /* Render targets implicitly start at surface index 0. Even if there are
52 * no color regions, we still perform an FB write to a null render target,
53 * which will be surface 0.
55 uint32_t next_binding_table_offset
= MAX2(key
->nr_color_regions
, 1);
57 next_binding_table_offset
=
58 brw_assign_common_binding_table_offsets(devinfo
, prog
, &prog_data
->base
,
59 next_binding_table_offset
);
61 if (prog
->nir
->info
.outputs_read
&& !key
->coherent_fb_fetch
) {
62 prog_data
->binding_table
.render_target_read_start
=
63 next_binding_table_offset
;
64 next_binding_table_offset
+= key
->nr_color_regions
;
67 /* Update the binding table size */
68 prog_data
->base
.binding_table
.size_bytes
= next_binding_table_offset
* 4;
72 brw_wm_debug_recompile(struct brw_context
*brw
, struct gl_program
*prog
,
73 const struct brw_wm_prog_key
*key
)
75 perf_debug("Recompiling fragment shader for program %d\n", prog
->Id
);
78 const struct brw_wm_prog_key
*old_key
=
79 brw_find_previous_compile(&brw
->cache
, BRW_CACHE_FS_PROG
,
80 key
->program_string_id
);
83 perf_debug(" Didn't find previous compile in the shader cache for debug\n");
87 found
|= key_debug(brw
, "alphatest, computed depth, depth test, or "
89 old_key
->iz_lookup
, key
->iz_lookup
);
90 found
|= key_debug(brw
, "depth statistics",
91 old_key
->stats_wm
, key
->stats_wm
);
92 found
|= key_debug(brw
, "flat shading",
93 old_key
->flat_shade
, key
->flat_shade
);
94 found
|= key_debug(brw
, "number of color buffers",
95 old_key
->nr_color_regions
, key
->nr_color_regions
);
96 found
|= key_debug(brw
, "MRT alpha test or alpha-to-coverage",
97 old_key
->replicate_alpha
, key
->replicate_alpha
);
98 found
|= key_debug(brw
, "fragment color clamping",
99 old_key
->clamp_fragment_color
, key
->clamp_fragment_color
);
100 found
|= key_debug(brw
, "per-sample interpolation",
101 old_key
->persample_interp
, key
->persample_interp
);
102 found
|= key_debug(brw
, "multisampled FBO",
103 old_key
->multisample_fbo
, key
->multisample_fbo
);
104 found
|= key_debug(brw
, "frag coord adds sample pos",
105 old_key
->frag_coord_adds_sample_pos
,
106 key
->frag_coord_adds_sample_pos
);
107 found
|= key_debug(brw
, "line smoothing",
108 old_key
->line_aa
, key
->line_aa
);
109 found
|= key_debug(brw
, "high quality derivatives",
110 old_key
->high_quality_derivatives
,
111 key
->high_quality_derivatives
);
112 found
|= key_debug(brw
, "force dual color blending",
113 old_key
->force_dual_color_blend
,
114 key
->force_dual_color_blend
);
115 found
|= key_debug(brw
, "coherent fb fetch",
116 old_key
->coherent_fb_fetch
, key
->coherent_fb_fetch
);
118 found
|= key_debug(brw
, "input slots valid",
119 old_key
->input_slots_valid
, key
->input_slots_valid
);
120 found
|= key_debug(brw
, "mrt alpha test function",
121 old_key
->alpha_test_func
, key
->alpha_test_func
);
122 found
|= key_debug(brw
, "mrt alpha test reference value",
123 old_key
->alpha_test_ref
, key
->alpha_test_ref
);
125 found
|= brw_debug_recompile_sampler_key(brw
, &old_key
->tex
, &key
->tex
);
128 perf_debug(" Something else\n");
133 brw_codegen_wm_prog(struct brw_context
*brw
,
134 struct brw_program
*fp
,
135 struct brw_wm_prog_key
*key
,
136 struct brw_vue_map
*vue_map
)
138 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
139 void *mem_ctx
= ralloc_context(NULL
);
140 struct brw_wm_prog_data prog_data
;
141 const GLuint
*program
;
142 bool start_busy
= false;
143 double start_time
= 0;
145 nir_shader
*nir
= nir_shader_clone(mem_ctx
, fp
->program
.nir
);
147 memset(&prog_data
, 0, sizeof(prog_data
));
149 /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
150 if (fp
->program
.is_arb_asm
)
151 prog_data
.base
.use_alt_mode
= true;
153 assign_fs_binding_table_offsets(devinfo
, &fp
->program
, key
, &prog_data
);
155 if (!fp
->program
.is_arb_asm
) {
156 brw_nir_setup_glsl_uniforms(mem_ctx
, nir
, &fp
->program
,
157 &prog_data
.base
, true);
158 brw_nir_analyze_ubo_ranges(brw
->screen
->compiler
, nir
,
159 NULL
, prog_data
.base
.ubo_ranges
);
161 brw_nir_setup_arb_uniforms(mem_ctx
, nir
, &fp
->program
, &prog_data
.base
);
163 if (unlikely(INTEL_DEBUG
& DEBUG_WM
))
164 brw_dump_arb_asm("fragment", &fp
->program
);
167 if (unlikely(brw
->perf_debug
)) {
168 start_busy
= (brw
->batch
.last_bo
&&
169 brw_bo_busy(brw
->batch
.last_bo
));
170 start_time
= get_time();
173 int st_index8
= -1, st_index16
= -1, st_index32
= -1;
174 if (INTEL_DEBUG
& DEBUG_SHADER_TIME
) {
175 st_index8
= brw_get_shader_time_index(brw
, &fp
->program
, ST_FS8
,
176 !fp
->program
.is_arb_asm
);
177 st_index16
= brw_get_shader_time_index(brw
, &fp
->program
, ST_FS16
,
178 !fp
->program
.is_arb_asm
);
179 st_index32
= brw_get_shader_time_index(brw
, &fp
->program
, ST_FS32
,
180 !fp
->program
.is_arb_asm
);
183 char *error_str
= NULL
;
184 program
= brw_compile_fs(brw
->screen
->compiler
, brw
, mem_ctx
,
185 key
, &prog_data
, nir
,
186 &fp
->program
, st_index8
, st_index16
, st_index32
,
187 true, false, vue_map
,
190 if (program
== NULL
) {
191 if (!fp
->program
.is_arb_asm
) {
192 fp
->program
.sh
.data
->LinkStatus
= LINKING_FAILURE
;
193 ralloc_strcat(&fp
->program
.sh
.data
->InfoLog
, error_str
);
196 _mesa_problem(NULL
, "Failed to compile fragment shader: %s\n", error_str
);
198 ralloc_free(mem_ctx
);
202 if (unlikely(brw
->perf_debug
)) {
203 if (fp
->compiled_once
)
204 brw_wm_debug_recompile(brw
, &fp
->program
, key
);
205 fp
->compiled_once
= true;
207 if (start_busy
&& !brw_bo_busy(brw
->batch
.last_bo
)) {
208 perf_debug("FS compile took %.03f ms and stalled the GPU\n",
209 (get_time() - start_time
) * 1000);
213 brw_alloc_stage_scratch(brw
, &brw
->wm
.base
, prog_data
.base
.total_scratch
);
215 if (unlikely((INTEL_DEBUG
& DEBUG_WM
) && fp
->program
.is_arb_asm
))
216 fprintf(stderr
, "\n");
218 /* The param and pull_param arrays will be freed by the shader cache. */
219 ralloc_steal(NULL
, prog_data
.base
.param
);
220 ralloc_steal(NULL
, prog_data
.base
.pull_param
);
221 brw_upload_cache(&brw
->cache
, BRW_CACHE_FS_PROG
,
222 key
, sizeof(struct brw_wm_prog_key
),
223 program
, prog_data
.base
.program_size
,
224 &prog_data
, sizeof(prog_data
),
225 &brw
->wm
.base
.prog_offset
, &brw
->wm
.base
.prog_data
);
227 ralloc_free(mem_ctx
);
233 brw_debug_recompile_sampler_key(struct brw_context
*brw
,
234 const struct brw_sampler_prog_key_data
*old_key
,
235 const struct brw_sampler_prog_key_data
*key
)
239 for (unsigned int i
= 0; i
< MAX_SAMPLERS
; i
++) {
240 found
|= key_debug(brw
, "EXT_texture_swizzle or DEPTH_TEXTURE_MODE",
241 old_key
->swizzles
[i
], key
->swizzles
[i
]);
243 found
|= key_debug(brw
, "GL_CLAMP enabled on any texture unit's 1st coordinate",
244 old_key
->gl_clamp_mask
[0], key
->gl_clamp_mask
[0]);
245 found
|= key_debug(brw
, "GL_CLAMP enabled on any texture unit's 2nd coordinate",
246 old_key
->gl_clamp_mask
[1], key
->gl_clamp_mask
[1]);
247 found
|= key_debug(brw
, "GL_CLAMP enabled on any texture unit's 3rd coordinate",
248 old_key
->gl_clamp_mask
[2], key
->gl_clamp_mask
[2]);
249 found
|= key_debug(brw
, "gather channel quirk on any texture unit",
250 old_key
->gather_channel_quirk_mask
, key
->gather_channel_quirk_mask
);
251 found
|= key_debug(brw
, "compressed multisample layout",
252 old_key
->compressed_multisample_layout_mask
,
253 key
->compressed_multisample_layout_mask
);
254 found
|= key_debug(brw
, "16x msaa",
258 found
|= key_debug(brw
, "y_uv image bound",
259 old_key
->y_uv_image_mask
,
260 key
->y_uv_image_mask
);
261 found
|= key_debug(brw
, "y_u_v image bound",
262 old_key
->y_u_v_image_mask
,
263 key
->y_u_v_image_mask
);
264 found
|= key_debug(brw
, "yx_xuxv image bound",
265 old_key
->yx_xuxv_image_mask
,
266 key
->yx_xuxv_image_mask
);
267 found
|= key_debug(brw
, "xy_uxvx image bound",
268 old_key
->xy_uxvx_image_mask
,
269 key
->xy_uxvx_image_mask
);
270 found
|= key_debug(brw
, "ayuv image bound",
271 old_key
->ayuv_image_mask
,
272 key
->ayuv_image_mask
);
274 for (unsigned int i
= 0; i
< MAX_SAMPLERS
; i
++) {
275 found
|= key_debug(brw
, "textureGather workarounds",
276 old_key
->gen6_gather_wa
[i
], key
->gen6_gather_wa
[i
]);
279 for (unsigned int i
= 0; i
< MAX_SAMPLERS
; i
++) {
280 found
|= key_debug_float(brw
, "scale factor",
281 old_key
->scale_factors
[i
],
282 key
->scale_factors
[i
]);
289 gen6_gather_workaround(GLenum internalformat
)
291 switch (internalformat
) {
292 case GL_R8I
: return WA_SIGN
| WA_8BIT
;
293 case GL_R8UI
: return WA_8BIT
;
294 case GL_R16I
: return WA_SIGN
| WA_16BIT
;
295 case GL_R16UI
: return WA_16BIT
;
297 /* Note that even though GL_R32I and GL_R32UI have format overrides in
298 * the surface state, there is no shader w/a required.
305 brw_populate_sampler_prog_key_data(struct gl_context
*ctx
,
306 const struct gl_program
*prog
,
307 struct brw_sampler_prog_key_data
*key
)
309 struct brw_context
*brw
= brw_context(ctx
);
310 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
311 GLbitfield mask
= prog
->SamplersUsed
;
314 const int s
= u_bit_scan(&mask
);
316 key
->swizzles
[s
] = SWIZZLE_NOOP
;
317 key
->scale_factors
[s
] = 0.0f
;
319 int unit_id
= prog
->SamplerUnits
[s
];
320 const struct gl_texture_unit
*unit
= &ctx
->Texture
.Unit
[unit_id
];
322 if (unit
->_Current
&& unit
->_Current
->Target
!= GL_TEXTURE_BUFFER
) {
323 const struct gl_texture_object
*t
= unit
->_Current
;
324 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
325 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit_id
);
327 const bool alpha_depth
= t
->DepthMode
== GL_ALPHA
&&
328 (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
329 img
->_BaseFormat
== GL_DEPTH_STENCIL
);
331 /* Haswell handles texture swizzling as surface format overrides
332 * (except for GL_ALPHA); all other platforms need MOVs in the shader.
334 if (alpha_depth
|| (devinfo
->gen
< 8 && !devinfo
->is_haswell
))
335 key
->swizzles
[s
] = brw_get_texture_swizzle(ctx
, t
);
337 if (devinfo
->gen
< 8 &&
338 sampler
->MinFilter
!= GL_NEAREST
&&
339 sampler
->MagFilter
!= GL_NEAREST
) {
340 if (sampler
->WrapS
== GL_CLAMP
)
341 key
->gl_clamp_mask
[0] |= 1 << s
;
342 if (sampler
->WrapT
== GL_CLAMP
)
343 key
->gl_clamp_mask
[1] |= 1 << s
;
344 if (sampler
->WrapR
== GL_CLAMP
)
345 key
->gl_clamp_mask
[2] |= 1 << s
;
348 /* gather4 for RG32* is broken in multiple ways on Gen7. */
349 if (devinfo
->gen
== 7 && prog
->info
.uses_texture_gather
) {
350 switch (img
->InternalFormat
) {
353 /* We have to override the format to R32G32_FLOAT_LD.
354 * This means that SCS_ALPHA and SCS_ONE will return 0x3f8
355 * (1.0) rather than integer 1. This needs shader hacks.
357 * On Ivybridge, we whack W (alpha) to ONE in our key's
358 * swizzle. On Haswell, we look at the original texture
359 * swizzle, and use XYZW with channels overridden to ONE,
360 * leaving normal texture swizzling to SCS.
362 unsigned src_swizzle
=
363 devinfo
->is_haswell
? t
->_Swizzle
: key
->swizzles
[s
];
364 for (int i
= 0; i
< 4; i
++) {
365 unsigned src_comp
= GET_SWZ(src_swizzle
, i
);
366 if (src_comp
== SWIZZLE_ONE
|| src_comp
== SWIZZLE_W
) {
367 key
->swizzles
[i
] &= ~(0x7 << (3 * i
));
368 key
->swizzles
[i
] |= SWIZZLE_ONE
<< (3 * i
);
374 /* The channel select for green doesn't work - we have to
375 * request blue. Haswell can use SCS for this, but Ivybridge
376 * needs a shader workaround.
378 if (!devinfo
->is_haswell
)
379 key
->gather_channel_quirk_mask
|= 1 << s
;
384 /* Gen6's gather4 is broken for UINT/SINT; we treat them as
385 * UNORM/FLOAT instead and fix it in the shader.
387 if (devinfo
->gen
== 6 && prog
->info
.uses_texture_gather
) {
388 key
->gen6_gather_wa
[s
] = gen6_gather_workaround(img
->InternalFormat
);
391 /* If this is a multisample sampler, and uses the CMS MSAA layout,
392 * then we need to emit slightly different code to first sample the
395 struct intel_texture_object
*intel_tex
=
396 intel_texture_object((struct gl_texture_object
*)t
);
398 /* From gen9 onwards some single sampled buffers can also be
399 * compressed. These don't need ld2dms sampling along with mcs fetch.
401 if (intel_tex
->mt
->aux_usage
== ISL_AUX_USAGE_MCS
) {
402 assert(devinfo
->gen
>= 7);
403 assert(intel_tex
->mt
->surf
.samples
> 1);
404 assert(intel_tex
->mt
->aux_buf
);
405 assert(intel_tex
->mt
->surf
.msaa_layout
== ISL_MSAA_LAYOUT_ARRAY
);
406 key
->compressed_multisample_layout_mask
|= 1 << s
;
408 if (intel_tex
->mt
->surf
.samples
>= 16) {
409 assert(devinfo
->gen
>= 9);
410 key
->msaa_16
|= 1 << s
;
414 if (t
->Target
== GL_TEXTURE_EXTERNAL_OES
&& intel_tex
->planar_format
) {
416 /* Setup possible scaling factor. */
417 key
->scale_factors
[s
] = intel_tex
->planar_format
->scaling_factor
;
419 switch (intel_tex
->planar_format
->components
) {
420 case __DRI_IMAGE_COMPONENTS_Y_UV
:
421 key
->y_uv_image_mask
|= 1 << s
;
423 case __DRI_IMAGE_COMPONENTS_Y_U_V
:
424 key
->y_u_v_image_mask
|= 1 << s
;
426 case __DRI_IMAGE_COMPONENTS_Y_XUXV
:
427 key
->yx_xuxv_image_mask
|= 1 << s
;
429 case __DRI_IMAGE_COMPONENTS_Y_UXVX
:
430 key
->xy_uxvx_image_mask
|= 1 << s
;
432 case __DRI_IMAGE_COMPONENTS_AYUV
:
433 key
->ayuv_image_mask
|= 1 << s
;
445 brw_wm_state_dirty(const struct brw_context
*brw
)
447 return brw_state_dirty(brw
,
459 BRW_NEW_FRAGMENT_PROGRAM
|
460 BRW_NEW_REDUCED_PRIMITIVE
|
462 BRW_NEW_VUE_MAP_GEOM_OUT
);
466 brw_wm_populate_key(struct brw_context
*brw
, struct brw_wm_prog_key
*key
)
468 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
469 struct gl_context
*ctx
= &brw
->ctx
;
470 /* BRW_NEW_FRAGMENT_PROGRAM */
471 const struct gl_program
*prog
= brw
->programs
[MESA_SHADER_FRAGMENT
];
472 const struct brw_program
*fp
= brw_program_const(prog
);
476 memset(key
, 0, sizeof(*key
));
478 /* Build the index for table lookup
480 if (devinfo
->gen
< 6) {
481 struct intel_renderbuffer
*depth_irb
=
482 intel_get_renderbuffer(ctx
->DrawBuffer
, BUFFER_DEPTH
);
485 if (prog
->info
.fs
.uses_discard
|| ctx
->Color
.AlphaEnabled
) {
486 lookup
|= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT
;
489 if (prog
->info
.outputs_written
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
)) {
490 lookup
|= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT
;
494 if (depth_irb
&& ctx
->Depth
.Test
) {
495 lookup
|= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT
;
497 if (brw_depth_writes_enabled(brw
))
498 lookup
|= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT
;
501 /* _NEW_STENCIL | _NEW_BUFFERS */
502 if (brw
->stencil_enabled
) {
503 lookup
|= BRW_WM_IZ_STENCIL_TEST_ENABLE_BIT
;
505 if (ctx
->Stencil
.WriteMask
[0] ||
506 ctx
->Stencil
.WriteMask
[ctx
->Stencil
._BackFace
])
507 lookup
|= BRW_WM_IZ_STENCIL_WRITE_ENABLE_BIT
;
509 key
->iz_lookup
= lookup
;
512 line_aa
= BRW_WM_AA_NEVER
;
514 /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
515 if (ctx
->Line
.SmoothFlag
) {
516 if (brw
->reduced_primitive
== GL_LINES
) {
517 line_aa
= BRW_WM_AA_ALWAYS
;
519 else if (brw
->reduced_primitive
== GL_TRIANGLES
) {
520 if (ctx
->Polygon
.FrontMode
== GL_LINE
) {
521 line_aa
= BRW_WM_AA_SOMETIMES
;
523 if (ctx
->Polygon
.BackMode
== GL_LINE
||
524 (ctx
->Polygon
.CullFlag
&&
525 ctx
->Polygon
.CullFaceMode
== GL_BACK
))
526 line_aa
= BRW_WM_AA_ALWAYS
;
528 else if (ctx
->Polygon
.BackMode
== GL_LINE
) {
529 line_aa
= BRW_WM_AA_SOMETIMES
;
531 if ((ctx
->Polygon
.CullFlag
&&
532 ctx
->Polygon
.CullFaceMode
== GL_FRONT
))
533 line_aa
= BRW_WM_AA_ALWAYS
;
538 key
->line_aa
= line_aa
;
541 key
->high_quality_derivatives
=
542 prog
->info
.uses_fddx_fddy
&&
543 ctx
->Hint
.FragmentShaderDerivative
== GL_NICEST
;
545 if (devinfo
->gen
< 6)
546 key
->stats_wm
= brw
->stats_wm
;
550 (prog
->info
.inputs_read
& (VARYING_BIT_COL0
| VARYING_BIT_COL1
)) &&
551 (ctx
->Light
.ShadeModel
== GL_FLAT
);
553 /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
554 key
->clamp_fragment_color
= ctx
->Color
._ClampFragmentColor
;
557 brw_populate_sampler_prog_key_data(ctx
, prog
, &key
->tex
);
560 key
->nr_color_regions
= ctx
->DrawBuffer
->_NumColorDrawBuffers
;
563 key
->force_dual_color_blend
= brw
->dual_color_blend_by_location
&&
564 (ctx
->Color
.BlendEnabled
& 1) && ctx
->Color
.Blend
[0]._UsesDualSrc
;
566 /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
567 key
->replicate_alpha
= ctx
->DrawBuffer
->_NumColorDrawBuffers
> 1 &&
568 (_mesa_is_alpha_test_enabled(ctx
) ||
569 _mesa_is_alpha_to_coverage_enabled(ctx
));
571 /* _NEW_BUFFERS _NEW_MULTISAMPLE */
572 /* Ignore sample qualifier while computing this flag. */
573 if (ctx
->Multisample
.Enabled
) {
574 key
->persample_interp
=
575 ctx
->Multisample
.SampleShading
&&
576 (ctx
->Multisample
.MinSampleShadingValue
*
577 _mesa_geometric_samples(ctx
->DrawBuffer
) > 1);
579 key
->multisample_fbo
= _mesa_geometric_samples(ctx
->DrawBuffer
) > 1;
582 /* BRW_NEW_VUE_MAP_GEOM_OUT */
583 if (devinfo
->gen
< 6 || util_bitcount64(prog
->info
.inputs_read
&
584 BRW_FS_VARYING_INPUT_MASK
) > 16) {
585 key
->input_slots_valid
= brw
->vue_map_geom_out
.slots_valid
;
588 /* _NEW_COLOR | _NEW_BUFFERS */
589 /* Pre-gen6, the hardware alpha test always used each render
590 * target's alpha to do alpha test, as opposed to render target 0's alpha
591 * like GL requires. Fix that by building the alpha test into the
592 * shader, and we'll skip enabling the fixed function alpha test.
594 if (devinfo
->gen
< 6 && ctx
->DrawBuffer
->_NumColorDrawBuffers
> 1 &&
595 ctx
->Color
.AlphaEnabled
) {
596 key
->alpha_test_func
= ctx
->Color
.AlphaFunc
;
597 key
->alpha_test_ref
= ctx
->Color
.AlphaRef
;
600 /* The unique fragment program ID */
601 key
->program_string_id
= fp
->id
;
603 /* Whether reads from the framebuffer should behave coherently. */
604 key
->coherent_fb_fetch
= ctx
->Extensions
.EXT_shader_framebuffer_fetch
;
608 brw_upload_wm_prog(struct brw_context
*brw
)
610 struct brw_wm_prog_key key
;
611 struct brw_program
*fp
=
612 (struct brw_program
*) brw
->programs
[MESA_SHADER_FRAGMENT
];
614 if (!brw_wm_state_dirty(brw
))
617 brw_wm_populate_key(brw
, &key
);
619 if (brw_search_cache(&brw
->cache
, BRW_CACHE_FS_PROG
, &key
, sizeof(key
),
620 &brw
->wm
.base
.prog_offset
, &brw
->wm
.base
.prog_data
,
624 if (brw_disk_cache_upload_program(brw
, MESA_SHADER_FRAGMENT
))
627 fp
= (struct brw_program
*) brw
->programs
[MESA_SHADER_FRAGMENT
];
628 fp
->id
= key
.program_string_id
;
630 MAYBE_UNUSED
bool success
= brw_codegen_wm_prog(brw
, fp
, &key
,
631 &brw
->vue_map_geom_out
);
636 brw_wm_populate_default_key(const struct gen_device_info
*devinfo
,
637 struct brw_wm_prog_key
*key
,
638 struct gl_program
*prog
)
640 memset(key
, 0, sizeof(*key
));
642 uint64_t outputs_written
= prog
->info
.outputs_written
;
644 if (devinfo
->gen
< 6) {
645 if (prog
->info
.fs
.uses_discard
)
646 key
->iz_lookup
|= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT
;
648 if (outputs_written
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
))
649 key
->iz_lookup
|= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT
;
651 /* Just assume depth testing. */
652 key
->iz_lookup
|= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT
;
653 key
->iz_lookup
|= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT
;
656 if (devinfo
->gen
< 6 || util_bitcount64(prog
->info
.inputs_read
&
657 BRW_FS_VARYING_INPUT_MASK
) > 16) {
658 key
->input_slots_valid
= prog
->info
.inputs_read
| VARYING_BIT_POS
;
661 brw_setup_tex_for_precompile(devinfo
, &key
->tex
, prog
);
663 key
->nr_color_regions
= util_bitcount64(outputs_written
&
664 ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH
) |
665 BITFIELD64_BIT(FRAG_RESULT_STENCIL
) |
666 BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK
)));
668 key
->program_string_id
= brw_program(prog
)->id
;
670 /* Whether reads from the framebuffer should behave coherently. */
671 key
->coherent_fb_fetch
= devinfo
->gen
>= 9;
675 brw_fs_precompile(struct gl_context
*ctx
, struct gl_program
*prog
)
677 struct brw_context
*brw
= brw_context(ctx
);
678 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
679 struct brw_wm_prog_key key
;
681 struct brw_program
*bfp
= brw_program(prog
);
683 brw_wm_populate_default_key(&brw
->screen
->devinfo
, &key
, prog
);
685 /* check brw_wm_populate_default_key coherent_fb_fetch setting */
686 assert(key
.coherent_fb_fetch
==
687 ctx
->Extensions
.EXT_shader_framebuffer_fetch
);
689 uint32_t old_prog_offset
= brw
->wm
.base
.prog_offset
;
690 struct brw_stage_prog_data
*old_prog_data
= brw
->wm
.base
.prog_data
;
692 struct brw_vue_map vue_map
;
693 if (devinfo
->gen
< 6) {
694 brw_compute_vue_map(&brw
->screen
->devinfo
, &vue_map
,
695 prog
->info
.inputs_read
| VARYING_BIT_POS
,
699 bool success
= brw_codegen_wm_prog(brw
, bfp
, &key
, &vue_map
);
701 brw
->wm
.base
.prog_offset
= old_prog_offset
;
702 brw
->wm
.base
.prog_data
= old_prog_data
;