/*
 * Copyright (C) Intel Corp. 2006. All Rights Reserved.
 * Intel funded Tungsten Graphics to
 * develop this 3D driver.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
26 #include "brw_context.h"
28 #include "brw_state.h"
29 #include "brw_shader.h"
30 #include "main/enums.h"
31 #include "main/formats.h"
32 #include "main/fbobject.h"
33 #include "main/samplerobj.h"
34 #include "main/framebuffer.h"
35 #include "program/prog_parameter.h"
36 #include "program/program.h"
37 #include "intel_mipmap_tree.h"
39 #include "brw_program.h"
41 #include "util/ralloc.h"
// Lays out the start of the fragment shader binding table.
//
// Render targets come first: binding_table.render_target_start is set to the
// running offset (0), and MAX2(key->nr_color_regions, 1) entries are reserved
// so that one slot exists even when there are no color regions. The next free
// offset is then handed to brw_assign_common_binding_table_offsets() together
// with devinfo, shader_prog, prog and &prog_data->base.
//
// Parameters:
//   devinfo     - device description, forwarded unchanged.
//   shader_prog - linked GLSL program, forwarded unchanged.
//   prog        - the gl_program, forwarded unchanged.
//   key         - only nr_color_regions is read here.
//   prog_data   - receives render_target_start; its base member is forwarded.
//
// NOTE(review): this listing has lost some original lines (return type,
// braces, comment terminators). Confirm against the real file before editing.
44 assign_fs_binding_table_offsets(const struct brw_device_info
*devinfo
,
45 const struct gl_shader_program
*shader_prog
,
46 const struct gl_program
*prog
,
47 const struct brw_wm_prog_key
*key
,
48 struct brw_wm_prog_data
*prog_data
)
// Running cursor into the binding table; starts at surface index 0.
50 uint32_t next_binding_table_offset
= 0;
52 /* If there are no color regions, we still perform an FB write to a null
53 * renderbuffer, which we place at surface index 0.
55 prog_data
->binding_table
.render_target_start
= next_binding_table_offset
;
// Reserve at least one render target slot (see the null renderbuffer note).
56 next_binding_table_offset
+= MAX2(key
->nr_color_regions
, 1);
// Everything after the render targets is laid out by the shared helper.
58 brw_assign_common_binding_table_offsets(MESA_SHADER_FRAGMENT
, devinfo
,
59 shader_prog
, prog
, &prog_data
->base
,
60 next_binding_table_offset
);
// Compiles the fragment program fp for the given key and uploads the result
// into the program cache.
//
// Visible steps, in order:
//   1. zero prog_data and assign the binding table offsets;
//   2. size and allocate the param, pull_param and image_param arrays;
//   3. set up uniforms (GLSL or ARB flavour);
//   4. call brw_compile_fs();
//   5. on success, optionally request a scratch BO and call
//      brw_upload_cache() under BRW_CACHE_FS_PROG.
//
// Parameters:
//   brw  - driver context (cache, batch, debug flags, wm state).
//   prog - linked GLSL program. NOTE(review): later code tests fs for NULL,
//          so prog is presumably allowed to be NULL (ARB programs); the
//          guarding conditions are not visible in this listing.
//   fp   - fragment program whose NIR is compiled.
//   key  - program key; also used as the cache key.
//
// Returns: both visible callers store the result in a bool named success.
// The return statements themselves are not visible in this listing.
//
// NOTE(review): this listing has lost some original lines (return type,
// braces, comment terminators, a few statements). Confirm against the real
// file before editing.
64 * All Mesa program -> GPU code generation goes through this function.
65 * Depending on the instructions used (i.e. flow control instructions)
66 * we'll use one of two code generators.
69 brw_codegen_wm_prog(struct brw_context
*brw
,
70 struct gl_shader_program
*prog
,
71 struct brw_fragment_program
*fp
,
72 struct brw_wm_prog_key
*key
)
74 struct gl_context
*ctx
= &brw
->ctx
;
// Temporary ralloc context: passed to brw_compile_fs() and released with
// ralloc_free() on both the failure path and at the end.
75 void *mem_ctx
= ralloc_context(NULL
);
76 struct brw_wm_prog_data prog_data
;
77 const GLuint
*program
;
78 struct brw_shader
*fs
= NULL
;
// start_busy and start_time are only filled in when brw->perf_debug is set.
80 bool start_busy
= false;
81 double start_time
= 0;
// Linked fragment stage of prog.
// NOTE(review): presumably guarded by a check that prog is non-NULL.
84 fs
= (struct brw_shader
*)prog
->_LinkedShaders
[MESA_SHADER_FRAGMENT
];
86 memset(&prog_data
, 0, sizeof(prog_data
));
88 /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
// NOTE(review): per the comment above this is presumably conditional on the
// program being an ARB program; the condition is not visible here.
90 prog_data
.base
.use_alt_mode
= true;
92 assign_fs_binding_table_offsets(brw
->intelScreen
->devinfo
, prog
,
93 &fp
->program
.Base
, key
, &prog_data
);
95 /* Allocate the references to the uniforms that will end up in the
96 * prog_data associated with the compiled program, and which will be freed
// Start from the uniform count recorded in the NIR shader.
99 int param_count
= fp
->program
.Base
.nir
->num_uniforms
;
// NOTE(review): fs is dereferenced here; presumably guarded by a NULL check.
101 prog_data
.base
.nr_image_params
= fs
->base
.NumImages
;
102 /* The backend also sometimes adds params for texture size. */
103 param_count
+= 2 * ctx
->Const
.Program
[MESA_SHADER_FRAGMENT
].MaxTextureImageUnits
;
// The three arrays below are allocated with a NULL ralloc parent, not with
// mem_ctx, so they are not children of mem_ctx.
// NOTE(review): on the compile failure path only mem_ctx is visibly freed;
// confirm that these arrays are not leaked there.
104 prog_data
.base
.param
=
105 rzalloc_array(NULL
, const gl_constant_value
*, param_count
);
106 prog_data
.base
.pull_param
=
107 rzalloc_array(NULL
, const gl_constant_value
*, param_count
);
108 prog_data
.base
.image_param
=
109 rzalloc_array(NULL
, struct brw_image_param
,
110 prog_data
.base
.nr_image_params
);
111 prog_data
.base
.nr_params
= param_count
;
// NOTE(review): both the GLSL and the ARB uniform setup calls appear below.
// Presumably exactly one runs, selected by whether prog is set. The selecting
// condition and the tail of the ARB call are not visible in this listing.
114 brw_nir_setup_glsl_uniforms(fp
->program
.Base
.nir
, prog
, &fp
->program
.Base
,
115 &prog_data
.base
, true);
117 brw_nir_setup_arb_uniforms(fp
->program
.Base
.nir
, &fp
->program
.Base
,
// With perf_debug on, remember whether the last batch BO was busy and when
// the compile started, so a stall can be reported afterwards.
121 if (unlikely(brw
->perf_debug
)) {
122 start_busy
= (brw
->batch
.last_bo
&&
123 drm_intel_bo_busy(brw
->batch
.last_bo
));
124 start_time
= get_time();
// Optional IR dump; passes NULL for the shader when fs is NULL.
127 if (unlikely(INTEL_DEBUG
& DEBUG_WM
))
128 brw_dump_ir("fragment", prog
, fs
? &fs
->base
: NULL
, &fp
->program
.Base
);
// Shader time indices stay at -1 unless DEBUG_SHADER_TIME is enabled.
130 int st_index8
= -1, st_index16
= -1;
131 if (INTEL_DEBUG
& DEBUG_SHADER_TIME
) {
132 st_index8
= brw_get_shader_time_index(brw
, prog
, &fp
->program
.Base
, ST_FS8
);
133 st_index16
= brw_get_shader_time_index(brw
, prog
, &fp
->program
.Base
, ST_FS16
);
// Run the backend compiler. program_size and error_str are out parameters.
// NOTE(review): the declaration of program_size is not visible here.
136 char *error_str
= NULL
;
137 program
= brw_compile_fs(brw
->intelScreen
->compiler
, brw
, mem_ctx
,
138 key
, &prog_data
, fp
->program
.Base
.nir
,
139 &fp
->program
.Base
, st_index8
, st_index16
,
140 brw
->use_rep_send
, &program_size
, &error_str
);
// Failure path: mark the link as failed, append the error to the info log,
// report it through _mesa_problem() and free the temporary context.
// NOTE(review): the prog dereferences are presumably guarded by a NULL check,
// and a return presumably follows the ralloc_free(); neither is visible.
141 if (program
== NULL
) {
143 prog
->LinkStatus
= false;
144 ralloc_strcat(&prog
->InfoLog
, error_str
);
147 _mesa_problem(NULL
, "Failed to compile fragment shader: %s\n", error_str
);
149 ralloc_free(mem_ctx
);
// Perf diagnostics: report a recompile if this shader was compiled before,
// and report a GPU stall if the BO was busy at start but is idle now.
153 if (unlikely(brw
->perf_debug
) && fs
) {
154 if (fs
->compiled_once
)
155 brw_wm_debug_recompile(brw
, prog
, key
);
156 fs
->compiled_once
= true;
158 if (start_busy
&& !drm_intel_bo_busy(brw
->batch
.last_bo
)) {
159 perf_debug("FS compile took %.03f ms and stalled the GPU\n",
160 (get_time() - start_time
) * 1000);
// Scratch space is requested as total_scratch times max_wm_threads.
164 if (prog_data
.base
.total_scratch
) {
165 brw_get_scratch_bo(brw
, &brw
->wm
.base
.scratch_bo
,
166 prog_data
.base
.total_scratch
* brw
->max_wm_threads
);
169 if (unlikely(INTEL_DEBUG
& DEBUG_WM
))
170 fprintf(stderr
, "\n");
// Store the binary and prog_data in the cache keyed by *key. The cache fills
// in brw->wm.base.prog_offset and brw->wm.prog_data.
172 brw_upload_cache(&brw
->cache
, BRW_CACHE_FS_PROG
,
173 key
, sizeof(struct brw_wm_prog_key
),
174 program
, program_size
,
175 &prog_data
, sizeof(prog_data
),
176 &brw
->wm
.base
.prog_offset
, &brw
->wm
.prog_data
);
178 ralloc_free(mem_ctx
);
// Compares the sampler part of two program keys field by field for recompile
// diagnostics.
//
// Each field pair is passed to key_debug() with a human readable label, and
// the results are OR-ed into found.
//
// Parameters:
//   brw     - driver context, forwarded to key_debug().
//   old_key - sampler key of the earlier compile.
//   key     - sampler key of the current compile.
//
// Returns: the caller in brw_wm_debug_recompile() ORs the result into its own
// found flag, so this presumably returns found. The return statement and the
// declaration of found are not visible in this listing.
//
// NOTE(review): this listing has lost some original lines. Confirm against
// the real file before editing.
184 brw_debug_recompile_sampler_key(struct brw_context
*brw
,
185 const struct brw_sampler_prog_key_data
*old_key
,
186 const struct brw_sampler_prog_key_data
*key
)
// Per sampler swizzles.
190 for (unsigned int i
= 0; i
< MAX_SAMPLERS
; i
++) {
191 found
|= key_debug(brw
, "EXT_texture_swizzle or DEPTH_TEXTURE_MODE",
192 old_key
->swizzles
[i
], key
->swizzles
[i
]);
// GL_CLAMP masks, one per texture coordinate.
194 found
|= key_debug(brw
, "GL_CLAMP enabled on any texture unit's 1st coordinate",
195 old_key
->gl_clamp_mask
[0], key
->gl_clamp_mask
[0]);
196 found
|= key_debug(brw
, "GL_CLAMP enabled on any texture unit's 2nd coordinate",
197 old_key
->gl_clamp_mask
[1], key
->gl_clamp_mask
[1]);
198 found
|= key_debug(brw
, "GL_CLAMP enabled on any texture unit's 3rd coordinate",
199 old_key
->gl_clamp_mask
[2], key
->gl_clamp_mask
[2]);
200 found
|= key_debug(brw
, "gather channel quirk on any texture unit",
201 old_key
->gather_channel_quirk_mask
, key
->gather_channel_quirk_mask
);
202 found
|= key_debug(brw
, "compressed multisample layout",
203 old_key
->compressed_multisample_layout_mask
,
204 key
->compressed_multisample_layout_mask
);
// NOTE(review): the value arguments of this call are not visible here.
205 found
|= key_debug(brw
, "16x msaa",
// Per sampler gen6 textureGather workaround flags.
209 for (unsigned int i
= 0; i
< MAX_SAMPLERS
; i
++) {
210 found
|= key_debug(brw
, "textureGather workarounds",
211 old_key
->gen6_gather_wa
[i
], key
->gen6_gather_wa
[i
]);
// Reports why a fragment shader had to be recompiled.
//
// Emits a perf_debug() line naming the program, then walks every bucket of
// the program cache (brw->cache.items[0 .. size-1], following the next
// pointers) looking at BRW_CACHE_FS_PROG entries whose program_string_id
// matches the new key. The old and new keys are then compared field by field
// through key_debug(), and the sampler part through
// brw_debug_recompile_sampler_key().
//
// Parameters:
//   brw  - driver context holding the program cache.
//   prog - program being recompiled; only prog->Name is read here.
//   key  - key of the new compile.
//
// NOTE(review): this listing has lost some original lines: the declaration of
// found, the assignment of old_key from the cache item, the loop exits and
// the conditions guarding the two fallback messages. Confirm against the
// real file before editing.
218 brw_wm_debug_recompile(struct brw_context
*brw
,
219 struct gl_shader_program
*prog
,
220 const struct brw_wm_prog_key
*key
)
222 struct brw_cache_item
*c
= NULL
;
223 const struct brw_wm_prog_key
*old_key
= NULL
;
226 perf_debug("Recompiling fragment shader for program %d\n", prog
->Name
);
// Outer loop: cache buckets. Inner loop: chain within a bucket.
228 for (unsigned int i
= 0; i
< brw
->cache
.size
; i
++) {
229 for (c
= brw
->cache
.items
[i
]; c
; c
= c
->next
) {
230 if (c
->cache_id
== BRW_CACHE_FS_PROG
) {
// Match on the program id so only a previous compile of the same program
// is compared.
233 if (old_key
->program_string_id
== key
->program_string_id
)
// NOTE(review): presumably emitted only when no earlier compile was found.
242 perf_debug(" Didn't find previous compile in the shader cache for debug\n");
// Field by field comparison; each differing field is reported by key_debug().
246 found
|= key_debug(brw
, "alphatest, computed depth, depth test, or "
248 old_key
->iz_lookup
, key
->iz_lookup
);
249 found
|= key_debug(brw
, "depth statistics",
250 old_key
->stats_wm
, key
->stats_wm
);
251 found
|= key_debug(brw
, "flat shading",
252 old_key
->flat_shade
, key
->flat_shade
);
253 found
|= key_debug(brw
, "per-sample shading",
254 old_key
->persample_shading
, key
->persample_shading
);
255 found
|= key_debug(brw
, "number of color buffers",
256 old_key
->nr_color_regions
, key
->nr_color_regions
);
257 found
|= key_debug(brw
, "MRT alpha test or alpha-to-coverage",
258 old_key
->replicate_alpha
, key
->replicate_alpha
);
259 found
|= key_debug(brw
, "rendering to FBO",
260 old_key
->render_to_fbo
, key
->render_to_fbo
);
261 found
|= key_debug(brw
, "fragment color clamping",
262 old_key
->clamp_fragment_color
, key
->clamp_fragment_color
);
263 found
|= key_debug(brw
, "multisampled FBO",
264 old_key
->multisample_fbo
, key
->multisample_fbo
);
265 found
|= key_debug(brw
, "line smoothing",
266 old_key
->line_aa
, key
->line_aa
);
267 found
|= key_debug(brw
, "renderbuffer height",
268 old_key
->drawable_height
, key
->drawable_height
);
269 found
|= key_debug(brw
, "input slots valid",
270 old_key
->input_slots_valid
, key
->input_slots_valid
);
271 found
|= key_debug(brw
, "mrt alpha test function",
272 old_key
->alpha_test_func
, key
->alpha_test_func
);
273 found
|= key_debug(brw
, "mrt alpha test reference value",
274 old_key
->alpha_test_ref
, key
->alpha_test_ref
);
// The sampler portion of the key has its own comparison helper.
276 found
|= brw_debug_recompile_sampler_key(brw
, &old_key
->tex
, &key
->tex
);
// NOTE(review): presumably emitted only when no field difference was found.
279 perf_debug(" Something else\n");
// Maps a GL internal format to the gather workaround flags it needs.
//
// Visible mapping:
//   GL_R8I   -> WA_SIGN | WA_8BIT
//   GL_R8UI  -> WA_8BIT
//   GL_R16I  -> WA_SIGN | WA_16BIT
//   GL_R16UI -> WA_16BIT
//
// NOTE(review): the result for every other format (the default case) and the
// return type are not visible in this listing. Confirm against the real file.
284 gen6_gather_workaround(GLenum internalformat
)
286 switch (internalformat
) {
287 case GL_R8I
: return WA_SIGN
| WA_8BIT
;
288 case GL_R8UI
: return WA_8BIT
;
289 case GL_R16I
: return WA_SIGN
| WA_16BIT
;
290 case GL_R16UI
: return WA_16BIT
;
292 /* Note that even though GL_R32I and GL_R32UI have format overrides in
293 * the surface state, there is no shader w/a required.
// Fills the sampler portion of a program key from current GL texture state.
//
// For each sampler index s below sampler_count the swizzle defaults to
// SWIZZLE_NOOP. For samplers the program uses, and whose bound texture is not
// a buffer texture, the function may also set:
//   - swizzles[s] from brw_get_texture_swizzle();
//   - bit s of gl_clamp_mask[0..2] for GL_CLAMP wrap modes;
//   - bit s of gather_channel_quirk_mask for GL_RG32F with gather;
//   - gen6_gather_wa[s] from gen6_gather_workaround();
//   - bit s of compressed_multisample_layout_mask and msaa_16.
//
// Parameters:
//   ctx           - GL context; texture units and sampler objects are read.
//   prog          - program; SamplersUsed, SamplerUnits, UsesGather are read.
//   sampler_count - number of sampler slots to visit.
//   key           - output; only written, never cleared here.
//
// NOTE(review): the loop index s is an int compared against an unsigned
// count, and the masks are built with a signed 1 << s. If s can reach 31 the
// shift is undefined in C; an unsigned constant would avoid that. Confirm the
// upper bound of sampler_count.
// NOTE(review): this listing has lost some original lines, including the
// start of several conditions. Confirm against the real file before editing.
300 brw_populate_sampler_prog_key_data(struct gl_context
*ctx
,
301 const struct gl_program
*prog
,
302 unsigned sampler_count
,
303 struct brw_sampler_prog_key_data
*key
)
305 struct brw_context
*brw
= brw_context(ctx
);
307 for (int s
= 0; s
< sampler_count
; s
++) {
// Default for every slot, including unused ones.
308 key
->swizzles
[s
] = SWIZZLE_NOOP
;
// Unused sampler slots get no further treatment.
// NOTE(review): the statement controlled by this test is not visible;
// presumably it skips to the next sampler.
310 if (!(prog
->SamplersUsed
& (1 << s
)))
// Translate the sampler index into the texture unit it reads from.
313 int unit_id
= prog
->SamplerUnits
[s
];
314 const struct gl_texture_unit
*unit
= &ctx
->Texture
.Unit
[unit_id
];
// Only units with a bound texture that is not a buffer texture are examined.
316 if (unit
->_Current
&& unit
->_Current
->Target
!= GL_TEXTURE_BUFFER
) {
317 const struct gl_texture_object
*t
= unit
->_Current
;
// Base level image of the first face; its formats drive the checks below.
318 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
319 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit_id
);
// True for depth or depth-stencil textures sampled with GL_ALPHA depth mode.
321 const bool alpha_depth
= t
->DepthMode
== GL_ALPHA
&&
322 (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
323 img
->_BaseFormat
== GL_DEPTH_STENCIL
);
325 /* Haswell handles texture swizzling as surface format overrides
326 * (except for GL_ALPHA); all other platforms need MOVs in the shader.
328 if (alpha_depth
|| (brw
->gen
< 8 && !brw
->is_haswell
))
329 key
->swizzles
[s
] = brw_get_texture_swizzle(ctx
, t
);
// GL_CLAMP handling applies only when neither filter is GL_NEAREST.
// NOTE(review): the first part of this condition is not visible here.
332 sampler
->MinFilter
!= GL_NEAREST
&&
333 sampler
->MagFilter
!= GL_NEAREST
) {
334 if (sampler
->WrapS
== GL_CLAMP
)
335 key
->gl_clamp_mask
[0] |= 1 << s
;
336 if (sampler
->WrapT
== GL_CLAMP
)
337 key
->gl_clamp_mask
[1] |= 1 << s
;
338 if (sampler
->WrapR
== GL_CLAMP
)
339 key
->gl_clamp_mask
[2] |= 1 << s
;
342 /* gather4's channel select for green from RG32F is broken; requires
343 * a shader w/a on IVB; fixable with just SCS on HSW.
345 if (brw
->gen
== 7 && !brw
->is_haswell
&& prog
->UsesGather
) {
346 if (img
->InternalFormat
== GL_RG32F
)
347 key
->gather_channel_quirk_mask
|= 1 << s
;
350 /* Gen6's gather4 is broken for UINT/SINT; we treat them as
351 * UNORM/FLOAT instead and fix it in the shader.
353 if (brw
->gen
== 6 && prog
->UsesGather
) {
354 key
->gen6_gather_wa
[s
] = gen6_gather_workaround(img
->InternalFormat
);
357 /* If this is a multisample sampler, and uses the CMS MSAA layout,
358 * then we need to emit slightly different code to first sample the
// The const qualifier of t is cast away to obtain the driver texture object.
361 struct intel_texture_object
*intel_tex
=
362 intel_texture_object((struct gl_texture_object
*)t
);
364 /* From gen9 onwards some single sampled buffers can also be
365 * compressed. These don't need ld2dms sampling along with mcs fetch.
// NOTE(review): the first part of this condition is not visible here.
368 intel_tex
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
&&
369 intel_tex
->mt
->num_samples
> 1) {
370 key
->compressed_multisample_layout_mask
|= 1 << s
;
// 16 or more samples are asserted to occur only on gen9 and later.
372 if (intel_tex
->mt
->num_samples
>= 16) {
373 assert(brw
->gen
>= 9);
374 key
->msaa_16
|= 1 << s
;
// Reports whether any state the fragment program key depends on is dirty.
//
// Forwards to brw_state_dirty() with a set of dirty flags and returns its
// result. The visible flags are BRW_NEW_FRAGMENT_PROGRAM,
// BRW_NEW_REDUCED_PRIMITIVE and BRW_NEW_VUE_MAP_GEOM_OUT.
//
// NOTE(review): most of the flag list, including the flags that precede the
// ones shown, is not visible in this listing. Confirm against the real file.
382 brw_wm_state_dirty (struct brw_context
*brw
)
384 return brw_state_dirty(brw
,
396 BRW_NEW_FRAGMENT_PROGRAM
|
397 BRW_NEW_REDUCED_PRIMITIVE
|
399 BRW_NEW_VUE_MAP_GEOM_OUT
);
// Builds the fragment program key from the current GL and driver state.
//
// The key is zeroed first and then filled with: the iz_lookup bit set (kill
// and alpha test, computed depth, depth and stencil test and write), line
// antialiasing mode, derivative quality, statistics, flat shading, fragment
// color clamping, sampler key data, drawable height and FBO orientation,
// color region count, dual source blend, alpha replication, per sample
// shading flags, multisample FBO flag, valid input slots, the in-shader
// alpha test parameters and finally the program id.
//
// Parameters:
//   brw - driver context; brw->fragment_program supplies the program.
//   key - output key, fully overwritten.
//
// NOTE(review): this listing has lost some original lines, including the
// declarations of lookup and line_aa and several branch bodies. Confirm
// against the real file before editing.
403 brw_wm_populate_key(struct brw_context
*brw
, struct brw_wm_prog_key
*key
)
405 struct gl_context
*ctx
= &brw
->ctx
;
406 /* BRW_NEW_FRAGMENT_PROGRAM */
// The same pointer is viewed both as the driver fragment program and as the
// generic gl_program.
407 const struct brw_fragment_program
*fp
=
408 (struct brw_fragment_program
*) brw
->fragment_program
;
409 const struct gl_program
*prog
= (struct gl_program
*) brw
->fragment_program
;
412 bool program_uses_dfdy
= fp
->program
.UsesDFdy
;
// Zero the whole key so unset fields compare equal between keys.
414 memset(key
, 0, sizeof(*key
));
416 /* Build the index for table lookup
// Discard in the shader or GL alpha test both count as kill or alpha test.
420 if (fp
->program
.UsesKill
|| ctx
->Color
.AlphaEnabled
)
421 lookup
|= IZ_PS_KILL_ALPHATEST_BIT
;
423 if (fp
->program
.Base
.OutputsWritten
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
))
424 lookup
|= IZ_PS_COMPUTES_DEPTH_BIT
;
// NOTE(review): the condition guarding this bit is not visible here.
428 lookup
|= IZ_DEPTH_TEST_ENABLE_BIT
;
430 if (ctx
->Depth
.Test
&& ctx
->Depth
.Mask
) /* ?? */
431 lookup
|= IZ_DEPTH_WRITE_ENABLE_BIT
;
433 /* _NEW_STENCIL | _NEW_BUFFERS */
434 if (ctx
->Stencil
._Enabled
) {
435 lookup
|= IZ_STENCIL_TEST_ENABLE_BIT
;
// Stencil writes count when either the front or the active back face write
// mask is non-zero.
437 if (ctx
->Stencil
.WriteMask
[0] ||
438 ctx
->Stencil
.WriteMask
[ctx
->Stencil
._BackFace
])
439 lookup
|= IZ_STENCIL_WRITE_ENABLE_BIT
;
441 key
->iz_lookup
= lookup
;
// Line antialiasing mode depends on the reduced primitive and, for
// triangles, on which polygon faces are drawn as lines and which are culled.
// NOTE(review): several assignments to line_aa are not visible here.
446 /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
447 if (ctx
->Line
.SmoothFlag
) {
448 if (brw
->reduced_primitive
== GL_LINES
) {
451 else if (brw
->reduced_primitive
== GL_TRIANGLES
) {
452 if (ctx
->Polygon
.FrontMode
== GL_LINE
) {
453 line_aa
= AA_SOMETIMES
;
455 if (ctx
->Polygon
.BackMode
== GL_LINE
||
456 (ctx
->Polygon
.CullFlag
&&
457 ctx
->Polygon
.CullFaceMode
== GL_BACK
))
460 else if (ctx
->Polygon
.BackMode
== GL_LINE
) {
461 line_aa
= AA_SOMETIMES
;
463 if ((ctx
->Polygon
.CullFlag
&&
464 ctx
->Polygon
.CullFaceMode
== GL_FRONT
))
470 key
->line_aa
= line_aa
;
473 key
->high_quality_derivatives
=
474 ctx
->Hint
.FragmentShaderDerivative
== GL_NICEST
;
477 key
->stats_wm
= brw
->stats_wm
;
480 key
->flat_shade
= (ctx
->Light
.ShadeModel
== GL_FLAT
);
482 /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
483 key
->clamp_fragment_color
= ctx
->Color
._ClampFragmentColor
;
// Sampler dependent key data.
// NOTE(review): the final argument of this call is not visible here.
486 brw_populate_sampler_prog_key_data(ctx
, prog
, brw
->wm
.base
.sampler_count
,
491 * Include the draw buffer origin and height so that we can calculate
492 * fragment position values relative to the bottom left of the drawable,
493 * from the incoming screen origin relative position we get as part of our
496 * This is only needed for the WM_WPOSXY opcode when the fragment program
497 * uses the gl_FragCoord input.
499 * We could avoid recompiling by including this as a constant referenced by
500 * our program, but if we were to do that it would also be nice to handle
501 * getting that constant updated at batchbuffer submit time (when we
502 * hold the lock and know where the buffer really is) rather than at emit
503 * time when we don't hold the lock and are just guessing. We could also
504 * just avoid using this as key data if the program doesn't use
507 * For DRI2 the origin_x/y will always be (0,0) but we still need the
508 * drawable height in order to invert the Y axis.
// The drawable height is recorded only when the program reads position.
510 if (fp
->program
.Base
.InputsRead
& VARYING_BIT_POS
) {
511 key
->drawable_height
= _mesa_geometric_height(ctx
->DrawBuffer
);
// FBO orientation is recorded when the program reads position or uses dFdy.
514 if ((fp
->program
.Base
.InputsRead
& VARYING_BIT_POS
) || program_uses_dfdy
) {
515 key
->render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
);
519 key
->nr_color_regions
= ctx
->DrawBuffer
->_NumColorDrawBuffers
;
// Dual source blending by location: needs the driver flag, blending enabled
// on draw buffer 0 and a dual source blend function on it.
522 key
->force_dual_color_blend
= brw
->dual_color_blend_by_location
&&
523 (ctx
->Color
.BlendEnabled
& 1) && ctx
->Color
.Blend
[0]._UsesDualSrc
;
525 /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
526 key
->replicate_alpha
= ctx
->DrawBuffer
->_NumColorDrawBuffers
> 1 &&
527 (ctx
->Multisample
.SampleAlphaToCoverage
|| ctx
->Color
.AlphaEnabled
);
529 /* _NEW_BUFFERS _NEW_MULTISAMPLE */
530 /* Ignore sample qualifier while computing this flag. */
531 key
->persample_shading
=
532 _mesa_get_min_invocations_per_fragment(ctx
, &fp
->program
, true) > 1;
// Position offsets are computed only when more than one invocation per
// fragment is needed and the program reads the sample position.
534 key
->compute_pos_offset
=
535 _mesa_get_min_invocations_per_fragment(ctx
, &fp
->program
, false) > 1 &&
536 fp
->program
.Base
.SystemValuesRead
& SYSTEM_BIT_SAMPLE_POS
;
538 key
->multisample_fbo
= ctx
->Multisample
.Enabled
&&
539 _mesa_geometric_samples(ctx
->DrawBuffer
) > 1;
// Valid input slots enter the key only before gen6 or when the program reads
// more than 16 varying inputs.
541 /* BRW_NEW_VUE_MAP_GEOM_OUT */
542 if (brw
->gen
< 6 || _mesa_bitcount_64(fp
->program
.Base
.InputsRead
&
543 BRW_FS_VARYING_INPUT_MASK
) > 16)
544 key
->input_slots_valid
= brw
->vue_map_geom_out
.slots_valid
;
547 /* _NEW_COLOR | _NEW_BUFFERS */
548 /* Pre-gen6, the hardware alpha test always used each render
549 * target's alpha to do alpha test, as opposed to render target 0's alpha
550 * like GL requires. Fix that by building the alpha test into the
551 * shader, and we'll skip enabling the fixed function alpha test.
553 if (brw
->gen
< 6 && ctx
->DrawBuffer
->_NumColorDrawBuffers
> 1 &&
554 ctx
->Color
.AlphaEnabled
) {
555 key
->alpha_test_func
= ctx
->Color
.AlphaFunc
;
556 key
->alpha_test_ref
= ctx
->Color
.AlphaRef
;
559 /* The unique fragment program ID */
560 key
->program_string_id
= fp
->id
;
// Makes sure a compiled fragment program matching current state is bound.
//
// Checks brw_wm_state_dirty(), builds a key with brw_wm_populate_key(), looks
// it up in the program cache under BRW_CACHE_FS_PROG and, on a miss, compiles
// through brw_codegen_wm_prog(). Finally brw->wm.base.prog_data is pointed at
// the base member of brw->wm.prog_data.
//
// Parameters:
//   brw - driver context; the current shader program and fragment program
//         are taken from it.
//
// NOTE(review): this listing has lost some original lines: the statement
// controlled by the dirty check (presumably an early return), the key
// arguments of brw_search_cache() and whatever consumes success. Confirm
// against the real file before editing.
564 brw_upload_wm_prog(struct brw_context
*brw
)
566 struct gl_context
*ctx
= &brw
->ctx
;
567 struct gl_shader_program
*current
= ctx
->_Shader
->_CurrentFragmentProgram
;
568 struct brw_wm_prog_key key
;
569 struct brw_fragment_program
*fp
= (struct brw_fragment_program
*)
570 brw
->fragment_program
;
572 if (!brw_wm_state_dirty(brw
))
575 brw_wm_populate_key(brw
, &key
);
// Cache lookup; prog_offset and prog_data are filled in on a hit.
577 if (!brw_search_cache(&brw
->cache
, BRW_CACHE_FS_PROG
,
579 &brw
->wm
.base
.prog_offset
, &brw
->wm
.prog_data
)) {
// Cache miss: compile and upload a new variant for this key.
580 bool success
= brw_codegen_wm_prog(brw
, current
, fp
, &key
);
// Publish the stage independent part of the program data.
584 brw
->wm
.base
.prog_data
= &brw
->wm
.prog_data
->base
;
// Compiles a fragment program ahead of time using a guessed key.
//
// The key is zeroed and filled with assumptions instead of live draw state:
// depth test and depth write are assumed on, the color region count is
// derived from the outputs the program writes, and sampler state comes from
// brw_setup_tex_for_precompile(). The current brw->wm.base.prog_offset and
// brw->wm.prog_data are saved before brw_codegen_wm_prog() and restored
// afterwards, so the precompile does not change which program is bound.
//
// Parameters:
//   ctx         - GL context.
//   shader_prog - linked GLSL program, forwarded to brw_codegen_wm_prog().
//   prog        - program to compile; viewed as a gl_fragment_program.
//
// NOTE(review): drawable_height is read here from ctx->DrawBuffer->Height,
// while brw_wm_populate_key() uses _mesa_geometric_height(ctx->DrawBuffer).
// Confirm whether the difference is intended.
// NOTE(review): this listing has lost some original lines and the end of the
// function (including what is returned) is not visible. Confirm against the
// real file before editing.
588 brw_fs_precompile(struct gl_context
*ctx
,
589 struct gl_shader_program
*shader_prog
,
590 struct gl_program
*prog
)
592 struct brw_context
*brw
= brw_context(ctx
);
593 struct brw_wm_prog_key key
;
595 struct gl_fragment_program
*fp
= (struct gl_fragment_program
*) prog
;
596 struct brw_fragment_program
*bfp
= brw_fragment_program(fp
);
597 bool program_uses_dfdy
= fp
->UsesDFdy
;
599 memset(&key
, 0, sizeof(key
));
// NOTE(review): the condition guarding this bit is not visible here.
603 key
.iz_lookup
|= IZ_PS_KILL_ALPHATEST_BIT
;
605 if (fp
->Base
.OutputsWritten
& BITFIELD64_BIT(FRAG_RESULT_DEPTH
))
606 key
.iz_lookup
|= IZ_PS_COMPUTES_DEPTH_BIT
;
608 /* Just assume depth testing. */
609 key
.iz_lookup
|= IZ_DEPTH_TEST_ENABLE_BIT
;
610 key
.iz_lookup
|= IZ_DEPTH_WRITE_ENABLE_BIT
;
// Same gating as the live key, but the slot set is guessed as the inputs the
// program reads plus position.
613 if (brw
->gen
< 6 || _mesa_bitcount_64(fp
->Base
.InputsRead
&
614 BRW_FS_VARYING_INPUT_MASK
) > 16)
615 key
.input_slots_valid
= fp
->Base
.InputsRead
| VARYING_BIT_POS
;
617 brw_setup_tex_for_precompile(brw
, &key
.tex
, &fp
->Base
);
619 if (fp
->Base
.InputsRead
& VARYING_BIT_POS
) {
620 key
.drawable_height
= ctx
->DrawBuffer
->Height
;
// Count written outputs other than depth and sample mask as color regions.
623 key
.nr_color_regions
= _mesa_bitcount_64(fp
->Base
.OutputsWritten
&
624 ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH
) |
625 BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK
)));
// Guess FBO rendering when a user FBO is bound or when the program writes
// more than one color output.
627 if ((fp
->Base
.InputsRead
& VARYING_BIT_POS
) || program_uses_dfdy
) {
628 key
.render_to_fbo
= _mesa_is_user_fbo(ctx
->DrawBuffer
) ||
629 key
.nr_color_regions
> 1;
632 key
.program_string_id
= bfp
->id
;
// Save the currently bound program state; codegen overwrites both fields
// through brw_upload_cache().
634 uint32_t old_prog_offset
= brw
->wm
.base
.prog_offset
;
635 struct brw_wm_prog_data
*old_prog_data
= brw
->wm
.prog_data
;
637 bool success
= brw_codegen_wm_prog(brw
, shader_prog
, bfp
, &key
);
// Restore the bound program state saved above.
639 brw
->wm
.base
.prog_offset
= old_prog_offset
;
640 brw
->wm
.prog_data
= old_prog_data
;