2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2015 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "ilo_debug.h"
29 #include "ilo_state_shader.h"
/*
 * Members of the intermediate fixed-function pixel shader state that the
 * functions below access through a struct pixel_ff pointer.  It is filled
 * in by ps_get_gen6_ff() and read by the ps_set_gen*() helpers.
 *
 *  - dispatch_modes: mask of GEN6_PS_DISPATCH_8/16/32 bits.
 *  - kernel_offsets, grf_starts: three slots each; see
 *    ps_get_gen6_ff_kernels() for how the slots are assigned.
 *  - per_thread_scratch_space: encoded exponent; per_thread_scratch_size is
 *    the matching size, 1 << (10 + per_thread_scratch_space).
 *  - sampler_count: stored in units of four samplers, saturating at 4.
 *  - thread_count: the value returned by ps_get_gen6_thread_count().
 *  - conds: dispatch conditions, later copied into struct ilo_state_ps.
 *  - dual_source_blending: copied from the params info.
 *
 * NOTE(review): the declaration line and some members used elsewhere in
 * this file (pcb_enable, has_uav, kill_pixel, dispatch_enable, sample_mask)
 * are not visible in this listing.
 */
32 uint8_t dispatch_modes
;
34 uint32_t kernel_offsets
[3];
35 uint8_t grf_starts
[3];
37 uint8_t per_thread_scratch_space
;
38 uint32_t per_thread_scratch_size
;
40 uint8_t sampler_count
;
41 uint8_t surface_count
;
44 uint16_t thread_count
;
46 struct ilo_state_ps_dispatch_conds conds
;
50 bool dual_source_blending
;
/*
 * Sanity-check a single PS kernel description.
 *
 * Checks dev with ILO_DEV_ASSERT(dev, 6, 8), then asserts that
 * kernel->offset is a multiple of 64 and that kernel->grf_start is below
 * 128 (the U7 field limit noted below).  All checks are assert()-based.
 * ps_validate_gen6() tests the result of this function as a boolean.
 */
55 ps_kernel_validate_gen6(const struct ilo_dev
*dev
,
56 const struct ilo_state_shader_kernel_info
*kernel
)
58 /* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */
59 const uint8_t max_grf_start
= 128;
61 ILO_DEV_ASSERT(dev
, 6, 8);
63 /* "Kernel Start Pointer" is 64-byte aligned */
64 assert(kernel
->offset
% 64 == 0);
66 assert(kernel
->grf_start
< max_grf_start
);
/*
 * Validate a whole ilo_state_ps_info before any hardware state is built.
 *
 * The three kernels (kernel_8, kernel_16, kernel_32) are each passed to
 * ps_kernel_validate_gen6().  The remaining checks are assert()-based:
 *
 *  - on Gen6, io->use_coverage_mask must be clear;
 *  - on Gen6, a computed depth (io->pscdepth other than GEN7_PSCDEPTH_OFF)
 *    requires info->cv_has_depth_buffer;
 *  - without info->per_sample_dispatch, io->posoffset must not be
 *    GEN6_POSOFFSET_SAMPLE and info->cv_per_sample_interp must be clear;
 *  - with any valid kernel, info->cv_has_earlyz_op must be clear;
 *  - io is asserted to be all zeroes (presumably only when there is no
 *    valid kernel, as the comment below says; the branch keyword is not
 *    visible in this listing -- TODO confirm).
 *
 * ps_get_gen6_ff() tests the result of this function as a boolean.
 */
72 ps_validate_gen6(const struct ilo_dev
*dev
,
73 const struct ilo_state_ps_info
*info
)
75 const struct ilo_state_shader_kernel_info
*kernel_8
= &info
->kernel_8
;
76 const struct ilo_state_shader_kernel_info
*kernel_16
= &info
->kernel_16
;
77 const struct ilo_state_shader_kernel_info
*kernel_32
= &info
->kernel_32
;
78 const struct ilo_state_ps_io_info
*io
= &info
->io
;
80 ILO_DEV_ASSERT(dev
, 6, 8);
82 if (!ps_kernel_validate_gen6(dev
, kernel_8
) ||
83 !ps_kernel_validate_gen6(dev
, kernel_16
) ||
84 !ps_kernel_validate_gen6(dev
, kernel_32
))
87 /* unsupported on Gen6 */
88 if (ilo_dev_gen(dev
) == ILO_GEN(6))
89 assert(!io
->use_coverage_mask
);
92 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
94 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
95 * field must be set to disabled."
97 if (ilo_dev_gen(dev
) == ILO_GEN(6) && io
->pscdepth
!= GEN7_PSCDEPTH_OFF
)
98 assert(info
->cv_has_depth_buffer
);
100 if (!info
->per_sample_dispatch
) {
102 * From the Sandy Bridge PRM, volume 2 part 1, page 281:
104 * "MSDISPMODE_PERSAMPLE is required in order to select
107 assert(io
->posoffset
!= GEN6_POSOFFSET_SAMPLE
);
110 * From the Sandy Bridge PRM, volume 2 part 1, page 282:
112 * "MSDISPMODE_PERSAMPLE is required in order to select
115 * From the Sandy Bridge PRM, volume 2 part 1, page 283:
117 * "MSDISPMODE_PERSAMPLE is required in order to select Perspective
118 * Sample or Non-perspective Sample barycentric coordinates."
120 assert(!info
->cv_per_sample_interp
);
125 * From the Sandy Bridge PRM, volume 2 part 1, page 314:
127 * "Pixel Shader Dispatch, Alpha... must all be disabled."
129 * Simply disallow any valid kernel when there is early-z op. Also, when
130 * there is no valid kernel, io should be zeroed.
132 if (info
->valid_kernels
)
133 assert(!info
->cv_has_earlyz_op
);
135 assert(ilo_is_zeroed(io
, sizeof(*io
)));
/*
 * Work out which dispatch modes may be enabled and return them as a mask
 * of GEN6_PS_DISPATCH_8/16/32 bits.
 *
 * The mask starts as info->valid_kernels and is then narrowed:
 *
 *  - on Gen6, when the shader computes depth and per-sample dispatch is
 *    off, only the SIMD8 bit is kept;
 *  - a further statement clears the SIMD8 bit; its condition is not
 *    visible in this listing (the comment below points at oMask --
 *    TODO confirm);
 *  - with per-sample dispatch and info->sample_count_one clear, only one
 *    mode is kept, preferring 32 over 16 over 8;
 *  - with info->rt_clear_enable or info->rt_resolve_enable set, the SIMD8
 *    bit is cleared.
 *
 * The final mask is asserted to be non-zero before it is returned.
 * NOTE(review): any early-out for an empty valid_kernels mask would sit in
 * lines that are not visible here.
 */
141 ps_get_gen6_dispatch_modes(const struct ilo_dev
*dev
,
142 const struct ilo_state_ps_info
*info
)
144 const struct ilo_state_ps_io_info
*io
= &info
->io
;
145 uint8_t dispatch_modes
= info
->valid_kernels
;
147 ILO_DEV_ASSERT(dev
, 6, 8);
153 * From the Sandy Bridge PRM, volume 2 part 1, page 334:
155 * "Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader
158 * "Valid on all products, except when in non-1x PERSAMPLE mode
159 * (applies to [DevSNB+] only)"
161 * From the Sandy Bridge PRM, volume 4 part 1, page 239:
163 * "[DevSNB]: When Pixel Shader outputs oDepth and PS invocation mode
164 * is PERPIXEL, Message Type for Render Target Write must be SIMD8.
166 * Errata: [DevSNB+]: When Pixel Shader outputs oMask, this message
167 * type is not supported: SIMD8 (including SIMD8_DUALSRC_xx)."
169 * It is really hard to follow what combinations are valid on what
170 * platforms. Judging from the restrictions on RT write messages on Gen6,
171 * oDepth and oMask related issues should be Gen6-specific. PERSAMPLE
172 * issue should be universal, and disallows multiple dispatch modes.
174 if (ilo_dev_gen(dev
) == ILO_GEN(6)) {
175 if (io
->pscdepth
!= GEN7_PSCDEPTH_OFF
&& !info
->per_sample_dispatch
)
176 dispatch_modes
&= GEN6_PS_DISPATCH_8
;
178 dispatch_modes
&= ~GEN6_PS_DISPATCH_8
;
180 if (info
->per_sample_dispatch
&& !info
->sample_count_one
) {
181 /* prefer 32 over 16 over 8 */
182 if (dispatch_modes
& GEN6_PS_DISPATCH_32
)
183 dispatch_modes
&= GEN6_PS_DISPATCH_32
;
184 else if (dispatch_modes
& GEN6_PS_DISPATCH_16
)
185 dispatch_modes
&= GEN6_PS_DISPATCH_16
;
187 dispatch_modes
&= GEN6_PS_DISPATCH_8
;
191 * From the Broadwell PRM, volume 2b, page 149:
193 * "When Render Target Fast Clear Enable is ENABLED or Render Target
194 * Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit (8 Pixel
195 * Dispatch or Dual-8 Pixel Dispatch Enable) must be DISABLED."
197 if (info
->rt_clear_enable
|| info
->rt_resolve_enable
)
198 dispatch_modes
&= ~GEN6_PS_DISPATCH_8
;
200 assert(dispatch_modes
);
202 return dispatch_modes
;
/*
 * Pick the maximum PS thread count for the device and return it MINUS ONE.
 *
 * The switch on ilo_dev_gen(dev) selects one of four rows:
 *
 *  - 64 - 1, commented as scaled automatically;
 *  - 408, 204 or 102 for dev->gt equal to 3, 2 or anything else;
 *  - 172 or 48 for dev->gt equal to 2 or anything else;
 *  - 80 or 40 for dev->gt equal to 2 or anything else (taken from the
 *    classic driver rather than the PRM).
 *
 * NOTE(review): the case labels are not visible in this listing, so the
 * generation that maps to each row needs confirming.  The info argument is
 * not read by any visible line.
 */
206 ps_get_gen6_thread_count(const struct ilo_dev
*dev
,
207 const struct ilo_state_ps_info
*info
)
209 uint16_t thread_count
;
211 ILO_DEV_ASSERT(dev
, 6, 8);
213 /* Maximum Number of Threads of 3DSTATE_PS */
214 switch (ilo_dev_gen(dev
)) {
216 /* scaled automatically */
217 thread_count
= 64 - 1;
220 thread_count
= (dev
->gt
== 3) ? 408 :
221 (dev
->gt
== 2) ? 204 : 102;
224 thread_count
= (dev
->gt
== 2) ? 172 : 48;
228 /* from the classic driver instead of the PRM */
229 thread_count
= (dev
->gt
== 2) ? 80 : 40;
233 return thread_count
- 1;
/*
 * Decide whether the Pixel Shader Kill Pixel bit should be set.
 *
 * Returns true when either conds->ps_may_kill or params->alpha_may_kill is
 * set.  In this file ps_may_kill is derived in ps_get_gen6_ff() from
 * io->write_pixel_mask and io->write_omask.  dev is only checked with
 * ILO_DEV_ASSERT(dev, 6, 8).
 */
237 ps_params_get_gen6_kill_pixel(const struct ilo_dev
*dev
,
238 const struct ilo_state_ps_params_info
*params
,
239 const struct ilo_state_ps_dispatch_conds
*conds
)
241 ILO_DEV_ASSERT(dev
, 6, 8);
244 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
246 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
247 * PS kernel or color calculator has the ability to kill (discard)
248 * pixels or samples, other than due to depth or stencil testing.
249 * This bit is required to be ENABLED in the following situations:
251 * The API pixel shader program contains "killpix" or "discard"
252 * instructions, or other code in the pixel shader kernel that can
253 * cause the final pixel mask to differ from the pixel mask received
256 * A sampler with chroma key enabled with kill pixel mode is used by
259 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
262 * The pixel shader kernel generates and outputs oMask.
264 * Note: As ClipDistance clipping is fully supported in hardware and
265 * therefore not via PS instructions, there should be no need to
266 * ENABLE this bit due to ClipDistance clipping."
268 return (conds
->ps_may_kill
|| params
->alpha_may_kill
);
/*
 * Decide whether PS thread dispatch must be enabled.
 *
 * Dispatch is required when any of these holds:
 *
 *  - conds->has_rt_write together with params->has_writeable_rt;
 *  - conds->write_odepth, conds->write_ostencil or conds->has_uav_write;
 *  - ps_params_get_gen6_kill_pixel() reports true;
 *  - params->earlyz_control_psexec.
 *
 * The later pre-Gen8 check that forces dispatch for earlyz_control_psexec
 * cannot change the result, because that flag is already part of the
 * initial expression; it documents the PRM requirement quoted next to it.
 *
 * When dispatch is required, conds->ps_valid is asserted.  The computed
 * flag is returned.
 */
272 ps_params_get_gen6_dispatch_enable(const struct ilo_dev
*dev
,
273 const struct ilo_state_ps_params_info
*params
,
274 const struct ilo_state_ps_dispatch_conds
*conds
)
277 * We want to skip dispatching when EarlyZ suffices. The conditions that
278 * require dispatching are
280 * - PS writes RTs and RTs are writeable
281 * - PS changes depth value and depth test/write is enabled
282 * - PS changes stencil value and stencil test is enabled
284 * - PS or CC kills pixels
285 * - EDSC is PSEXEC, and depth test/write or stencil test is enabled
287 bool dispatch_required
=
288 ((conds
->has_rt_write
&& params
->has_writeable_rt
) ||
289 conds
->write_odepth
||
290 conds
->write_ostencil
||
291 conds
->has_uav_write
||
292 ps_params_get_gen6_kill_pixel(dev
, params
, conds
) ||
293 params
->earlyz_control_psexec
);
295 ILO_DEV_ASSERT(dev
, 6, 8);
298 * From the Ivy Bridge PRM, volume 2 part 1, page 280:
300 * "If EDSC_PSEXEC mode is selected, Thread Dispatch Enable must be
303 if (ilo_dev_gen(dev
) < ILO_GEN(8) && params
->earlyz_control_psexec
)
304 dispatch_required
= true;
306 /* assert it is valid to dispatch */
307 if (dispatch_required
)
308 assert(conds
->ps_valid
);
310 return dispatch_required
;
/*
 * Fill in the kernel-related members of ff: dispatch_modes,
 * kernel_offsets[], grf_starts[] and pcb_enable.
 *
 * dispatch_modes comes from ps_get_gen6_dispatch_modes().  When it passes
 * util_is_power_of_two(), slot 0 receives the offset and GRF start of the
 * one selected kernel, checked in the order 8, 16, 32.  The other set of
 * assignments fills slots 0, 1 and 2 from kernel_8, kernel_32 and
 * kernel_16, in that order (presumably the multi-mode branch; the branch
 * keyword is not visible in this listing -- TODO confirm).
 *
 * kernel_offsets[0] is asserted to be zero.  On Gen7.5 an empty
 * dispatch_modes is replaced by GEN6_PS_DISPATCH_8 to avoid the GPU hang
 * noted below.
 *
 * ps_get_gen6_ff() tests the result of this function as a boolean.  The
 * third parameter (used here as ff) is declared on a line that is not
 * visible in this listing.
 */
314 ps_get_gen6_ff_kernels(const struct ilo_dev
*dev
,
315 const struct ilo_state_ps_info
*info
,
318 const struct ilo_state_shader_kernel_info
*kernel_8
= &info
->kernel_8
;
319 const struct ilo_state_shader_kernel_info
*kernel_16
= &info
->kernel_16
;
320 const struct ilo_state_shader_kernel_info
*kernel_32
= &info
->kernel_32
;
322 ILO_DEV_ASSERT(dev
, 6, 8);
324 ff
->dispatch_modes
= ps_get_gen6_dispatch_modes(dev
, info
);
326 /* initialize kernel offsets and GRF starts */
327 if (util_is_power_of_two(ff
->dispatch_modes
)) {
328 if (ff
->dispatch_modes
& GEN6_PS_DISPATCH_8
) {
329 ff
->kernel_offsets
[0] = kernel_8
->offset
;
330 ff
->grf_starts
[0] = kernel_8
->grf_start
;
331 } else if (ff
->dispatch_modes
& GEN6_PS_DISPATCH_16
) {
332 ff
->kernel_offsets
[0] = kernel_16
->offset
;
333 ff
->grf_starts
[0] = kernel_16
->grf_start
;
334 } else if (ff
->dispatch_modes
& GEN6_PS_DISPATCH_32
) {
335 ff
->kernel_offsets
[0] = kernel_32
->offset
;
336 ff
->grf_starts
[0] = kernel_32
->grf_start
;
339 ff
->kernel_offsets
[0] = kernel_8
->offset
;
340 ff
->kernel_offsets
[1] = kernel_32
->offset
;
341 ff
->kernel_offsets
[2] = kernel_16
->offset
;
343 ff
->grf_starts
[0] = kernel_8
->grf_start
;
344 ff
->grf_starts
[1] = kernel_32
->grf_start
;
345 ff
->grf_starts
[2] = kernel_16
->grf_start
;
348 /* we do not want to save it */
349 assert(ff
->kernel_offsets
[0] == 0);
/*
 * pcb_enable is set when at least one kernel whose dispatch mode is
 * selected has a non-zero pcb_attr_count.
 */
351 ff
->pcb_enable
= (((ff
->dispatch_modes
& GEN6_PS_DISPATCH_8
) &&
352 kernel_8
->pcb_attr_count
) ||
353 ((ff
->dispatch_modes
& GEN6_PS_DISPATCH_16
) &&
354 kernel_16
->pcb_attr_count
) ||
355 ((ff
->dispatch_modes
& GEN6_PS_DISPATCH_32
) &&
356 kernel_32
->pcb_attr_count
));
358 /* GPU hangs on Haswell if none of the dispatch mode bits is set */
359 if (ilo_dev_gen(dev
) == ILO_GEN(7.5) && !ff
->dispatch_modes
)
360 ff
->dispatch_modes
|= GEN6_PS_DISPATCH_8
;
/*
 * Translate an ilo_state_ps_info into the intermediate ff state.
 *
 * Steps, in order:
 *
 *  1. zero *ff;
 *  2. run ps_validate_gen6() and ps_get_gen6_ff_kernels(); both results
 *     are tested as booleans (the failure action is on a line that is not
 *     visible in this listing);
 *  3. when info->per_thread_scratch_size is non-zero, assert it is at most
 *     2 MiB, encode it as a power-of-two exponent starting from 1 KiB, and
 *     store the rounded size;
 *  4. copy sampler/surface/UAV information from info->resource;
 *  5. compute the thread count;
 *  6. derive the dispatch conditions and, from them and info->params, the
 *     kill_pixel and dispatch_enable flags;
 *  7. copy dual_source_blending and sample_mask from info->params.
 *
 * ilo_state_ps_init() folds the result of this function into a boolean.
 * The third parameter (used here as ff) is declared on a line that is not
 * visible in this listing.
 */
366 ps_get_gen6_ff(const struct ilo_dev
*dev
,
367 const struct ilo_state_ps_info
*info
,
370 const struct ilo_state_shader_resource_info
*resource
= &info
->resource
;
371 const struct ilo_state_ps_io_info
*io
= &info
->io
;
372 const struct ilo_state_ps_params_info
*params
= &info
->params
;
374 ILO_DEV_ASSERT(dev
, 6, 8);
376 memset(ff
, 0, sizeof(*ff
));
378 if (!ps_validate_gen6(dev
, info
) || !ps_get_gen6_ff_kernels(dev
, info
, ff
))
381 if (info
->per_thread_scratch_size
) {
383 * From the Sandy Bridge PRM, volume 2 part 1, page 271:
385 * "(Per-Thread Scratch Space)
386 * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
388 assert(info
->per_thread_scratch_size
<= 2 * 1024 * 1024);
390 /* next power of two, starting from 1KB */
391 ff
->per_thread_scratch_space
= (info
->per_thread_scratch_size
> 1024) ?
392 (util_last_bit(info
->per_thread_scratch_size
- 1) - 10) : 0;
393 ff
->per_thread_scratch_size
= 1 << (10 + ff
->per_thread_scratch_space
);
/* sampler count is stored in units of four samplers, saturating at 4 */
396 ff
->sampler_count
= (resource
->sampler_count
<= 12) ?
397 (resource
->sampler_count
+ 3) / 4 : 4;
398 ff
->surface_count
= resource
->surface_count
;
399 ff
->has_uav
= resource
->has_uav
;
401 ff
->thread_count
= ps_get_gen6_thread_count(dev
, info
);
/*
 * Conditions read by the kill-pixel and dispatch-enable helpers.
 * write_ostencil is always false here, and has_uav_write simply mirrors
 * resource->has_uav.
 */
403 ff
->conds
.ps_valid
= (info
->valid_kernels
!= 0x0);
404 ff
->conds
.has_rt_write
= io
->has_rt_write
;
405 ff
->conds
.write_odepth
= (io
->pscdepth
!= GEN7_PSCDEPTH_OFF
);
406 ff
->conds
.write_ostencil
= false;
407 ff
->conds
.has_uav_write
= resource
->has_uav
;
408 ff
->conds
.ps_may_kill
= (io
->write_pixel_mask
|| io
->write_omask
);
410 ff
->kill_pixel
= ps_params_get_gen6_kill_pixel(dev
, params
, &ff
->conds
);
411 ff
->dispatch_enable
=
412 ps_params_get_gen6_dispatch_enable(dev
, params
, &ff
->conds
);
413 ff
->dual_source_blending
= params
->dual_source_blending
;
414 ff
->sample_mask
= params
->sample_mask
;
/*
 * Build the Gen6 3DSTATE_WM dwords from ff, info->io and info.
 *
 * Restricted to Gen6 by ILO_DEV_ASSERT(dev, 6, 6).  The dwords are built
 * as follows:
 *
 *  - dw2: sampler count and binding table size, plus the ALT FP mode flag;
 *  - dw3: per-thread scratch space;
 *  - dw4: the three GRF start registers;
 *  - dw5: max threads and dispatch modes, plus the kill-pixel,
 *    compute-depth, use-depth, dispatch-enable, compute-oMask, use-W and
 *    dual-source-blend flags;
 *  - dw6: attribute count, position offset and the multisample dispatch
 *    mode (PERSAMPLE when info->per_sample_dispatch, else PERPIXEL).
 *
 * ff->kernel_offsets[1] and [2] are stored in ps->ps[5] and ps->ps[6]; the
 * stores to the lower ps->ps[] slots are not visible in this listing.
 * NOTE(review): the conditions guarding several of the flag statements are
 * also not visible, so do not assume they are unconditional.
 *
 * ilo_state_ps_init() folds the result of this function into a boolean.
 */
420 ps_set_gen6_3dstate_wm(struct ilo_state_ps
*ps
,
421 const struct ilo_dev
*dev
,
422 const struct ilo_state_ps_info
*info
,
423 const struct pixel_ff
*ff
)
425 const struct ilo_state_ps_io_info
*io
= &info
->io
;
426 uint32_t dw2
, dw3
, dw4
, dw5
, dw6
;
428 ILO_DEV_ASSERT(dev
, 6, 6);
430 dw2
= ff
->sampler_count
<< GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
|
431 ff
->surface_count
<< GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT
;
434 dw2
|= GEN6_THREADDISP_FP_MODE_ALT
;
436 dw3
= ff
->per_thread_scratch_space
<<
437 GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT
;
439 dw4
= ff
->grf_starts
[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT
|
440 ff
->grf_starts
[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT
|
441 ff
->grf_starts
[2] << GEN6_WM_DW4_URB_GRF_START2__SHIFT
;
443 dw5
= ff
->thread_count
<< GEN6_WM_DW5_MAX_THREADS__SHIFT
|
444 ff
->dispatch_modes
<< GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT
;
447 dw5
|= GEN6_WM_DW5_PS_KILL_PIXEL
;
449 if (io
->pscdepth
!= GEN7_PSCDEPTH_OFF
)
450 dw5
|= GEN6_WM_DW5_PS_COMPUTE_DEPTH
;
452 dw5
|= GEN6_WM_DW5_PS_USE_DEPTH
;
454 if (ff
->dispatch_enable
)
455 dw5
|= GEN6_WM_DW5_PS_DISPATCH_ENABLE
;
458 dw5
|= GEN6_WM_DW5_PS_COMPUTE_OMASK
;
460 dw5
|= GEN6_WM_DW5_PS_USE_W
;
462 if (ff
->dual_source_blending
)
463 dw5
|= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND
;
465 dw6
= io
->attr_count
<< GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT
|
466 io
->posoffset
<< GEN6_WM_DW6_PS_POSOFFSET__SHIFT
;
468 dw6
|= (info
->per_sample_dispatch
) ?
469 GEN6_WM_DW6_MSDISPMODE_PERSAMPLE
: GEN6_WM_DW6_MSDISPMODE_PERPIXEL
;
471 STATIC_ASSERT(ARRAY_SIZE(ps
->ps
) >= 7);
477 ps
->ps
[5] = ff
->kernel_offsets
[1];
478 ps
->ps
[6] = ff
->kernel_offsets
[2];
/*
 * Build the Gen7/Gen7.5 3DSTATE_WM dwords.
 *
 * Restricted to Gen7 through Gen7.5 by ILO_DEV_ASSERT(dev, 7, 7.5).
 *
 *  - dw1 starts from io->pscdepth and accumulates the dispatch-enable,
 *    kill-pixel, use-depth, use-W and use-coverage-mask flags;
 *  - dw2 selects MSDISPMODE_PERSAMPLE when info->per_sample_dispatch is
 *    set and MSDISPMODE_PERPIXEL otherwise.
 *
 * A STATIC_ASSERT requires ps->ps[] to have at least two slots; the stores
 * into ps->ps[] are not visible in this listing.  NOTE(review): the
 * declarations of dw1/dw2 and the conditions guarding the kill-pixel,
 * use-depth and use-W flags are also not visible.
 *
 * ilo_state_ps_init() folds the result of this function into a boolean.
 */
484 ps_set_gen7_3dstate_wm(struct ilo_state_ps
*ps
,
485 const struct ilo_dev
*dev
,
486 const struct ilo_state_ps_info
*info
,
487 const struct pixel_ff
*ff
)
489 const struct ilo_state_ps_io_info
*io
= &info
->io
;
492 ILO_DEV_ASSERT(dev
, 7, 7.5);
494 dw1
= io
->pscdepth
<< GEN7_WM_DW1_PSCDEPTH__SHIFT
;
496 if (ff
->dispatch_enable
)
497 dw1
|= GEN7_WM_DW1_PS_DISPATCH_ENABLE
;
499 dw1
|= GEN7_WM_DW1_PS_KILL_PIXEL
;
502 dw1
|= GEN7_WM_DW1_PS_USE_DEPTH
;
504 dw1
|= GEN7_WM_DW1_PS_USE_W
;
505 if (io
->use_coverage_mask
)
506 dw1
|= GEN7_WM_DW1_PS_USE_COVERAGE_MASK
;
508 dw2
= (info
->per_sample_dispatch
) ?
509 GEN7_WM_DW2_MSDISPMODE_PERSAMPLE
: GEN7_WM_DW2_MSDISPMODE_PERPIXEL
;
511 STATIC_ASSERT(ARRAY_SIZE(ps
->ps
) >= 2);
/*
 * Build the Gen7/Gen7.5 3DSTATE_PS dwords.
 *
 * Restricted to Gen7 through Gen7.5 by ILO_DEV_ASSERT(dev, 7, 7.5).
 *
 *  - dw2: sampler count and binding table size, plus the ALT FP mode flag;
 *  - dw3: per-thread scratch space;
 *  - dw4: position offset and dispatch modes.  On Gen7.5 the thread count
 *    goes into the GEN75 max-threads field together with the low 8 bits of
 *    ff->sample_mask; the other statement uses the GEN7 max-threads field
 *    (presumably the non-Gen7.5 branch; the branch keyword is not visible
 *    -- TODO confirm).  dw4 also accumulates the push-constant, attribute,
 *    compute-oMask, RT fast clear, dual-source-blend, RT resolve and (on
 *    Gen7.5 with ff->has_uav) UAV access flags;
 *  - dw5: the three GRF start registers.
 *
 * ff->kernel_offsets[1] and [2] are stored in ps->ps[6] and ps->ps[7]; the
 * other stores into ps->ps[] are not visible in this listing, nor are the
 * conditions guarding some of the flag statements.
 *
 * ilo_state_ps_init() folds the result of this function into a boolean.
 */
519 ps_set_gen7_3DSTATE_PS(struct ilo_state_ps
*ps
,
520 const struct ilo_dev
*dev
,
521 const struct ilo_state_ps_info
*info
,
522 const struct pixel_ff
*ff
)
524 const struct ilo_state_ps_io_info
*io
= &info
->io
;
525 uint32_t dw2
, dw3
, dw4
, dw5
;
527 ILO_DEV_ASSERT(dev
, 7, 7.5);
529 dw2
= ff
->sampler_count
<< GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
|
530 ff
->surface_count
<< GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT
;
533 dw2
|= GEN6_THREADDISP_FP_MODE_ALT
;
535 dw3
= ff
->per_thread_scratch_space
<<
536 GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT
;
538 dw4
= io
->posoffset
<< GEN7_PS_DW4_POSOFFSET__SHIFT
|
539 ff
->dispatch_modes
<< GEN7_PS_DW4_DISPATCH_MODE__SHIFT
;
541 if (ilo_dev_gen(dev
) == ILO_GEN(7.5)) {
542 dw4
|= ff
->thread_count
<< GEN75_PS_DW4_MAX_THREADS__SHIFT
|
543 (ff
->sample_mask
& 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT
;
545 dw4
|= ff
->thread_count
<< GEN7_PS_DW4_MAX_THREADS__SHIFT
;
549 dw4
|= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE
;
551 dw4
|= GEN7_PS_DW4_ATTR_ENABLE
;
553 dw4
|= GEN7_PS_DW4_COMPUTE_OMASK
;
554 if (info
->rt_clear_enable
)
555 dw4
|= GEN7_PS_DW4_RT_FAST_CLEAR
;
556 if (ff
->dual_source_blending
)
557 dw4
|= GEN7_PS_DW4_DUAL_SOURCE_BLEND
;
558 if (info
->rt_resolve_enable
)
559 dw4
|= GEN7_PS_DW4_RT_RESOLVE
;
560 if (ilo_dev_gen(dev
) >= ILO_GEN(7.5) && ff
->has_uav
)
561 dw4
|= GEN75_PS_DW4_ACCESS_UAV
;
563 dw5
= ff
->grf_starts
[0] << GEN7_PS_DW5_URB_GRF_START0__SHIFT
|
564 ff
->grf_starts
[1] << GEN7_PS_DW5_URB_GRF_START1__SHIFT
|
565 ff
->grf_starts
[2] << GEN7_PS_DW5_URB_GRF_START2__SHIFT
;
567 STATIC_ASSERT(ARRAY_SIZE(ps
->ps
) >= 8);
572 ps
->ps
[6] = ff
->kernel_offsets
[1];
573 ps
->ps
[7] = ff
->kernel_offsets
[2];
/*
 * Build the Gen8 3DSTATE_PS dwords.
 *
 * Restricted to Gen8 by ILO_DEV_ASSERT(dev, 8, 8).
 *
 *  - dw3: always includes GEN6_THREADDISP_VME, plus sampler count and
 *    binding table size and the ALT FP mode flag;
 *  - dw4: per-thread scratch space;
 *  - dw6: max threads, position offset and dispatch modes, plus the
 *    push-constant, RT fast clear and RT resolve flags;
 *  - dw7: the three GRF start registers.
 *
 * ff->kernel_offsets[1] and [2] are stored in ps->ps[4] and ps->ps[5]; the
 * other stores into ps->ps[] are not visible in this listing, nor are the
 * conditions guarding the ALT FP mode and push-constant flags.
 *
 * ilo_state_ps_init() folds the result of this function into a boolean.
 */
579 ps_set_gen8_3DSTATE_PS(struct ilo_state_ps
*ps
,
580 const struct ilo_dev
*dev
,
581 const struct ilo_state_ps_info
*info
,
582 const struct pixel_ff
*ff
)
584 const struct ilo_state_ps_io_info
*io
= &info
->io
;
585 uint32_t dw3
, dw4
, dw6
, dw7
;
587 ILO_DEV_ASSERT(dev
, 8, 8);
590 * Set VME here for correct computation of LODs and others. Not sure why
593 dw3
= GEN6_THREADDISP_VME
|
594 ff
->sampler_count
<< GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
|
595 ff
->surface_count
<< GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT
;
598 dw3
|= GEN6_THREADDISP_FP_MODE_ALT
;
600 dw4
= ff
->per_thread_scratch_space
<<
601 GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT
;
603 dw6
= ff
->thread_count
<< GEN8_PS_DW6_MAX_THREADS__SHIFT
|
604 io
->posoffset
<< GEN8_PS_DW6_POSOFFSET__SHIFT
|
605 ff
->dispatch_modes
<< GEN8_PS_DW6_DISPATCH_MODE__SHIFT
;
608 dw6
|= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE
;
610 if (info
->rt_clear_enable
)
611 dw6
|= GEN8_PS_DW6_RT_FAST_CLEAR
;
612 if (info
->rt_resolve_enable
)
613 dw6
|= GEN8_PS_DW6_RT_RESOLVE
;
615 dw7
= ff
->grf_starts
[0] << GEN8_PS_DW7_URB_GRF_START0__SHIFT
|
616 ff
->grf_starts
[1] << GEN8_PS_DW7_URB_GRF_START1__SHIFT
|
617 ff
->grf_starts
[2] << GEN8_PS_DW7_URB_GRF_START2__SHIFT
;
619 STATIC_ASSERT(ARRAY_SIZE(ps
->ps
) >= 6);
624 ps
->ps
[4] = ff
->kernel_offsets
[1];
625 ps
->ps
[5] = ff
->kernel_offsets
[2];
/*
 * Build the Gen8 3DSTATE_PS_EXTRA dword.
 *
 * Restricted to Gen8 by ILO_DEV_ASSERT(dev, 8, 8).  dw1 starts from
 * io->pscdepth and accumulates these flags:
 *
 *  - VALID when info->valid_kernels is non-zero;
 *  - UAV_ONLY when io->has_rt_write is clear;
 *  - KILL_PIXEL when io->write_pixel_mask is set;
 *  - PER_SAMPLE when info->per_sample_dispatch is set;
 *  - USE_COVERAGE_MASK when io->use_coverage_mask is set;
 *  - COMPUTE_OMASK, USE_DEPTH, USE_W, ATTR_ENABLE and ACCESS_UAV, whose
 *    guarding conditions are not visible in this listing.
 *
 * The final check on !info->valid_kernels follows the PRM note that the
 * rest of the command should be clear when the valid bit is clear; the
 * statement it guards is not visible in this listing.  A STATIC_ASSERT
 * requires ps->ps[] to have at least five slots.
 *
 * ilo_state_ps_init() folds the result of this function into a boolean.
 */
631 ps_set_gen8_3DSTATE_PS_EXTRA(struct ilo_state_ps
*ps
,
632 const struct ilo_dev
*dev
,
633 const struct ilo_state_ps_info
*info
,
634 const struct pixel_ff
*ff
)
636 const struct ilo_state_ps_io_info
*io
= &info
->io
;
639 ILO_DEV_ASSERT(dev
, 8, 8);
641 dw1
= io
->pscdepth
<< GEN8_PSX_DW1_PSCDEPTH__SHIFT
;
643 if (info
->valid_kernels
)
644 dw1
|= GEN8_PSX_DW1_VALID
;
645 if (!io
->has_rt_write
)
646 dw1
|= GEN8_PSX_DW1_UAV_ONLY
;
648 dw1
|= GEN8_PSX_DW1_COMPUTE_OMASK
;
649 if (io
->write_pixel_mask
)
650 dw1
|= GEN8_PSX_DW1_KILL_PIXEL
;
653 dw1
|= GEN8_PSX_DW1_USE_DEPTH
;
655 dw1
|= GEN8_PSX_DW1_USE_W
;
657 dw1
|= GEN8_PSX_DW1_ATTR_ENABLE
;
659 if (info
->per_sample_dispatch
)
660 dw1
|= GEN8_PSX_DW1_PER_SAMPLE
;
662 dw1
|= GEN8_PSX_DW1_ACCESS_UAV
;
663 if (io
->use_coverage_mask
)
664 dw1
|= GEN8_PSX_DW1_USE_COVERAGE_MASK
;
667 * From the Broadwell PRM, volume 2b, page 151:
669 * "When this bit (Pixel Shader Valid) clear the rest of this command
670 * should also be clear.
672 if (!info
->valid_kernels
)
675 STATIC_ASSERT(ARRAY_SIZE(ps
->ps
) >= 5);
/*
 * Initialize *ps from info for the given device.
 *
 * ps must be all zeroes on entry (asserted).  The intermediate ff state is
 * computed with ps_get_gen6_ff() and then encoded by the setters for the
 * device generation:
 *
 *  - Gen8 and later: ps_set_gen8_3DSTATE_PS() and
 *    ps_set_gen8_3DSTATE_PS_EXTRA();
 *  - Gen7 and later: ps_set_gen7_3dstate_wm() and
 *    ps_set_gen7_3DSTATE_PS();
 *  - the remaining call is ps_set_gen6_3dstate_wm() (presumably the final
 *    else branch; the keyword is not visible in this listing).
 *
 * Every helper result is AND-ed into ret.  The declarations of ret and ff
 * and the return statement are not visible in this listing.  Afterwards the
 * scratch size and the dispatch conditions are saved in *ps; the saved
 * conditions are read back by ilo_state_ps_set_params().
 */
682 ilo_state_ps_init(struct ilo_state_ps
*ps
,
683 const struct ilo_dev
*dev
,
684 const struct ilo_state_ps_info
*info
)
689 assert(ilo_is_zeroed(ps
, sizeof(*ps
)));
691 ret
&= ps_get_gen6_ff(dev
, info
, &ff
);
693 if (ilo_dev_gen(dev
) >= ILO_GEN(8)) {
694 ret
&= ps_set_gen8_3DSTATE_PS(ps
, dev
, info
, &ff
);
695 ret
&= ps_set_gen8_3DSTATE_PS_EXTRA(ps
, dev
, info
, &ff
);
696 } else if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
697 ret
&= ps_set_gen7_3dstate_wm(ps
, dev
, info
, &ff
);
698 ret
&= ps_set_gen7_3DSTATE_PS(ps
, dev
, info
, &ff
);
700 ret
&= ps_set_gen6_3dstate_wm(ps
, dev
, info
, &ff
);
/*
 * NOTE(review): ff.thread_count is the value returned by
 * ps_get_gen6_thread_count(), which already subtracts one from the
 * maximum thread count.  The product below therefore covers one thread
 * fewer than that maximum -- confirm this is intended.
 */
703 ps
->scratch_size
= ff
.per_thread_scratch_size
* ff
.thread_count
;
704 /* save conditions */
705 ps
->conds
= ff
.conds
;
/*
 * Initialize *ps for the case where no pixel shader is in use.
 *
 * Builds an all-zero ilo_state_ps_info on the stack (so valid_kernels is
 * zero and io is zeroed) and forwards it to ilo_state_ps_init(), returning
 * that result.
 */
713 ilo_state_ps_init_disabled(struct ilo_state_ps
*ps
,
714 const struct ilo_dev
*dev
)
716 struct ilo_state_ps_info info
;
718 memset(&info
, 0, sizeof(info
));
720 return ilo_state_ps_init(ps
, dev
, &info
);
/*
 * Update the parameter-dependent bits of an already initialized *ps
 * without rebuilding the whole state.
 *
 *  - On Gen7.5 the sample mask field of ps->ps[4] is replaced with the low
 *    8 bits of params->sample_mask.
 *  - Below Gen8, the dispatch-enable, kill-pixel and dual-source-blend
 *    bits are recomputed from params and the conditions saved in ps->conds:
 *      - Gen7 and later: dispatch enable and kill pixel are in ps->ps[0],
 *        dual source blend is in ps->ps[4];
 *      - the remaining statements change all three bits in ps->ps[3] using
 *        the GEN6_WM_DW5 masks (presumably the Gen6 branch).
 *
 * Each bit has one statement that sets it and one that clears it.
 * NOTE(review): the else keywords separating each set/clear pair and the
 * two generation branches are not visible in this listing, and the end of
 * the function may lie beyond the last visible line.
 */
724 ilo_state_ps_set_params(struct ilo_state_ps
*ps
,
725 const struct ilo_dev
*dev
,
726 const struct ilo_state_ps_params_info
*params
)
728 ILO_DEV_ASSERT(dev
, 6, 8);
730 /* modify sample mask */
731 if (ilo_dev_gen(dev
) == ILO_GEN(7.5)) {
732 ps
->ps
[4] = (ps
->ps
[4] & ~GEN75_PS_DW4_SAMPLE_MASK__MASK
) |
733 (params
->sample_mask
& 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT
;
736 /* modify dispatch enable, pixel kill, and dual source blending */
737 if (ilo_dev_gen(dev
) < ILO_GEN(8)) {
738 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
739 if (ps_params_get_gen6_dispatch_enable(dev
, params
, &ps
->conds
))
740 ps
->ps
[0] |= GEN7_WM_DW1_PS_DISPATCH_ENABLE
;
742 ps
->ps
[0] &= ~GEN7_WM_DW1_PS_DISPATCH_ENABLE
;
744 if (ps_params_get_gen6_kill_pixel(dev
, params
, &ps
->conds
))
745 ps
->ps
[0] |= GEN7_WM_DW1_PS_KILL_PIXEL
;
747 ps
->ps
[0] &= ~GEN7_WM_DW1_PS_KILL_PIXEL
;
749 if (params
->dual_source_blending
)
750 ps
->ps
[4] |= GEN7_PS_DW4_DUAL_SOURCE_BLEND
;
752 ps
->ps
[4] &= ~GEN7_PS_DW4_DUAL_SOURCE_BLEND
;
754 if (ps_params_get_gen6_dispatch_enable(dev
, params
, &ps
->conds
))
755 ps
->ps
[3] |= GEN6_WM_DW5_PS_DISPATCH_ENABLE
;
757 ps
->ps
[3] &= ~GEN6_WM_DW5_PS_DISPATCH_ENABLE
;
759 if (ps_params_get_gen6_kill_pixel(dev
, params
, &ps
->conds
))
760 ps
->ps
[3] |= GEN6_WM_DW5_PS_KILL_PIXEL
;
762 ps
->ps
[3] &= ~GEN6_WM_DW5_PS_KILL_PIXEL
;
764 if (params
->dual_source_blending
)
765 ps
->ps
[3] |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND
;
767 ps
->ps
[3] &= ~GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND
;