2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2015 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "ilo_debug.h"
29 #include "ilo_state_shader.h"
32 uint8_t dispatch_modes
;
34 uint32_t kernel_offsets
[3];
35 uint8_t grf_starts
[3];
37 uint8_t scratch_space
;
39 uint8_t sampler_count
;
40 uint8_t surface_count
;
43 uint16_t thread_count
;
45 struct ilo_state_ps_dispatch_conds conds
;
49 bool dual_source_blending
;
54 ps_kernel_validate_gen6(const struct ilo_dev
*dev
,
55 const struct ilo_state_shader_kernel_info
*kernel
)
57 /* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */
58 const uint8_t max_grf_start
= 128;
60 * From the Sandy Bridge PRM, volume 2 part 1, page 271:
62 * "(Per-Thread Scratch Space)
63 * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
65 const uint32_t max_scratch_size
= 2 * 1024 * 1024;
67 ILO_DEV_ASSERT(dev
, 6, 8);
69 /* "Kernel Start Pointer" is 64-byte aligned */
70 assert(kernel
->offset
% 64 == 0);
72 assert(kernel
->grf_start
< max_grf_start
);
73 assert(kernel
->scratch_size
<= max_scratch_size
);
79 ps_validate_gen6(const struct ilo_dev
*dev
,
80 const struct ilo_state_ps_info
*info
)
82 const struct ilo_state_shader_kernel_info
*kernel_8
= &info
->kernel_8
;
83 const struct ilo_state_shader_kernel_info
*kernel_16
= &info
->kernel_16
;
84 const struct ilo_state_shader_kernel_info
*kernel_32
= &info
->kernel_32
;
85 const struct ilo_state_ps_io_info
*io
= &info
->io
;
87 ILO_DEV_ASSERT(dev
, 6, 8);
89 if (!ps_kernel_validate_gen6(dev
, kernel_8
) ||
90 !ps_kernel_validate_gen6(dev
, kernel_16
) ||
91 !ps_kernel_validate_gen6(dev
, kernel_32
))
94 /* unsupported on Gen6 */
95 if (ilo_dev_gen(dev
) == ILO_GEN(6))
96 assert(!io
->use_coverage_mask
);
99 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
101 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
102 * field must be set to disabled."
104 if (ilo_dev_gen(dev
) == ILO_GEN(6) && io
->pscdepth
!= GEN7_PSCDEPTH_OFF
)
105 assert(info
->cv_has_depth_buffer
);
107 if (!info
->per_sample_dispatch
) {
109 * From the Sandy Bridge PRM, volume 2 part 1, page 281:
111 * "MSDISPMODE_PERSAMPLE is required in order to select
114 assert(io
->posoffset
!= GEN6_POSOFFSET_SAMPLE
);
117 * From the Sandy Bridge PRM, volume 2 part 1, page 282:
119 * "MSDISPMODE_PERSAMPLE is required in order to select
122 * From the Sandy Bridge PRM, volume 2 part 1, page 283:
124 * "MSDISPMODE_PERSAMPLE is required in order to select Perspective
125 * Sample or Non-perspective Sample barycentric coordinates."
127 assert(!info
->cv_per_sample_interp
);
132 * From the Sandy Bridge PRM, volume 2 part 1, page 314:
134 * "Pixel Shader Dispatch, Alpha... must all be disabled."
136 * Simply disallow any valid kernel when there is early-z op. Also, when
137 * there is no valid kernel, io should be zeroed.
139 if (info
->valid_kernels
)
140 assert(!info
->cv_has_earlyz_op
);
142 assert(ilo_is_zeroed(io
, sizeof(*io
)));
148 ps_get_gen6_dispatch_modes(const struct ilo_dev
*dev
,
149 const struct ilo_state_ps_info
*info
)
151 const struct ilo_state_ps_io_info
*io
= &info
->io
;
152 uint8_t dispatch_modes
= info
->valid_kernels
;
154 ILO_DEV_ASSERT(dev
, 6, 8);
160 * From the Sandy Bridge PRM, volume 2 part 1, page 334:
162 * "Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader
165 * "Valid on all products, except when in non-1x PERSAMPLE mode
166 * (applies to [DevSNB+] only)"
168 * From the Sandy Bridge PRM, volume 4 part 1, page 239:
170 * "[DevSNB]: When Pixel Shader outputs oDepth and PS invocation mode
171 * is PERPIXEL, Message Type for Render Target Write must be SIMD8.
173 * Errata: [DevSNB+]: When Pixel Shader outputs oMask, this message
174 * type is not supported: SIMD8 (including SIMD8_DUALSRC_xx)."
176 * It is really hard to follow what combinations are valid on what
177 * platforms. Judging from the restrictions on RT write messages on Gen6,
178 * oDepth and oMask related issues should be Gen6-specific. PERSAMPLE
179 * issue should be universal, and disallows multiple dispatch modes.
181 if (ilo_dev_gen(dev
) == ILO_GEN(6)) {
182 if (io
->pscdepth
!= GEN7_PSCDEPTH_OFF
&& !info
->per_sample_dispatch
)
183 dispatch_modes
&= GEN6_PS_DISPATCH_8
;
185 dispatch_modes
&= ~GEN6_PS_DISPATCH_8
;
187 if (info
->per_sample_dispatch
&& !info
->sample_count_one
) {
188 /* prefer 32 over 16 over 8 */
189 if (dispatch_modes
& GEN6_PS_DISPATCH_32
)
190 dispatch_modes
&= GEN6_PS_DISPATCH_32
;
191 else if (dispatch_modes
& GEN6_PS_DISPATCH_16
)
192 dispatch_modes
&= GEN6_PS_DISPATCH_16
;
194 dispatch_modes
&= GEN6_PS_DISPATCH_8
;
198 * From the Broadwell PRM, volume 2b, page 149:
200 * "When Render Target Fast Clear Enable is ENABLED or Render Target
201 * Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit (8 Pixel
202 * Dispatch or Dual-8 Pixel Dispatch Enable) must be DISABLED."
204 if (info
->rt_clear_enable
|| info
->rt_resolve_enable
)
205 dispatch_modes
&= ~GEN6_PS_DISPATCH_8
;
207 assert(dispatch_modes
);
209 return dispatch_modes
;
213 ps_get_gen6_thread_count(const struct ilo_dev
*dev
,
214 const struct ilo_state_ps_info
*info
)
216 uint16_t thread_count
;
218 ILO_DEV_ASSERT(dev
, 6, 8);
220 /* Maximum Number of Threads of 3DSTATE_PS */
221 switch (ilo_dev_gen(dev
)) {
223 /* scaled automatically */
224 thread_count
= 64 - 1;
227 thread_count
= (dev
->gt
== 3) ? 408 :
228 (dev
->gt
== 2) ? 204 : 102;
231 thread_count
= (dev
->gt
== 2) ? 172 : 48;
235 /* from the classic driver instead of the PRM */
236 thread_count
= (dev
->gt
== 2) ? 80 : 40;
240 return thread_count
- 1;
244 ps_params_get_gen6_kill_pixel(const struct ilo_dev
*dev
,
245 const struct ilo_state_ps_params_info
*params
,
246 const struct ilo_state_ps_dispatch_conds
*conds
)
248 ILO_DEV_ASSERT(dev
, 6, 8);
251 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
253 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
254 * PS kernel or color calculator has the ability to kill (discard)
255 * pixels or samples, other than due to depth or stencil testing.
256 * This bit is required to be ENABLED in the following situations:
258 * The API pixel shader program contains "killpix" or "discard"
259 * instructions, or other code in the pixel shader kernel that can
260 * cause the final pixel mask to differ from the pixel mask received
263 * A sampler with chroma key enabled with kill pixel mode is used by
266 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
269 * The pixel shader kernel generates and outputs oMask.
271 * Note: As ClipDistance clipping is fully supported in hardware and
272 * therefore not via PS instructions, there should be no need to
273 * ENABLE this bit due to ClipDistance clipping."
275 return (conds
->ps_may_kill
|| params
->alpha_may_kill
);
279 ps_params_get_gen6_dispatch_enable(const struct ilo_dev
*dev
,
280 const struct ilo_state_ps_params_info
*params
,
281 const struct ilo_state_ps_dispatch_conds
*conds
)
284 * We want to skip dispatching when EarlyZ suffices. The conditions that
285 * require dispatching are
287 * - PS writes RTs and RTs are writeable
288 * - PS changes depth value and depth test/write is enabled
289 * - PS changes stencil value and stencil test is enabled
291 * - PS or CC kills pixels
292 * - EDSC is PSEXEC, and depth test/write or stencil test is enabled
294 bool dispatch_required
=
295 ((conds
->has_rt_write
&& params
->has_writeable_rt
) ||
296 conds
->write_odepth
||
297 conds
->write_ostencil
||
298 conds
->has_uav_write
||
299 ps_params_get_gen6_kill_pixel(dev
, params
, conds
) ||
300 params
->earlyz_control_psexec
);
302 ILO_DEV_ASSERT(dev
, 6, 8);
305 * From the Ivy Bridge PRM, volume 2 part 1, page 280:
307 * "If EDSC_PSEXEC mode is selected, Thread Dispatch Enable must be
310 if (ilo_dev_gen(dev
) < ILO_GEN(8) && params
->earlyz_control_psexec
)
311 dispatch_required
= true;
313 /* assert it is valid to dispatch */
314 if (dispatch_required
)
315 assert(conds
->ps_valid
);
317 return dispatch_required
;
321 ps_get_gen6_ff_kernels(const struct ilo_dev
*dev
,
322 const struct ilo_state_ps_info
*info
,
325 const struct ilo_state_shader_kernel_info
*kernel_8
= &info
->kernel_8
;
326 const struct ilo_state_shader_kernel_info
*kernel_16
= &info
->kernel_16
;
327 const struct ilo_state_shader_kernel_info
*kernel_32
= &info
->kernel_32
;
328 uint32_t scratch_size
;
330 ILO_DEV_ASSERT(dev
, 6, 8);
332 ff
->dispatch_modes
= ps_get_gen6_dispatch_modes(dev
, info
);
334 /* initialize kernel offsets and GRF starts */
335 if (util_is_power_of_two(ff
->dispatch_modes
)) {
336 if (ff
->dispatch_modes
& GEN6_PS_DISPATCH_8
) {
337 ff
->kernel_offsets
[0] = kernel_8
->offset
;
338 ff
->grf_starts
[0] = kernel_8
->grf_start
;
339 } else if (ff
->dispatch_modes
& GEN6_PS_DISPATCH_16
) {
340 ff
->kernel_offsets
[0] = kernel_16
->offset
;
341 ff
->grf_starts
[0] = kernel_16
->grf_start
;
342 } else if (ff
->dispatch_modes
& GEN6_PS_DISPATCH_32
) {
343 ff
->kernel_offsets
[0] = kernel_32
->offset
;
344 ff
->grf_starts
[0] = kernel_32
->grf_start
;
347 ff
->kernel_offsets
[0] = kernel_8
->offset
;
348 ff
->kernel_offsets
[1] = kernel_32
->offset
;
349 ff
->kernel_offsets
[2] = kernel_16
->offset
;
351 ff
->grf_starts
[0] = kernel_8
->grf_start
;
352 ff
->grf_starts
[1] = kernel_32
->grf_start
;
353 ff
->grf_starts
[2] = kernel_16
->grf_start
;
356 /* we do not want to save it */
357 assert(ff
->kernel_offsets
[0] == 0);
359 ff
->pcb_enable
= (((ff
->dispatch_modes
& GEN6_PS_DISPATCH_8
) &&
360 kernel_8
->pcb_attr_count
) ||
361 ((ff
->dispatch_modes
& GEN6_PS_DISPATCH_16
) &&
362 kernel_16
->pcb_attr_count
) ||
363 ((ff
->dispatch_modes
& GEN6_PS_DISPATCH_32
) &&
364 kernel_32
->pcb_attr_count
));
367 if ((ff
->dispatch_modes
& GEN6_PS_DISPATCH_8
) &&
368 scratch_size
< kernel_8
->scratch_size
)
369 scratch_size
= kernel_8
->scratch_size
;
370 if ((ff
->dispatch_modes
& GEN6_PS_DISPATCH_16
) &&
371 scratch_size
< kernel_16
->scratch_size
)
372 scratch_size
= kernel_16
->scratch_size
;
373 if ((ff
->dispatch_modes
& GEN6_PS_DISPATCH_32
) &&
374 scratch_size
< kernel_32
->scratch_size
)
375 scratch_size
= kernel_32
->scratch_size
;
377 /* next power of two, starting from 1KB */
378 ff
->scratch_space
= (scratch_size
> 1024) ?
379 (util_last_bit(scratch_size
- 1) - 10): 0;
381 /* GPU hangs on Haswell if none of the dispatch mode bits is set */
382 if (ilo_dev_gen(dev
) == ILO_GEN(7.5) && !ff
->dispatch_modes
)
383 ff
->dispatch_modes
|= GEN6_PS_DISPATCH_8
;
389 ps_get_gen6_ff(const struct ilo_dev
*dev
,
390 const struct ilo_state_ps_info
*info
,
393 const struct ilo_state_shader_resource_info
*resource
= &info
->resource
;
394 const struct ilo_state_ps_io_info
*io
= &info
->io
;
395 const struct ilo_state_ps_params_info
*params
= &info
->params
;
397 ILO_DEV_ASSERT(dev
, 6, 8);
399 memset(ff
, 0, sizeof(*ff
));
401 if (!ps_validate_gen6(dev
, info
) || !ps_get_gen6_ff_kernels(dev
, info
, ff
))
404 ff
->sampler_count
= (resource
->sampler_count
<= 12) ?
405 (resource
->sampler_count
+ 3) / 4 : 4;
406 ff
->surface_count
= resource
->surface_count
;
407 ff
->has_uav
= resource
->has_uav
;
409 ff
->thread_count
= ps_get_gen6_thread_count(dev
, info
);
411 ff
->conds
.ps_valid
= (info
->valid_kernels
!= 0x0);
412 ff
->conds
.has_rt_write
= io
->has_rt_write
;
413 ff
->conds
.write_odepth
= (io
->pscdepth
!= GEN7_PSCDEPTH_OFF
);
414 ff
->conds
.write_ostencil
= false;
415 ff
->conds
.has_uav_write
= resource
->has_uav
;
416 ff
->conds
.ps_may_kill
= (io
->write_pixel_mask
|| io
->write_omask
);
418 ff
->kill_pixel
= ps_params_get_gen6_kill_pixel(dev
, params
, &ff
->conds
);
419 ff
->dispatch_enable
=
420 ps_params_get_gen6_dispatch_enable(dev
, params
, &ff
->conds
);
421 ff
->dual_source_blending
= params
->dual_source_blending
;
422 ff
->sample_mask
= params
->sample_mask
;
428 ps_set_gen6_3dstate_wm(struct ilo_state_ps
*ps
,
429 const struct ilo_dev
*dev
,
430 const struct ilo_state_ps_info
*info
,
431 const struct pixel_ff
*ff
)
433 const struct ilo_state_ps_io_info
*io
= &info
->io
;
434 uint32_t dw2
, dw3
, dw4
, dw5
, dw6
;
436 ILO_DEV_ASSERT(dev
, 6, 6);
438 dw2
= ff
->sampler_count
<< GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
|
439 ff
->surface_count
<< GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT
;
442 dw2
|= GEN6_THREADDISP_FP_MODE_ALT
;
444 dw3
= ff
->scratch_space
<< GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT
;
446 dw4
= ff
->grf_starts
[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT
|
447 ff
->grf_starts
[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT
|
448 ff
->grf_starts
[2] << GEN6_WM_DW4_URB_GRF_START2__SHIFT
;
450 dw5
= ff
->thread_count
<< GEN6_WM_DW5_MAX_THREADS__SHIFT
|
451 ff
->dispatch_modes
<< GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT
;
454 dw5
|= GEN6_WM_DW5_PS_KILL_PIXEL
;
456 if (io
->pscdepth
!= GEN7_PSCDEPTH_OFF
)
457 dw5
|= GEN6_WM_DW5_PS_COMPUTE_DEPTH
;
459 dw5
|= GEN6_WM_DW5_PS_USE_DEPTH
;
461 if (ff
->dispatch_enable
)
462 dw5
|= GEN6_WM_DW5_PS_DISPATCH_ENABLE
;
465 dw5
|= GEN6_WM_DW5_PS_COMPUTE_OMASK
;
467 dw5
|= GEN6_WM_DW5_PS_USE_W
;
469 if (ff
->dual_source_blending
)
470 dw5
|= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND
;
472 dw6
= io
->attr_count
<< GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT
|
473 io
->posoffset
<< GEN6_WM_DW6_PS_POSOFFSET__SHIFT
;
475 dw6
|= (info
->per_sample_dispatch
) ?
476 GEN6_WM_DW6_MSDISPMODE_PERSAMPLE
: GEN6_WM_DW6_MSDISPMODE_PERPIXEL
;
478 STATIC_ASSERT(ARRAY_SIZE(ps
->ps
) >= 7);
484 ps
->ps
[5] = ff
->kernel_offsets
[1];
485 ps
->ps
[6] = ff
->kernel_offsets
[2];
491 ps_set_gen7_3dstate_wm(struct ilo_state_ps
*ps
,
492 const struct ilo_dev
*dev
,
493 const struct ilo_state_ps_info
*info
,
494 const struct pixel_ff
*ff
)
496 const struct ilo_state_ps_io_info
*io
= &info
->io
;
499 ILO_DEV_ASSERT(dev
, 7, 7.5);
501 dw1
= io
->pscdepth
<< GEN7_WM_DW1_PSCDEPTH__SHIFT
;
503 if (ff
->dispatch_enable
)
504 dw1
|= GEN7_WM_DW1_PS_DISPATCH_ENABLE
;
506 dw1
|= GEN7_WM_DW1_PS_KILL_PIXEL
;
509 dw1
|= GEN7_WM_DW1_PS_USE_DEPTH
;
511 dw1
|= GEN7_WM_DW1_PS_USE_W
;
512 if (io
->use_coverage_mask
)
513 dw1
|= GEN7_WM_DW1_PS_USE_COVERAGE_MASK
;
515 dw2
= (info
->per_sample_dispatch
) ?
516 GEN7_WM_DW2_MSDISPMODE_PERSAMPLE
: GEN7_WM_DW2_MSDISPMODE_PERPIXEL
;
518 STATIC_ASSERT(ARRAY_SIZE(ps
->ps
) >= 2);
526 ps_set_gen7_3DSTATE_PS(struct ilo_state_ps
*ps
,
527 const struct ilo_dev
*dev
,
528 const struct ilo_state_ps_info
*info
,
529 const struct pixel_ff
*ff
)
531 const struct ilo_state_ps_io_info
*io
= &info
->io
;
532 uint32_t dw2
, dw3
, dw4
, dw5
;
534 ILO_DEV_ASSERT(dev
, 7, 7.5);
536 dw2
= ff
->sampler_count
<< GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
|
537 ff
->surface_count
<< GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT
;
540 dw2
|= GEN6_THREADDISP_FP_MODE_ALT
;
542 dw3
= ff
->scratch_space
<< GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT
;
544 dw4
= io
->posoffset
<< GEN7_PS_DW4_POSOFFSET__SHIFT
|
545 ff
->dispatch_modes
<< GEN7_PS_DW4_DISPATCH_MODE__SHIFT
;
547 if (ilo_dev_gen(dev
) == ILO_GEN(7.5)) {
548 dw4
|= ff
->thread_count
<< GEN75_PS_DW4_MAX_THREADS__SHIFT
|
549 (ff
->sample_mask
& 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT
;
551 dw4
|= ff
->thread_count
<< GEN7_PS_DW4_MAX_THREADS__SHIFT
;
555 dw4
|= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE
;
557 dw4
|= GEN7_PS_DW4_ATTR_ENABLE
;
559 dw4
|= GEN7_PS_DW4_COMPUTE_OMASK
;
560 if (info
->rt_clear_enable
)
561 dw4
|= GEN7_PS_DW4_RT_FAST_CLEAR
;
562 if (ff
->dual_source_blending
)
563 dw4
|= GEN7_PS_DW4_DUAL_SOURCE_BLEND
;
564 if (info
->rt_resolve_enable
)
565 dw4
|= GEN7_PS_DW4_RT_RESOLVE
;
566 if (ilo_dev_gen(dev
) >= ILO_GEN(7.5) && ff
->has_uav
)
567 dw4
|= GEN75_PS_DW4_ACCESS_UAV
;
569 dw5
= ff
->grf_starts
[0] << GEN7_PS_DW5_URB_GRF_START0__SHIFT
|
570 ff
->grf_starts
[1] << GEN7_PS_DW5_URB_GRF_START1__SHIFT
|
571 ff
->grf_starts
[2] << GEN7_PS_DW5_URB_GRF_START2__SHIFT
;
573 STATIC_ASSERT(ARRAY_SIZE(ps
->ps
) >= 8);
578 ps
->ps
[6] = ff
->kernel_offsets
[1];
579 ps
->ps
[7] = ff
->kernel_offsets
[2];
585 ps_set_gen8_3DSTATE_PS(struct ilo_state_ps
*ps
,
586 const struct ilo_dev
*dev
,
587 const struct ilo_state_ps_info
*info
,
588 const struct pixel_ff
*ff
)
590 const struct ilo_state_ps_io_info
*io
= &info
->io
;
591 uint32_t dw3
, dw4
, dw6
, dw7
;
593 ILO_DEV_ASSERT(dev
, 8, 8);
595 dw3
= ff
->sampler_count
<< GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
|
596 ff
->surface_count
<< GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT
;
599 dw3
|= GEN6_THREADDISP_FP_MODE_ALT
;
601 dw4
= ff
->scratch_space
<< GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT
;
603 dw6
= ff
->thread_count
<< GEN8_PS_DW6_MAX_THREADS__SHIFT
|
604 io
->posoffset
<< GEN8_PS_DW6_POSOFFSET__SHIFT
|
605 ff
->dispatch_modes
<< GEN8_PS_DW6_DISPATCH_MODE__SHIFT
;
608 dw6
|= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE
;
610 if (info
->rt_clear_enable
)
611 dw6
|= GEN8_PS_DW6_RT_FAST_CLEAR
;
612 if (info
->rt_resolve_enable
)
613 dw6
|= GEN8_PS_DW6_RT_RESOLVE
;
615 dw7
= ff
->grf_starts
[0] << GEN8_PS_DW7_URB_GRF_START0__SHIFT
|
616 ff
->grf_starts
[1] << GEN8_PS_DW7_URB_GRF_START1__SHIFT
|
617 ff
->grf_starts
[2] << GEN8_PS_DW7_URB_GRF_START2__SHIFT
;
619 STATIC_ASSERT(ARRAY_SIZE(ps
->ps
) >= 6);
624 ps
->ps
[4] = ff
->kernel_offsets
[1];
625 ps
->ps
[5] = ff
->kernel_offsets
[2];
631 ps_set_gen8_3DSTATE_PS_EXTRA(struct ilo_state_ps
*ps
,
632 const struct ilo_dev
*dev
,
633 const struct ilo_state_ps_info
*info
,
634 const struct pixel_ff
*ff
)
636 const struct ilo_state_ps_io_info
*io
= &info
->io
;
639 ILO_DEV_ASSERT(dev
, 8, 8);
641 dw1
= io
->pscdepth
<< GEN8_PSX_DW1_PSCDEPTH__SHIFT
;
643 if (info
->valid_kernels
)
644 dw1
|= GEN8_PSX_DW1_VALID
;
645 if (!io
->has_rt_write
)
646 dw1
|= GEN8_PSX_DW1_UAV_ONLY
;
648 dw1
|= GEN8_PSX_DW1_COMPUTE_OMASK
;
649 if (io
->write_pixel_mask
)
650 dw1
|= GEN8_PSX_DW1_KILL_PIXEL
;
653 dw1
|= GEN8_PSX_DW1_USE_DEPTH
;
655 dw1
|= GEN8_PSX_DW1_USE_W
;
657 dw1
|= GEN8_PSX_DW1_ATTR_ENABLE
;
659 if (info
->per_sample_dispatch
)
660 dw1
|= GEN8_PSX_DW1_PER_SAMPLE
;
662 dw1
|= GEN8_PSX_DW1_ACCESS_UAV
;
663 if (io
->use_coverage_mask
)
664 dw1
|= GEN8_PSX_DW1_USE_COVERAGE_MASK
;
667 * From the Broadwell PRM, volume 2b, page 151:
669 * "When this bit (Pixel Shader Valid) clear the rest of this command
670 * should also be clear.
672 if (!info
->valid_kernels
)
675 STATIC_ASSERT(ARRAY_SIZE(ps
->ps
) >= 5);
682 ilo_state_ps_init(struct ilo_state_ps
*ps
,
683 const struct ilo_dev
*dev
,
684 const struct ilo_state_ps_info
*info
)
689 assert(ilo_is_zeroed(ps
, sizeof(*ps
)));
691 ret
&= ps_get_gen6_ff(dev
, info
, &ff
);
693 if (ilo_dev_gen(dev
) >= ILO_GEN(8)) {
694 ret
&= ps_set_gen8_3DSTATE_PS(ps
, dev
, info
, &ff
);
695 ret
&= ps_set_gen8_3DSTATE_PS_EXTRA(ps
, dev
, info
, &ff
);
696 } else if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
697 ret
&= ps_set_gen7_3dstate_wm(ps
, dev
, info
, &ff
);
698 ret
&= ps_set_gen7_3DSTATE_PS(ps
, dev
, info
, &ff
);
700 ret
&= ps_set_gen6_3dstate_wm(ps
, dev
, info
, &ff
);
703 /* save conditions */
704 ps
->conds
= ff
.conds
;
712 ilo_state_ps_init_disabled(struct ilo_state_ps
*ps
,
713 const struct ilo_dev
*dev
)
715 struct ilo_state_ps_info info
;
717 memset(&info
, 0, sizeof(info
));
719 return ilo_state_ps_init(ps
, dev
, &info
);
723 ilo_state_ps_set_params(struct ilo_state_ps
*ps
,
724 const struct ilo_dev
*dev
,
725 const struct ilo_state_ps_params_info
*params
)
727 ILO_DEV_ASSERT(dev
, 6, 8);
729 /* modify sample mask */
730 if (ilo_dev_gen(dev
) == ILO_GEN(7.5)) {
731 ps
->ps
[4] = (ps
->ps
[4] & ~GEN75_PS_DW4_SAMPLE_MASK__MASK
) |
732 (params
->sample_mask
& 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT
;
735 /* modify dispatch enable, pixel kill, and dual source blending */
736 if (ilo_dev_gen(dev
) < ILO_GEN(8)) {
737 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
738 if (ps_params_get_gen6_dispatch_enable(dev
, params
, &ps
->conds
))
739 ps
->ps
[0] |= GEN7_WM_DW1_PS_DISPATCH_ENABLE
;
741 ps
->ps
[0] &= ~GEN7_WM_DW1_PS_DISPATCH_ENABLE
;
743 if (ps_params_get_gen6_kill_pixel(dev
, params
, &ps
->conds
))
744 ps
->ps
[0] |= GEN7_WM_DW1_PS_KILL_PIXEL
;
746 ps
->ps
[0] &= ~GEN7_WM_DW1_PS_KILL_PIXEL
;
748 if (params
->dual_source_blending
)
749 ps
->ps
[4] |= GEN7_PS_DW4_DUAL_SOURCE_BLEND
;
751 ps
->ps
[4] &= ~GEN7_PS_DW4_DUAL_SOURCE_BLEND
;
753 if (ps_params_get_gen6_dispatch_enable(dev
, params
, &ps
->conds
))
754 ps
->ps
[3] |= GEN6_WM_DW5_PS_DISPATCH_ENABLE
;
756 ps
->ps
[3] &= ~GEN6_WM_DW5_PS_DISPATCH_ENABLE
;
758 if (ps_params_get_gen6_kill_pixel(dev
, params
, &ps
->conds
))
759 ps
->ps
[3] |= GEN6_WM_DW5_PS_KILL_PIXEL
;
761 ps
->ps
[3] &= ~GEN6_WM_DW5_PS_KILL_PIXEL
;
763 if (params
->dual_source_blending
)
764 ps
->ps
[3] |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND
;
766 ps
->ps
[3] &= ~GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND
;