2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2014 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "genhw/genhw.h"
29 #include "util/u_dual_blend.h"
30 #include "util/u_framebuffer.h"
31 #include "util/u_half.h"
33 #include "ilo_format.h"
34 #include "ilo_image.h"
35 #include "ilo_state_3d.h"
36 #include "../ilo_shader.h"
/*
 * Derive the clipper state from a rasterizer state.
 *
 * Three dwords (dw1, dw2, dw3) are assembled and stored in
 * clip->payload[0], clip->payload[1] and clip->payload[2].
 * clip->can_enable_guardband is set to true first and then cleared when
 * point_size_per_vertex is set, point_size > 1.0f, line_smooth is set, or
 * line_width > 1.0f.
 *
 * dev   - device description; ILO_DEV_ASSERT is invoked with the range
 *         6 to 8, and ilo_dev_gen(dev) gates the Gen7+ and pre-Gen8 bits.
 * state - rasterizer state; the visible code reads cull_face,
 *         clip_plane_enable, clip_halfz, depth_clip, flatshade_first,
 *         point_size_per_vertex, point_size, line_smooth and line_width.
 * clip  - output structure.
 *
 * dw3 always encodes a minimum point width of 0x1 and a maximum of 0x7ff.
 *
 * NOTE(review): this listing appears to be a lossy extraction; the return
 * type, braces and several else, break and case lines are not visible.
 * Confirm control flow against the pristine file before changing logic.
 */
39 rasterizer_init_clip(const struct ilo_dev
*dev
,
40 const struct pipe_rasterizer_state
*state
,
41 struct ilo_rasterizer_clip
*clip
)
43 uint32_t dw1
, dw2
, dw3
;
45 ILO_DEV_ASSERT(dev
, 6, 8);
47 dw1
= GEN6_CLIP_DW1_STATISTICS
;
49 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
51 * From the Ivy Bridge PRM, volume 2 part 1, page 219:
53 * "Workaround : Due to Hardware issue "EarlyCull" needs to be
54 * enabled only for the cases where the incoming primitive topology
55 * into the clipper guaranteed to be Trilist."
57 * What does this mean?
60 GEN7_CLIP_DW1_EARLY_CULL_ENABLE
;
62 if (ilo_dev_gen(dev
) < ILO_GEN(8)) {
64 dw1
|= GEN7_CLIP_DW1_FRONTWINDING_CCW
;
66 switch (state
->cull_face
) {
68 dw1
|= GEN7_CLIP_DW1_CULLMODE_NONE
;
71 dw1
|= GEN7_CLIP_DW1_CULLMODE_FRONT
;
74 dw1
|= GEN7_CLIP_DW1_CULLMODE_BACK
;
76 case PIPE_FACE_FRONT_AND_BACK
:
77 dw1
|= GEN7_CLIP_DW1_CULLMODE_BOTH
;
83 dw2
= GEN6_CLIP_DW2_CLIP_ENABLE
|
84 GEN6_CLIP_DW2_XY_TEST_ENABLE
|
85 state
->clip_plane_enable
<< GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT
|
86 GEN6_CLIP_DW2_CLIPMODE_NORMAL
;
88 if (state
->clip_halfz
)
89 dw2
|= GEN6_CLIP_DW2_APIMODE_D3D
;
91 dw2
|= GEN6_CLIP_DW2_APIMODE_OGL
;
93 if (ilo_dev_gen(dev
) < ILO_GEN(8) && state
->depth_clip
)
94 dw2
|= GEN6_CLIP_DW2_Z_TEST_ENABLE
;
96 if (state
->flatshade_first
) {
97 dw2
|= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT
|
98 0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT
|
99 1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT
;
102 dw2
|= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT
|
103 1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT
|
104 2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT
;
107 dw3
= 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT
|
108 0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT
;
110 clip
->payload
[0] = dw1
;
111 clip
->payload
[1] = dw2
;
112 clip
->payload
[2] = dw3
;
114 clip
->can_enable_guardband
= true;
117 * There are several reasons that guard band test should be disabled
119 * - GL wide points (to avoid partially visibie object)
120 * - GL wide or AA lines (to avoid partially visibie object)
122 if (state
->point_size_per_vertex
|| state
->point_size
> 1.0f
)
123 clip
->can_enable_guardband
= false;
124 if (state
->line_smooth
|| state
->line_width
> 1.0f
)
125 clip
->can_enable_guardband
= false;
/*
 * Store the depth offset (polygon offset) terms of a rasterizer state in sf.
 *
 * Each value is passed through fui() before being stored (presumably a
 * float-to-raw-bits conversion from the util headers; confirm):
 *   sf->dw_depth_offset_const = fui(state->offset_units * 2.0f)
 *   sf->dw_depth_offset_scale = fui(state->offset_scale)
 *   sf->dw_depth_offset_clamp = fui(state->offset_clamp)
 *
 * The constant term is doubled; per the comment in the body, the minimum
 * representable value used by the hardware is not large enough to serve as
 * the minimum resolvable difference.
 *
 * Despite the gen6 suffix, ILO_DEV_ASSERT is invoked with the range 6 to 8,
 * and both the Gen6 and Gen8 SF initializers in this file call this helper.
 */
129 rasterizer_init_sf_depth_offset_gen6(const struct ilo_dev
*dev
,
130 const struct pipe_rasterizer_state
*state
,
131 struct ilo_rasterizer_sf
*sf
)
133 ILO_DEV_ASSERT(dev
, 6, 8);
136 * Scale the constant term. The minimum representable value used by the HW
137 * is not large enouch to be the minimum resolvable difference.
139 sf
->dw_depth_offset_const
= fui(state
->offset_units
* 2.0f
);
140 sf
->dw_depth_offset_scale
= fui(state
->offset_scale
);
141 sf
->dw_depth_offset_clamp
= fui(state
->offset_clamp
);
/*
 * Derive the SF (strips and fans) state for the pre-Gen8 path.
 *
 * ILO_DEV_ASSERT is invoked with the range 6 to 7.5. Three dwords are built
 * and stored in sf->payload[0..2]:
 *   dw1 - statistics and viewport enables, depth offset enables
 *         (offset_tri, offset_line, offset_point), front and back fill
 *         modes, and front winding (front_ccw).
 *   dw2 - AA line enable and cap, cull mode, line width, line stipple
 *         (only when ilo_dev_gen(dev) == ILO_GEN(7.5)), scissor enable.
 *   dw3 - AA line distance mode, subpixel precision, last pixel enable,
 *         provoking vertex selection (flatshade_first), and the
 *         use-point-width flag when point_size_per_vertex is not set.
 *
 * The branch guarded by false && ilo_dev_gen(dev) >= ILO_GEN(7) can never
 * be taken; the XXX comment above it says the GEN6 path seems to work fine
 * for GEN7.
 *
 * line_width is computed as (line_width + line_smooth) * 128.0f + 0.5f and
 * clamped to [0, 1023]; the comment in the body gives the format as U3.7.
 * point_width is point_size * 8.0f + 0.5f truncated to int and clamped to
 * [1, 2047].
 *
 * When state->multisample is set, sf->dw_msaa receives
 * GEN7_SF_DW2_MSRASTMODE_ON_PATTERN ORed with a line width field; the
 * quoted PRM text forbids a zero line width in that mode and the visible
 * code substitutes 128 (1.0f).
 *
 * The function ends by calling rasterizer_init_sf_depth_offset_gen6().
 *
 * NOTE(review): several lines of the original are not visible here,
 * including the initial assignment to dw2, the statements under the
 * line_width == 128 test, the use of point_width, and whatever follows the
 * final 3DSTATE_RASTER comment. Verify against the pristine file.
 */
145 rasterizer_init_sf_gen6(const struct ilo_dev
*dev
,
146 const struct pipe_rasterizer_state
*state
,
147 struct ilo_rasterizer_sf
*sf
)
149 int line_width
, point_width
;
150 uint32_t dw1
, dw2
, dw3
;
152 ILO_DEV_ASSERT(dev
, 6, 7.5);
155 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
157 * "This bit (Statistics Enable) should be set whenever clipping is
158 * enabled and the Statistics Enable bit is set in CLIP_STATE. It
159 * should be cleared if clipping is disabled or Statistics Enable in
160 * CLIP_STATE is clear."
162 dw1
= GEN7_SF_DW1_STATISTICS
|
163 GEN7_SF_DW1_VIEWPORT_ENABLE
;
165 /* XXX GEN6 path seems to work fine for GEN7 */
166 if (false && ilo_dev_gen(dev
) >= ILO_GEN(7)) {
168 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
170 * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
171 * Enable Solid , Global Depth Offset Enable Wireframe, and Global
172 * Depth Offset Enable Point) should be set whenever non zero depth
173 * bias (Slope, Bias) values are used. Setting this bit may have
174 * some degradation of performance for some workloads."
176 if (state
->offset_tri
|| state
->offset_line
|| state
->offset_point
) {
177 /* XXX need to scale offset_const according to the depth format */
178 dw1
|= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET
;
180 dw1
|= GEN7_SF_DW1_DEPTH_OFFSET_SOLID
|
181 GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME
|
182 GEN7_SF_DW1_DEPTH_OFFSET_POINT
;
185 if (state
->offset_tri
)
186 dw1
|= GEN7_SF_DW1_DEPTH_OFFSET_SOLID
;
187 if (state
->offset_line
)
188 dw1
|= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME
;
189 if (state
->offset_point
)
190 dw1
|= GEN7_SF_DW1_DEPTH_OFFSET_POINT
;
193 switch (state
->fill_front
) {
194 case PIPE_POLYGON_MODE_FILL
:
195 dw1
|= GEN7_SF_DW1_FRONTFACE_SOLID
;
197 case PIPE_POLYGON_MODE_LINE
:
198 dw1
|= GEN7_SF_DW1_FRONTFACE_WIREFRAME
;
200 case PIPE_POLYGON_MODE_POINT
:
201 dw1
|= GEN7_SF_DW1_FRONTFACE_POINT
;
205 switch (state
->fill_back
) {
206 case PIPE_POLYGON_MODE_FILL
:
207 dw1
|= GEN7_SF_DW1_BACKFACE_SOLID
;
209 case PIPE_POLYGON_MODE_LINE
:
210 dw1
|= GEN7_SF_DW1_BACKFACE_WIREFRAME
;
212 case PIPE_POLYGON_MODE_POINT
:
213 dw1
|= GEN7_SF_DW1_BACKFACE_POINT
;
217 if (state
->front_ccw
)
218 dw1
|= GEN7_SF_DW1_FRONTWINDING_CCW
;
222 if (state
->line_smooth
) {
224 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
226 * "This field (Anti-aliasing Enable) must be disabled if any of the
227 * render targets have integer (UINT or SINT) surface format."
229 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
231 * "This field (Hierarchical Depth Buffer Enable) must be disabled
232 * if Anti-aliasing Enable in 3DSTATE_SF is enabled.
234 * TODO We do not check those yet.
236 dw2
|= GEN7_SF_DW2_AA_LINE_ENABLE
|
237 GEN7_SF_DW2_AA_LINE_CAP_1_0
;
240 switch (state
->cull_face
) {
242 dw2
|= GEN7_SF_DW2_CULLMODE_NONE
;
244 case PIPE_FACE_FRONT
:
245 dw2
|= GEN7_SF_DW2_CULLMODE_FRONT
;
248 dw2
|= GEN7_SF_DW2_CULLMODE_BACK
;
250 case PIPE_FACE_FRONT_AND_BACK
:
251 dw2
|= GEN7_SF_DW2_CULLMODE_BOTH
;
256 * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
257 * pixels in the minor direction. We have to make the lines slightly
258 * thicker, 0.5 pixel on both sides, so that they intersect that many
259 * pixels are considered into the lines.
261 * Line width is in U3.7.
264 ((state
->line_width
+ (float) state
->line_smooth
) * 128.0f
+ 0.5f
);
265 line_width
= CLAMP(line_width
, 0, 1023);
268 if (line_width
== 128 && !state
->line_smooth
)
271 dw2
|= line_width
<< GEN7_SF_DW2_LINE_WIDTH__SHIFT
;
273 if (ilo_dev_gen(dev
) == ILO_GEN(7.5) && state
->line_stipple_enable
)
274 dw2
|= GEN75_SF_DW2_LINE_STIPPLE_ENABLE
;
277 dw2
|= GEN7_SF_DW2_SCISSOR_ENABLE
;
279 dw3
= GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE
|
280 GEN7_SF_DW3_SUBPIXEL_8BITS
;
282 if (state
->line_last_pixel
)
283 dw3
|= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE
;
285 if (state
->flatshade_first
) {
286 dw3
|= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT
|
287 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT
|
288 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT
;
290 dw3
|= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT
|
291 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT
|
292 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT
;
295 if (!state
->point_size_per_vertex
)
296 dw3
|= GEN7_SF_DW3_USE_POINT_WIDTH
;
299 point_width
= (int) (state
->point_size
* 8.0f
+ 0.5f
);
300 point_width
= CLAMP(point_width
, 1, 2047);
304 STATIC_ASSERT(Elements(sf
->payload
) >= 3);
305 sf
->payload
[0] = dw1
;
306 sf
->payload
[1] = dw2
;
307 sf
->payload
[2] = dw3
;
309 if (state
->multisample
) {
310 sf
->dw_msaa
= GEN7_SF_DW2_MSRASTMODE_ON_PATTERN
;
313 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
315 * "Software must not program a value of 0.0 when running in
316 * MSRASTMODE_ON_xxx modes - zero-width lines are not available
317 * when multisampling rasterization is enabled."
320 line_width
= 128; /* 1.0f */
322 sf
->dw_msaa
|= line_width
<< GEN7_SF_DW2_LINE_WIDTH__SHIFT
;
328 rasterizer_init_sf_depth_offset_gen6(dev
, state
, sf
);
329 /* 3DSTATE_RASTER is Gen8+ only */
/*
 * Accumulate GEN8_RASTER_DW1_* flags for a rasterizer state into dw.
 *
 * ILO_DEV_ASSERT is invoked with the range 8 to 8. Flags are ORed into dw
 * from these state fields: front_ccw, cull_face, point_smooth, multisample,
 * offset_tri, offset_line, offset_point, fill_front, fill_back, line_smooth
 * and depth_clip. GEN8_RASTER_DW1_SCISSOR_ENABLE is ORed in without a
 * visible condition.
 *
 * The caller in this file assigns the result to sf->dw_raster.
 *
 * NOTE(review): the declaration and initialization of dw, the return
 * statement, and some case labels and break lines are not visible in this
 * listing; confirm against the pristine file.
 */
334 rasterizer_get_sf_raster_gen8(const struct ilo_dev
*dev
,
335 const struct pipe_rasterizer_state
*state
)
339 ILO_DEV_ASSERT(dev
, 8, 8);
341 if (state
->front_ccw
)
342 dw
|= GEN8_RASTER_DW1_FRONTWINDING_CCW
;
344 switch (state
->cull_face
) {
346 dw
|= GEN8_RASTER_DW1_CULLMODE_NONE
;
348 case PIPE_FACE_FRONT
:
349 dw
|= GEN8_RASTER_DW1_CULLMODE_FRONT
;
352 dw
|= GEN8_RASTER_DW1_CULLMODE_BACK
;
354 case PIPE_FACE_FRONT_AND_BACK
:
355 dw
|= GEN8_RASTER_DW1_CULLMODE_BOTH
;
359 if (state
->point_smooth
)
360 dw
|= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE
;
362 if (state
->multisample
)
363 dw
|= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE
;
365 if (state
->offset_tri
)
366 dw
|= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID
;
367 if (state
->offset_line
)
368 dw
|= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME
;
369 if (state
->offset_point
)
370 dw
|= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT
;
372 switch (state
->fill_front
) {
373 case PIPE_POLYGON_MODE_FILL
:
374 dw
|= GEN8_RASTER_DW1_FRONTFACE_SOLID
;
376 case PIPE_POLYGON_MODE_LINE
:
377 dw
|= GEN8_RASTER_DW1_FRONTFACE_WIREFRAME
;
379 case PIPE_POLYGON_MODE_POINT
:
380 dw
|= GEN8_RASTER_DW1_FRONTFACE_POINT
;
384 switch (state
->fill_back
) {
385 case PIPE_POLYGON_MODE_FILL
:
386 dw
|= GEN8_RASTER_DW1_BACKFACE_SOLID
;
388 case PIPE_POLYGON_MODE_LINE
:
389 dw
|= GEN8_RASTER_DW1_BACKFACE_WIREFRAME
;
391 case PIPE_POLYGON_MODE_POINT
:
392 dw
|= GEN8_RASTER_DW1_BACKFACE_POINT
;
396 if (state
->line_smooth
)
397 dw
|= GEN8_RASTER_DW1_AA_LINE_ENABLE
;
400 dw
|= GEN8_RASTER_DW1_SCISSOR_ENABLE
;
402 if (state
->depth_clip
)
403 dw
|= GEN8_RASTER_DW1_Z_TEST_ENABLE
;
/*
 * Derive the SF state for the Gen8 path.
 *
 * ILO_DEV_ASSERT is invoked with the range 8 to 8. Three dwords are built
 * and stored in sf->payload[0..2]:
 *   dw1 - GEN7_SF_DW1_STATISTICS | GEN7_SF_DW1_VIEWPORT_ENABLE.
 *   dw2 - line width field, plus GEN7_SF_DW2_AA_LINE_CAP_1_0 when
 *         line_smooth is set.
 *   dw3 - AA line distance mode, subpixel precision, last pixel enable,
 *         provoking vertex selection (flatshade_first), and the
 *         use-point-width flag when point_size_per_vertex is not set.
 *
 * line_width is (line_width + line_smooth) * 128.0f + 0.5f clamped to
 * [0, 1023]; point_width is point_size * 8.0f + 0.5f truncated to int and
 * clamped to [1, 2047]. These match the computations in
 * rasterizer_init_sf_gen6().
 *
 * After the payload is stored the function calls
 * rasterizer_init_sf_depth_offset_gen6() and sets sf->dw_raster from
 * rasterizer_get_sf_raster_gen8().
 *
 * NOTE(review): the statement under the line_width == 128 test and the
 * last operand of the initial dw3 expression (the line ends with a
 * dangling OR) are not visible in this listing; presumably the latter
 * involves point_width. Verify against the pristine file.
 */
409 rasterizer_init_sf_gen8(const struct ilo_dev
*dev
,
410 const struct pipe_rasterizer_state
*state
,
411 struct ilo_rasterizer_sf
*sf
)
413 int line_width
, point_width
;
414 uint32_t dw1
, dw2
, dw3
;
416 ILO_DEV_ASSERT(dev
, 8, 8);
420 ((state
->line_width
+ (float) state
->line_smooth
) * 128.0f
+ 0.5f
);
421 line_width
= CLAMP(line_width
, 0, 1023);
424 if (line_width
== 128 && !state
->line_smooth
)
428 point_width
= (int) (state
->point_size
* 8.0f
+ 0.5f
);
429 point_width
= CLAMP(point_width
, 1, 2047);
431 dw1
= GEN7_SF_DW1_STATISTICS
|
432 GEN7_SF_DW1_VIEWPORT_ENABLE
;
434 dw2
= line_width
<< GEN7_SF_DW2_LINE_WIDTH__SHIFT
;
435 if (state
->line_smooth
)
436 dw2
|= GEN7_SF_DW2_AA_LINE_CAP_1_0
;
438 dw3
= GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE
|
439 GEN7_SF_DW3_SUBPIXEL_8BITS
|
442 if (state
->line_last_pixel
)
443 dw3
|= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE
;
445 if (state
->flatshade_first
) {
446 dw3
|= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT
|
447 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT
|
448 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT
;
450 dw3
|= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT
|
451 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT
|
452 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT
;
455 if (!state
->point_size_per_vertex
)
456 dw3
|= GEN7_SF_DW3_USE_POINT_WIDTH
;
460 STATIC_ASSERT(Elements(sf
->payload
) >= 3);
461 sf
->payload
[0] = dw1
;
462 sf
->payload
[1] = dw2
;
463 sf
->payload
[2] = dw3
;
465 rasterizer_init_sf_depth_offset_gen6(dev
, state
, sf
);
468 sf
->dw_raster
= rasterizer_get_sf_raster_gen8(dev
, state
);
/*
 * Derive the fixed-function part of the WM state for the Gen6 path.
 *
 * ILO_DEV_ASSERT is invoked with the range 6 to 6. Two dwords are stored:
 *   wm->payload[0] = dw5: GEN6_WM_DW5_AA_LINE_WIDTH_2_0, plus the AA line
 *                    cap when line_smooth is set, plus polygon and line
 *                    stipple enables from poly_stipple_enable and
 *                    line_stipple_enable.
 *   wm->payload[1] = dw6: GEN6_WM_DW6_ZW_INTERP_PIXEL, plus
 *                    GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT when
 *                    bottom_edge_rule is set.
 *
 * wm->dw_msaa_disp is set to GEN6_WM_DW6_MSDISPMODE_PERPIXEL. A value that
 * is GEN6_WM_DW6_MSRASTMODE_ON_PATTERN when multisample is set and 0
 * otherwise is also computed; its destination is not visible here
 * (presumably wm->dw_msaa_rast, per the comment in the body).
 *
 * The STATIC_ASSERT checks that MSRASTMODE_OFF_PIXEL and
 * MSDISPMODE_PERSAMPLE are both 0; the surrounding comment ties this to
 * later ORing wm->dw_msaa_rast and wm->dw_msaa_disp into dw6.
 *
 * NOTE(review): the declarations of dw5 and dw6 and some comment lines are
 * not visible in this listing.
 */
472 rasterizer_init_wm_gen6(const struct ilo_dev
*dev
,
473 const struct pipe_rasterizer_state
*state
,
474 struct ilo_rasterizer_wm
*wm
)
478 ILO_DEV_ASSERT(dev
, 6, 6);
480 /* only the FF unit states are set, as in GEN7 */
482 dw5
= GEN6_WM_DW5_AA_LINE_WIDTH_2_0
;
484 /* same value as in 3DSTATE_SF */
485 if (state
->line_smooth
)
486 dw5
|= GEN6_WM_DW5_AA_LINE_CAP_1_0
;
488 if (state
->poly_stipple_enable
)
489 dw5
|= GEN6_WM_DW5_POLY_STIPPLE_ENABLE
;
490 if (state
->line_stipple_enable
)
491 dw5
|= GEN6_WM_DW5_LINE_STIPPLE_ENABLE
;
494 * assertion that makes sure
496 * dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
500 STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL
== 0 &&
501 GEN6_WM_DW6_MSDISPMODE_PERSAMPLE
== 0);
502 dw6
= GEN6_WM_DW6_ZW_INTERP_PIXEL
;
504 if (state
->bottom_edge_rule
)
505 dw6
|= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT
;
508 (state
->multisample
) ? GEN6_WM_DW6_MSRASTMODE_ON_PATTERN
: 0;
509 wm
->dw_msaa_disp
= GEN6_WM_DW6_MSDISPMODE_PERPIXEL
;
511 STATIC_ASSERT(Elements(wm
->payload
) >= 2);
512 wm
->payload
[0] = dw5
;
513 wm
->payload
[1] = dw6
;
/*
 * Derive the fixed-function part of the WM state for the Gen7 / Gen7.5 path.
 *
 * ILO_DEV_ASSERT is invoked with the range 7 to 7.5. dw1 starts as
 * GEN7_WM_DW1_ZW_INTERP_PIXEL | GEN7_WM_DW1_AA_LINE_WIDTH_2_0 and gains:
 *   - the AA line cap when line_smooth is set,
 *   - polygon and line stipple enables,
 *   - GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT when bottom_edge_rule is set.
 * dw1 and dw2 are stored in wm->payload[0] and wm->payload[1].
 *
 * wm->dw_msaa_disp is set to GEN7_WM_DW2_MSDISPMODE_PERPIXEL. A value that
 * is GEN7_WM_DW1_MSRASTMODE_ON_PATTERN when multisample is set and 0
 * otherwise is also computed; its destination is not visible here
 * (presumably wm->dw_msaa_rast, per the comment in the body).
 *
 * The STATIC_ASSERT checks that MSRASTMODE_OFF_PIXEL and
 * MSDISPMODE_PERSAMPLE are both 0; the surrounding comment ties this to
 * later ORing wm->dw_msaa_rast into dw1 and wm->dw_msaa_disp into dw2.
 *
 * NOTE(review): the declarations of dw1 and dw2 and the assignment that
 * gives dw2 its value are not visible in this listing.
 */
517 rasterizer_init_wm_gen7(const struct ilo_dev
*dev
,
518 const struct pipe_rasterizer_state
*state
,
519 struct ilo_rasterizer_wm
*wm
)
523 ILO_DEV_ASSERT(dev
, 7, 7.5);
526 * assertion that makes sure
528 * dw1 |= wm->dw_msaa_rast;
529 * dw2 |= wm->dw_msaa_disp;
533 STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL
== 0 &&
534 GEN7_WM_DW2_MSDISPMODE_PERSAMPLE
== 0);
535 dw1
= GEN7_WM_DW1_ZW_INTERP_PIXEL
|
536 GEN7_WM_DW1_AA_LINE_WIDTH_2_0
;
539 /* same value as in 3DSTATE_SF */
540 if (state
->line_smooth
)
541 dw1
|= GEN7_WM_DW1_AA_LINE_CAP_1_0
;
543 if (state
->poly_stipple_enable
)
544 dw1
|= GEN7_WM_DW1_POLY_STIPPLE_ENABLE
;
545 if (state
->line_stipple_enable
)
546 dw1
|= GEN7_WM_DW1_LINE_STIPPLE_ENABLE
;
548 if (state
->bottom_edge_rule
)
549 dw1
|= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT
;
552 (state
->multisample
) ? GEN7_WM_DW1_MSRASTMODE_ON_PATTERN
: 0;
553 wm
->dw_msaa_disp
= GEN7_WM_DW2_MSDISPMODE_PERPIXEL
;
555 STATIC_ASSERT(Elements(wm
->payload
) >= 2);
556 wm
->payload
[0] = dw1
;
557 wm
->payload
[1] = dw2
;
/*
 * Compute the rasterizer-dependent WM flags for the Gen8 path.
 *
 * ILO_DEV_ASSERT is invoked with the range 8 to 8. dw starts as
 * GEN7_WM_DW1_ZW_INTERP_PIXEL | GEN7_WM_DW1_AA_LINE_WIDTH_2_0 and gains the
 * AA line cap (line_smooth), polygon and line stipple enables, and
 * GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT (bottom_edge_rule).
 *
 * ilo_gpe_init_rasterizer() stores the result in rasterizer->wm.payload[0].
 *
 * NOTE(review): the declaration of dw and the return statement are not
 * visible in this listing.
 */
561 rasterizer_get_wm_gen8(const struct ilo_dev
*dev
,
562 const struct pipe_rasterizer_state
*state
)
566 ILO_DEV_ASSERT(dev
, 8, 8);
568 dw
= GEN7_WM_DW1_ZW_INTERP_PIXEL
|
569 GEN7_WM_DW1_AA_LINE_WIDTH_2_0
;
571 /* same value as in 3DSTATE_SF */
572 if (state
->line_smooth
)
573 dw
|= GEN7_WM_DW1_AA_LINE_CAP_1_0
;
575 if (state
->poly_stipple_enable
)
576 dw
|= GEN7_WM_DW1_POLY_STIPPLE_ENABLE
;
577 if (state
->line_stipple_enable
)
578 dw
|= GEN7_WM_DW1_LINE_STIPPLE_ENABLE
;
580 if (state
->bottom_edge_rule
)
581 dw
|= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT
;
/*
 * Initialize every part of an ilo_rasterizer_state from a rasterizer state.
 *
 * The clip state is always initialized with rasterizer_init_clip(). The WM
 * and SF parts depend on ilo_dev_gen(dev):
 *   - Gen8 and later: rasterizer->wm is zeroed with memset, wm.payload[0]
 *     is set from rasterizer_get_wm_gen8(), and SF comes from
 *     rasterizer_init_sf_gen8().
 *   - Gen7 and later (below Gen8): rasterizer_init_wm_gen7() and
 *     rasterizer_init_sf_gen6().
 *   - remaining case: rasterizer_init_wm_gen6() and
 *     rasterizer_init_sf_gen6() (presumably under an else whose line is
 *     not visible in this listing).
 *
 * dev        - device description used to pick the generation path.
 * state      - source rasterizer state.
 * rasterizer - output; its clip, wm and sf members are written.
 */
587 ilo_gpe_init_rasterizer(const struct ilo_dev
*dev
,
588 const struct pipe_rasterizer_state
*state
,
589 struct ilo_rasterizer_state
*rasterizer
)
591 rasterizer_init_clip(dev
, state
, &rasterizer
->clip
);
593 if (ilo_dev_gen(dev
) >= ILO_GEN(8)) {
594 memset(&rasterizer
->wm
, 0, sizeof(rasterizer
->wm
));
595 rasterizer
->wm
.payload
[0] = rasterizer_get_wm_gen8(dev
, state
);
597 rasterizer_init_sf_gen8(dev
, state
, &rasterizer
->sf
);
598 } else if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
599 rasterizer_init_wm_gen7(dev
, state
, &rasterizer
->wm
);
600 rasterizer_init_sf_gen6(dev
, state
, &rasterizer
->sf
);
602 rasterizer_init_wm_gen6(dev
, state
, &rasterizer
->wm
);
603 rasterizer_init_sf_gen6(dev
, state
, &rasterizer
->sf
);
/*
 * Build the fragment shader dependent dwords for the Gen6 path.
 *
 * ILO_DEV_ASSERT is invoked with the range 6 to 6. Kernel parameters are
 * read with ilo_shader_get_kernel_param(): URB data start register, input
 * count, sampler count and barycentric interpolation modes. Four dwords
 * are stored in cso->payload[0..3]:
 *   dw2 - sampler count field, computed as (sampler_count + 3) / 4. The
 *         (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT expression always
 *         yields 0.
 *   dw4 - start_grf in the URB_GRF_START0 field; START1 and START2 are 0.
 *   dw5 - max_threads - 1 (max_threads is 80 when dev->gt == 2, else 40),
 *         kill pixel, computed depth, use depth and use W flags taken from
 *         kernel parameters, dispatch enable, and 8-wide dispatch mode.
 *   dw6 - input count, GEN6_WM_DW6_PS_POSOFFSET_NONE, and the barycentric
 *         interpolation modes.
 *
 * The assert requires ILO_KERNEL_FS_DISPATCH_16_OFFSET to be zero before
 * GEN6_PS_DISPATCH_8 is selected.
 *
 * The TODO comments in the body record that the computed-depth restriction
 * for NULL depth buffers is not checked and that dispatch enable is always
 * set.
 */
608 fs_init_cso_gen6(const struct ilo_dev
*dev
,
609 const struct ilo_shader_state
*fs
,
610 struct ilo_shader_cso
*cso
)
612 int start_grf
, input_count
, sampler_count
, interps
, max_threads
;
613 uint32_t dw2
, dw4
, dw5
, dw6
;
615 ILO_DEV_ASSERT(dev
, 6, 6);
617 start_grf
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_URB_DATA_START_REG
);
618 input_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
);
619 sampler_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_SAMPLER_COUNT
);
620 interps
= ilo_shader_get_kernel_param(fs
,
621 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
);
623 /* see brwCreateContext() */
624 max_threads
= (dev
->gt
== 2) ? 80 : 40;
626 dw2
= (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT
;
627 dw2
|= ((sampler_count
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
629 dw4
= start_grf
<< GEN6_WM_DW4_URB_GRF_START0__SHIFT
|
630 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT
|
631 0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT
;
633 dw5
= (max_threads
- 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT
;
636 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
638 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
639 * PS kernel or color calculator has the ability to kill (discard)
640 * pixels or samples, other than due to depth or stencil testing.
641 * This bit is required to be ENABLED in the following situations:
643 * The API pixel shader program contains "killpix" or "discard"
644 * instructions, or other code in the pixel shader kernel that can
645 * cause the final pixel mask to differ from the pixel mask received
648 * A sampler with chroma key enabled with kill pixel mode is used by
651 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
654 * The pixel shader kernel generates and outputs oMask.
656 * Note: As ClipDistance clipping is fully supported in hardware and
657 * therefore not via PS instructions, there should be no need to
658 * ENABLE this bit due to ClipDistance clipping."
660 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_USE_KILL
))
661 dw5
|= GEN6_WM_DW5_PS_KILL_PIXEL
;
664 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
666 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
667 * field must be set to disabled."
669 * TODO This is not checked yet.
671 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_OUTPUT_Z
))
672 dw5
|= GEN6_WM_DW5_PS_COMPUTE_DEPTH
;
674 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_Z
))
675 dw5
|= GEN6_WM_DW5_PS_USE_DEPTH
;
677 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_W
))
678 dw5
|= GEN6_WM_DW5_PS_USE_W
;
681 * TODO set this bit only when
683 * a) fs writes colors and color is not masked, or
684 * b) fs writes depth, or
688 dw5
|= GEN6_WM_DW5_PS_DISPATCH_ENABLE
;
690 assert(!ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_DISPATCH_16_OFFSET
));
691 dw5
|= GEN6_PS_DISPATCH_8
<< GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT
;
693 dw6
= input_count
<< GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT
|
694 GEN6_WM_DW6_PS_POSOFFSET_NONE
|
695 interps
<< GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT
;
697 STATIC_ASSERT(Elements(cso
->payload
) >= 4);
698 cso
->payload
[0] = dw2
;
699 cso
->payload
[1] = dw4
;
700 cso
->payload
[2] = dw5
;
701 cso
->payload
[3] = dw6
;
/*
 * Compute the fragment shader dependent WM flags for Gen7 / Gen7.5.
 *
 * ILO_DEV_ASSERT is invoked with the range 7 to 7.5. dw starts as the
 * barycentric interpolation modes kernel parameter shifted by
 * GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT, and then gains:
 *   - GEN7_WM_DW1_PS_DISPATCH_ENABLE unconditionally (see the TODO),
 *   - GEN7_WM_DW1_PS_KILL_PIXEL when ILO_KERNEL_FS_USE_KILL is nonzero,
 *   - GEN7_WM_DW1_PSCDEPTH_ON when ILO_KERNEL_FS_OUTPUT_Z is nonzero,
 *   - GEN7_WM_DW1_PS_USE_DEPTH when ILO_KERNEL_FS_INPUT_Z is nonzero,
 *   - GEN7_WM_DW1_PS_USE_W when ILO_KERNEL_FS_INPUT_W is nonzero.
 *
 * fs_init_cso_gen7() stores the result in cso->payload[3].
 *
 * NOTE(review): the declaration of dw and the return statement are not
 * visible in this listing.
 */
705 fs_get_wm_gen7(const struct ilo_dev
*dev
,
706 const struct ilo_shader_state
*fs
)
710 ILO_DEV_ASSERT(dev
, 7, 7.5);
712 dw
= ilo_shader_get_kernel_param(fs
,
713 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
) <<
714 GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT
;
717 * TODO set this bit only when
719 * a) fs writes colors and color is not masked, or
720 * b) fs writes depth, or
723 dw
|= GEN7_WM_DW1_PS_DISPATCH_ENABLE
;
726 * From the Ivy Bridge PRM, volume 2 part 1, page 278:
728 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
729 * the PS kernel or color calculator has the ability to kill
730 * (discard) pixels or samples, other than due to depth or stencil
731 * testing. This bit is required to be ENABLED in the following
734 * - The API pixel shader program contains "killpix" or "discard"
735 * instructions, or other code in the pixel shader kernel that
736 * can cause the final pixel mask to differ from the pixel mask
737 * received on dispatch.
739 * - A sampler with chroma key enabled with kill pixel mode is used
740 * by the pixel shader.
742 * - Any render target has Alpha Test Enable or AlphaToCoverage
745 * - The pixel shader kernel generates and outputs oMask.
747 * Note: As ClipDistance clipping is fully supported in hardware
748 * and therefore not via PS instructions, there should be no need
749 * to ENABLE this bit due to ClipDistance clipping."
751 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_USE_KILL
))
752 dw
|= GEN7_WM_DW1_PS_KILL_PIXEL
;
754 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_OUTPUT_Z
))
755 dw
|= GEN7_WM_DW1_PSCDEPTH_ON
;
757 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_Z
))
758 dw
|= GEN7_WM_DW1_PS_USE_DEPTH
;
760 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_W
))
761 dw
|= GEN7_WM_DW1_PS_USE_W
;
/*
 * Build the fragment shader dependent dwords for Gen7 / Gen7.5.
 *
 * ILO_DEV_ASSERT is invoked with the range 7 to 7.5. Four dwords are stored
 * in cso->payload[0..3]:
 *   dw2 - sampler count field, computed as (sampler_count + 3) / 4. The
 *         (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT expression always
 *         yields 0.
 *   dw4 - GEN7_PS_DW4_POSOFFSET_NONE, the max thread field, push constant
 *         enable when ILO_KERNEL_PCB_CBUF0_SIZE is nonzero, attribute
 *         enable when ILO_KERNEL_INPUT_COUNT is nonzero, and 8-wide
 *         dispatch mode.
 *   dw5 - start_grf in the URB_GRF_START0 field; START1 and START2 are 0.
 *   payload[3] - result of fs_get_wm_gen7().
 *
 * max_threads is chosen inside a switch on ilo_dev_gen(dev):
 *   - one arm uses 408 / 204 / 102 for dev->gt 3 / 2 / other, the GEN75
 *     shift, and also sets 1 in the GEN75 sample mask field;
 *   - the other arm uses 172 when dev->gt == 2 and 48 otherwise, with the
 *     GEN7 shift.
 * The case labels are not visible in this listing.
 *
 * The assert requires ILO_KERNEL_FS_DISPATCH_16_OFFSET to be zero before
 * GEN6_PS_DISPATCH_8 is selected.
 */
767 fs_init_cso_gen7(const struct ilo_dev
*dev
,
768 const struct ilo_shader_state
*fs
,
769 struct ilo_shader_cso
*cso
)
771 int start_grf
, sampler_count
, max_threads
;
772 uint32_t dw2
, dw4
, dw5
;
774 ILO_DEV_ASSERT(dev
, 7, 7.5);
776 start_grf
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_URB_DATA_START_REG
);
777 sampler_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_SAMPLER_COUNT
);
779 dw2
= (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT
;
780 dw2
|= ((sampler_count
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
782 dw4
= GEN7_PS_DW4_POSOFFSET_NONE
;
784 /* see brwCreateContext() */
785 switch (ilo_dev_gen(dev
)) {
787 max_threads
= (dev
->gt
== 3) ? 408 : (dev
->gt
== 2) ? 204 : 102;
788 dw4
|= (max_threads
- 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT
;
789 dw4
|= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT
;
793 max_threads
= (dev
->gt
== 2) ? 172 : 48;
794 dw4
|= (max_threads
- 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT
;
798 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_PCB_CBUF0_SIZE
))
799 dw4
|= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE
;
801 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
))
802 dw4
|= GEN7_PS_DW4_ATTR_ENABLE
;
804 assert(!ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_DISPATCH_16_OFFSET
));
805 dw4
|= GEN6_PS_DISPATCH_8
<< GEN7_PS_DW4_DISPATCH_MODE__SHIFT
;
807 dw5
= start_grf
<< GEN7_PS_DW5_URB_GRF_START0__SHIFT
|
808 0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT
|
809 0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT
;
811 STATIC_ASSERT(Elements(cso
->payload
) >= 4);
812 cso
->payload
[0] = dw2
;
813 cso
->payload
[1] = dw4
;
814 cso
->payload
[2] = dw5
;
815 cso
->payload
[3] = fs_get_wm_gen7(dev
, fs
);
/*
 * Compute the GEN8_PSX_DW1_* flags for a fragment shader.
 *
 * ILO_DEV_ASSERT is invoked with the range 8 to 8. dw starts as
 * GEN8_PSX_DW1_DISPATCH_ENABLE and gains one flag per nonzero kernel
 * parameter:
 *   ILO_KERNEL_FS_USE_KILL  -> GEN8_PSX_DW1_KILL_PIXEL
 *   ILO_KERNEL_FS_OUTPUT_Z  -> GEN8_PSX_DW1_PSCDEPTH_ON
 *   ILO_KERNEL_FS_INPUT_Z   -> GEN8_PSX_DW1_USE_DEPTH
 *   ILO_KERNEL_FS_INPUT_W   -> GEN8_PSX_DW1_USE_W
 *   ILO_KERNEL_INPUT_COUNT  -> GEN8_PSX_DW1_ATTR_ENABLE
 *
 * fs_init_cso_gen8() stores the result in cso->payload[3].
 *
 * NOTE(review): the declaration of dw and the return statement are not
 * visible in this listing.
 */
819 fs_get_psx_gen8(const struct ilo_dev
*dev
,
820 const struct ilo_shader_state
*fs
)
824 ILO_DEV_ASSERT(dev
, 8, 8);
826 dw
= GEN8_PSX_DW1_DISPATCH_ENABLE
;
828 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_USE_KILL
))
829 dw
|= GEN8_PSX_DW1_KILL_PIXEL
;
830 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_OUTPUT_Z
))
831 dw
|= GEN8_PSX_DW1_PSCDEPTH_ON
;
832 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_Z
))
833 dw
|= GEN8_PSX_DW1_USE_DEPTH
;
834 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_W
))
835 dw
|= GEN8_PSX_DW1_USE_W
;
836 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
))
837 dw
|= GEN8_PSX_DW1_ATTR_ENABLE
;
/*
 * Return the fragment shader dependent WM bits for Gen8.
 *
 * ILO_DEV_ASSERT is invoked with the range 8 to 8. The result is the
 * ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS kernel parameter shifted left by
 * GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT; no other bits are set here.
 *
 * fs_init_cso_gen8() stores the result in cso->payload[4].
 */
843 fs_get_wm_gen8(const struct ilo_dev
*dev
,
844 const struct ilo_shader_state
*fs
)
846 ILO_DEV_ASSERT(dev
, 8, 8);
848 return ilo_shader_get_kernel_param(fs
,
849 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
) <<
850 GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT
;
/*
 * Build the fragment shader dependent dwords for Gen8.
 *
 * ILO_DEV_ASSERT is invoked with the range 8 to 8. Five dwords are stored
 * in cso->payload[0..4]:
 *   dw3 - sampler count field, computed as (sampler_count + 3) / 4. The
 *         (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT expression always
 *         yields 0.
 *   dw6 - (64 - 2) in the max threads field, GEN8_PS_DW6_POSOFFSET_NONE,
 *         push constant enable when ILO_KERNEL_PCB_CBUF0_SIZE is nonzero,
 *         and 8-wide dispatch mode.
 *   dw7 - start_grf in the URB_GRF_START0 field; START1 and START2 are 0.
 *   payload[3] - result of fs_get_psx_gen8().
 *   payload[4] - result of fs_get_wm_gen8().
 *
 * The assert requires ILO_KERNEL_FS_DISPATCH_16_OFFSET to be zero before
 * GEN6_PS_DISPATCH_8 is selected.
 *
 * NOTE(review): the max thread value is the literal 64 - 2 rather than a
 * per-device count as in the Gen6 and Gen7 variants; any comment explaining
 * it is not visible in this listing.
 */
854 fs_init_cso_gen8(const struct ilo_dev
*dev
,
855 const struct ilo_shader_state
*fs
,
856 struct ilo_shader_cso
*cso
)
858 int start_grf
, sampler_count
;
859 uint32_t dw3
, dw6
, dw7
;
861 ILO_DEV_ASSERT(dev
, 8, 8);
863 start_grf
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_URB_DATA_START_REG
);
864 sampler_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_SAMPLER_COUNT
);
866 dw3
= (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT
;
867 dw3
|= ((sampler_count
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
870 dw6
= (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT
|
871 GEN8_PS_DW6_POSOFFSET_NONE
;
872 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_PCB_CBUF0_SIZE
))
873 dw6
|= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE
;
875 assert(!ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_DISPATCH_16_OFFSET
));
876 dw6
|= GEN6_PS_DISPATCH_8
<< GEN8_PS_DW6_DISPATCH_MODE__SHIFT
;
878 dw7
= start_grf
<< GEN8_PS_DW7_URB_GRF_START0__SHIFT
|
879 0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT
|
880 0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT
;
882 STATIC_ASSERT(Elements(cso
->payload
) >= 5);
883 cso
->payload
[0] = dw3
;
884 cso
->payload
[1] = dw6
;
885 cso
->payload
[2] = dw7
;
886 cso
->payload
[3] = fs_get_psx_gen8(dev
, fs
);
887 cso
->payload
[4] = fs_get_wm_gen8(dev
, fs
);
/*
 * Initialize the fragment shader CSO for the device generation.
 *
 * Dispatches on ilo_dev_gen(dev):
 *   - Gen8 and later: fs_init_cso_gen8()
 *   - Gen7 and later (below Gen8): fs_init_cso_gen7()
 *   - remaining case: fs_init_cso_gen6() (presumably under an else whose
 *     line is not visible in this listing)
 *
 * dev - device description used to pick the generation path.
 * fs  - fragment shader state queried by the per-generation helpers.
 * cso - output; its payload array is filled by the selected helper.
 */
891 ilo_gpe_init_fs_cso(const struct ilo_dev
*dev
,
892 const struct ilo_shader_state
*fs
,
893 struct ilo_shader_cso
*cso
)
895 if (ilo_dev_gen(dev
) >= ILO_GEN(8))
896 fs_init_cso_gen8(dev
, fs
, cso
);
897 else if (ilo_dev_gen(dev
) >= ILO_GEN(7))
898 fs_init_cso_gen7(dev
, fs
, cso
);
900 fs_init_cso_gen6(dev
, fs
, cso
);
/*
 * Intermediate description of a depth/stencil surface, filled by
 * zs_init_info() or zs_init_info_null() and consumed by
 * ilo_gpe_init_zs_surface().
 *
 * NOTE(review): only some members are visible in this listing. Code in
 * this file also accesses surface_type, format, and zs, stencil and hiz
 * sub-objects with bo, stride, qpitch, tiling and offset members, so the
 * tiling member shown below presumably belongs to a nested struct whose
 * other lines were dropped. Check the pristine file for the full layout.
 */
903 struct ilo_zs_surface_info
{
911 enum gen_surface_tiling tiling
;
915 unsigned width
, height
, depth
;
916 unsigned lod
, first_layer
, num_layers
;
/*
 * Fill info so that it describes a null depth/stencil surface.
 *
 * ILO_DEV_ASSERT is invoked with the range 6 to 8. The structure is zeroed
 * with memset, then surface_type is set to GEN6_SURFTYPE_NULL, format to
 * GEN6_ZFORMAT_D32_FLOAT and num_layers to 1.
 *
 * NOTE(review): the embedded line numbers jump from 928 to 932, so further
 * assignments may exist in the original that are not visible here.
 */
920 zs_init_info_null(const struct ilo_dev
*dev
,
921 struct ilo_zs_surface_info
*info
)
923 ILO_DEV_ASSERT(dev
, 6, 8);
925 memset(info
, 0, sizeof(*info
));
927 info
->surface_type
= GEN6_SURFTYPE_NULL
;
928 info
->format
= GEN6_ZFORMAT_D32_FLOAT
;
932 info
->num_layers
= 1;
/*
 * Fill info with the description of a depth/stencil surface view.
 *
 * ILO_DEV_ASSERT is invoked with the range 6 to 8 and info is zeroed first.
 *
 * dev         - device description; its generation selects several paths.
 * img         - depth image; supplies bo, stride, tiling, HiZ (aux) data
 *               and the base dimensions.
 * s8_img      - separate stencil image; dereferenced when it is non-NULL
 *               or when format is PIPE_FORMAT_S8_UINT.
 * target      - texture target, translated by
 *               ilo_gpe_gen6_translate_texture(); a cube result is replaced
 *               by GEN6_SURFTYPE_2D (see the PRM quote in the body).
 * format      - pipe format selecting the hardware depth format.
 * level       - mip level, used for the aux check and level offsets.
 * first_layer - copied to info->first_layer.
 * num_layers  - copied to info->num_layers; also used as info->depth
 *               unless target is PIPE_TEXTURE_3D, where img->depth0 is used.
 * info        - output.
 *
 * separate_stencil is true on Gen7 and later; the other visible assignment
 * takes it from ilo_image_can_enable_aux(img, level). It selects between
 * the X8 / plain and the S8 / S8X24 hardware formats for the combined
 * depth-stencil pipe formats.
 *
 * The zs fields are filled from img unless format is PIPE_FORMAT_S8_UINT.
 * The stencil stride is doubled (bo_stride * 2) as the PRM quote requires.
 * On Gen6 the stencil offset and the HiZ offset are set to the position of
 * the requested level. qpitch values are walk_layer_height / 4, with
 * asserts that the height is a multiple of 4.
 *
 * NOTE(review): the switch on format, several case labels, else and break
 * lines, and the declarations of x and y are not visible in this listing.
 * The unsupported-format path asserts and calls zs_init_info_null();
 * whether it returns afterwards cannot be seen here.
 */
936 zs_init_info(const struct ilo_dev
*dev
,
937 const struct ilo_image
*img
,
938 const struct ilo_image
*s8_img
,
939 enum pipe_texture_target target
,
940 enum pipe_format format
, unsigned level
,
941 unsigned first_layer
, unsigned num_layers
,
942 struct ilo_zs_surface_info
*info
)
944 bool separate_stencil
;
946 ILO_DEV_ASSERT(dev
, 6, 8);
948 memset(info
, 0, sizeof(*info
));
950 info
->surface_type
= ilo_gpe_gen6_translate_texture(target
);
952 if (info
->surface_type
== GEN6_SURFTYPE_CUBE
) {
954 * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
956 * "For Other Surfaces (Cube Surfaces):
957 * This field (Minimum Array Element) is ignored."
959 * "For Other Surfaces (Cube Surfaces):
960 * This field (Render Target View Extent) is ignored."
962 * As such, we cannot set first_layer and num_layers on cube surfaces.
963 * To work around that, treat it as a 2D surface.
965 info
->surface_type
= GEN6_SURFTYPE_2D
;
968 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
969 separate_stencil
= true;
972 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
974 * "This field (Separate Stencil Buffer Enable) must be set to the
975 * same value (enabled or disabled) as Hierarchical Depth Buffer
978 separate_stencil
= ilo_image_can_enable_aux(img
, level
);
982 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
984 * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
985 * Surface Format of the depth buffer cannot be
986 * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
987 * requires the separate stencil buffer."
989 * From the Ironlake PRM, volume 2 part 1, page 330:
991 * "If this field (Separate Stencil Buffer Enable) is disabled, the
992 * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
994 * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
995 * is indeed used, the depth values output by the fragment shaders will
996 * be different when read back.
998 * As for GEN7+, separate_stencil is always true.
1001 case PIPE_FORMAT_Z16_UNORM
:
1002 info
->format
= GEN6_ZFORMAT_D16_UNORM
;
1004 case PIPE_FORMAT_Z32_FLOAT
:
1005 info
->format
= GEN6_ZFORMAT_D32_FLOAT
;
1007 case PIPE_FORMAT_Z24X8_UNORM
:
1008 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
1009 info
->format
= (separate_stencil
) ?
1010 GEN6_ZFORMAT_D24_UNORM_X8_UINT
:
1011 GEN6_ZFORMAT_D24_UNORM_S8_UINT
;
1013 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
1014 info
->format
= (separate_stencil
) ?
1015 GEN6_ZFORMAT_D32_FLOAT
:
1016 GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT
;
1018 case PIPE_FORMAT_S8_UINT
:
1019 if (separate_stencil
) {
1020 info
->format
= GEN6_ZFORMAT_D32_FLOAT
;
1025 assert(!"unsupported depth/stencil format");
1026 zs_init_info_null(dev
, info
);
1031 if (format
!= PIPE_FORMAT_S8_UINT
) {
1032 info
->zs
.bo
= img
->bo
;
1033 info
->zs
.stride
= img
->bo_stride
;
1035 assert(img
->walk_layer_height
% 4 == 0);
1036 info
->zs
.qpitch
= img
->walk_layer_height
/ 4;
1038 info
->zs
.tiling
= img
->tiling
;
1039 info
->zs
.offset
= 0;
1042 if (s8_img
|| format
== PIPE_FORMAT_S8_UINT
) {
1043 info
->stencil
.bo
= s8_img
->bo
;
1046 * From the Sandy Bridge PRM, volume 2 part 1, page 329:
1048 * "The pitch must be set to 2x the value computed based on width,
1049 * as the stencil buffer is stored with two rows interleaved."
1051 * For GEN7, we still dobule the stride because we did not double the
1052 * slice widths when initializing the layout.
1054 info
->stencil
.stride
= s8_img
->bo_stride
* 2;
1056 assert(s8_img
->walk_layer_height
% 4 == 0);
1057 info
->stencil
.qpitch
= s8_img
->walk_layer_height
/ 4;
1059 info
->stencil
.tiling
= s8_img
->tiling
;
1061 if (ilo_dev_gen(dev
) == ILO_GEN(6)) {
1064 assert(s8_img
->walk
== ILO_IMAGE_WALK_LOD
);
1066 /* offset to the level */
1067 ilo_image_get_slice_pos(s8_img
, level
, 0, &x
, &y
);
1068 ilo_image_pos_to_mem(s8_img
, x
, y
, &x
, &y
);
1069 info
->stencil
.offset
= ilo_image_mem_to_raw(s8_img
, x
, y
);
1073 if (ilo_image_can_enable_aux(img
, level
)) {
1074 info
->hiz
.bo
= img
->aux
.bo
;
1075 info
->hiz
.stride
= img
->aux
.bo_stride
;
1077 assert(img
->aux
.walk_layer_height
% 4 == 0);
1078 info
->hiz
.qpitch
= img
->aux
.walk_layer_height
/ 4;
1080 info
->hiz
.tiling
= GEN6_TILING_Y
;
1082 /* offset to the level */
1083 if (ilo_dev_gen(dev
) == ILO_GEN(6))
1084 info
->hiz
.offset
= img
->aux
.walk_lod_offsets
[level
];
1087 info
->width
= img
->width0
;
1088 info
->height
= img
->height0
;
1089 info
->depth
= (target
== PIPE_TEXTURE_3D
) ? img
->depth0
: num_layers
;
1092 info
->first_layer
= first_layer
;
1093 info
->num_layers
= num_layers
;
1097 ilo_gpe_init_zs_surface(const struct ilo_dev
*dev
,
1098 const struct ilo_image
*img
,
1099 const struct ilo_image
*s8_img
,
1100 enum pipe_texture_target target
,
1101 enum pipe_format format
, unsigned level
,
1102 unsigned first_layer
, unsigned num_layers
,
1103 struct ilo_zs_surface
*zs
)
1105 const int max_2d_size
= (ilo_dev_gen(dev
) >= ILO_GEN(7)) ? 16384 : 8192;
1106 const int max_array_size
= (ilo_dev_gen(dev
) >= ILO_GEN(7)) ? 2048 : 512;
1107 struct ilo_zs_surface_info info
;
1108 uint32_t dw1
, dw2
, dw3
, dw4
, dw5
, dw6
;
1109 int align_w
= 8, align_h
= 4;
1111 ILO_DEV_ASSERT(dev
, 6, 8);
1114 zs_init_info(dev
, img
, s8_img
, target
, format
,
1115 level
, first_layer
, num_layers
, &info
);
1117 switch (img
->sample_count
) {
1137 zs_init_info_null(dev
, &info
);
1140 switch (info
.surface_type
) {
1141 case GEN6_SURFTYPE_NULL
:
1143 case GEN6_SURFTYPE_1D
:
1144 assert(info
.width
<= max_2d_size
&& info
.height
== 1 &&
1145 info
.depth
<= max_array_size
);
1146 assert(info
.first_layer
< max_array_size
- 1 &&
1147 info
.num_layers
<= max_array_size
);
1149 case GEN6_SURFTYPE_2D
:
1150 assert(info
.width
<= max_2d_size
&& info
.height
<= max_2d_size
&&
1151 info
.depth
<= max_array_size
);
1152 assert(info
.first_layer
< max_array_size
- 1 &&
1153 info
.num_layers
<= max_array_size
);
1155 case GEN6_SURFTYPE_3D
:
1156 assert(info
.width
<= 2048 && info
.height
<= 2048 && info
.depth
<= 2048);
1157 assert(info
.first_layer
< 2048 && info
.num_layers
<= max_array_size
);
1159 case GEN6_SURFTYPE_CUBE
:
1160 assert(info
.width
<= max_2d_size
&& info
.height
<= max_2d_size
&&
1162 assert(info
.first_layer
== 0 && info
.num_layers
== 1);
1163 assert(info
.width
== info
.height
);
1166 assert(!"unexpected depth surface type");
1170 dw1
= info
.surface_type
<< GEN6_DEPTH_DW1_TYPE__SHIFT
|
1171 info
.format
<< GEN6_DEPTH_DW1_FORMAT__SHIFT
;
1174 /* required for GEN6+ */
1175 assert(info
.zs
.tiling
== GEN6_TILING_Y
);
1176 assert(info
.zs
.stride
> 0 && info
.zs
.stride
< 128 * 1024 &&
1177 info
.zs
.stride
% 128 == 0);
1178 assert(info
.width
<= info
.zs
.stride
);
1180 dw1
|= (info
.zs
.stride
- 1);
1181 dw2
= info
.zs
.offset
;
1186 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
1188 dw1
|= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE
;
1190 if (info
.stencil
.bo
)
1191 dw1
|= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE
;
1194 dw1
|= GEN7_DEPTH_DW1_HIZ_ENABLE
;
1196 dw3
= (info
.height
- 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT
|
1197 (info
.width
- 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT
|
1198 info
.lod
<< GEN7_DEPTH_DW3_LOD__SHIFT
;
1200 zs
->dw_aligned_8x4
=
1201 (align(info
.height
, align_h
) - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT
|
1202 (align(info
.width
, align_w
) - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT
|
1203 info
.lod
<< GEN7_DEPTH_DW3_LOD__SHIFT
;
1205 dw4
= (info
.depth
- 1) << GEN7_DEPTH_DW4_DEPTH__SHIFT
|
1206 info
.first_layer
<< GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT
;
1210 dw6
= (info
.num_layers
- 1) << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT
;
1212 if (ilo_dev_gen(dev
) >= ILO_GEN(8))
1213 dw6
|= info
.zs
.qpitch
;
1215 /* always Y-tiled */
1216 dw1
|= GEN6_TILING_Y
<< GEN6_DEPTH_DW1_TILING__SHIFT
;
1219 dw1
|= GEN6_DEPTH_DW1_HIZ_ENABLE
|
1220 GEN6_DEPTH_DW1_SEPARATE_STENCIL
;
1223 dw3
= (info
.height
- 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT
|
1224 (info
.width
- 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT
|
1225 info
.lod
<< GEN6_DEPTH_DW3_LOD__SHIFT
|
1226 GEN6_DEPTH_DW3_MIPLAYOUT_BELOW
;
1228 zs
->dw_aligned_8x4
=
1229 (align(info
.height
, align_h
) - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT
|
1230 (align(info
.width
, align_w
) - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT
|
1231 info
.lod
<< GEN6_DEPTH_DW3_LOD__SHIFT
|
1232 GEN6_DEPTH_DW3_MIPLAYOUT_BELOW
;
1234 dw4
= (info
.depth
- 1) << GEN6_DEPTH_DW4_DEPTH__SHIFT
|
1235 info
.first_layer
<< GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT
|
1236 (info
.num_layers
- 1) << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT
;
1243 STATIC_ASSERT(Elements(zs
->payload
) >= 12);
1245 zs
->payload
[0] = dw1
;
1246 zs
->payload
[1] = dw2
;
1247 zs
->payload
[2] = dw3
;
1248 zs
->payload
[3] = dw4
;
1249 zs
->payload
[4] = dw5
;
1250 zs
->payload
[5] = dw6
;
1252 /* do not increment reference count */
1253 zs
->bo
= info
.zs
.bo
;
1255 /* separate stencil */
1256 if (info
.stencil
.bo
) {
1257 assert(info
.stencil
.stride
> 0 && info
.stencil
.stride
< 128 * 1024 &&
1258 info
.stencil
.stride
% 128 == 0);
1260 dw1
= (info
.stencil
.stride
- 1) << GEN6_STENCIL_DW1_PITCH__SHIFT
;
1261 if (ilo_dev_gen(dev
) >= ILO_GEN(7.5))
1262 dw1
|= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE
;
1264 dw2
= info
.stencil
.offset
;
1265 dw4
= info
.stencil
.qpitch
;
1272 zs
->payload
[6] = dw1
;
1273 zs
->payload
[7] = dw2
;
1274 zs
->payload
[8] = dw4
;
1275 /* do not increment reference count */
1276 zs
->separate_s8_bo
= info
.stencil
.bo
;
1280 dw1
= (info
.hiz
.stride
- 1) << GEN6_HIZ_DW1_PITCH__SHIFT
;
1281 dw2
= info
.hiz
.offset
;
1282 dw4
= info
.hiz
.qpitch
;
1289 zs
->payload
[9] = dw1
;
1290 zs
->payload
[10] = dw2
;
1291 zs
->payload
[11] = dw4
;
1292 /* do not increment reference count */
1293 zs
->hiz_bo
= info
.hiz
.bo
;
/**
 * Compute the screen-space guardband rectangle for a viewport.
 *
 * \param dev       device, used to select the per-generation extent
 * \param center_x  X coordinate the guardband should be centered at
 * \param center_y  Y coordinate the guardband should be centered at
 * \param min_gbx   receives the left edge of the guardband
 * \param max_gbx   receives the right edge of the guardband
 * \param min_gby   receives the top edge of the guardband
 * \param max_gby   receives the bottom edge of the guardband
 */
static void
viewport_get_guardband(const struct ilo_dev *dev,
                       int center_x, int center_y,
                       int *min_gbx, int *max_gbx,
                       int *min_gby, int *max_gby)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 234:
    *
    *     "Per-Device Guardband Extents
    *
    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
    *       - Maximum Post-Clamp Delta (X or Y): 16K"
    *
    *     "In addition, in order to be correctly rendered, objects must have a
    *      screenspace bounding box not exceeding 8K in the X or Y direction.
    *      This additional restriction must also be comprehended by software,
    *      i.e., enforced by use of clipping."
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 248:
    *
    *     "Per-Device Guardband Extents
    *
    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
    *       - Maximum Post-Clamp Delta (X or Y): N/A"
    *
    *     "In addition, in order to be correctly rendered, objects must have a
    *      screenspace bounding box not exceeding 8K in the X or Y direction.
    *      This additional restriction must also be comprehended by software,
    *      i.e., enforced by use of clipping."
    *
    * Combined, the bounding box of any object can not exceed 8K in both
    * width and height.
    *
    * Below we set the guardband as a square of length 8K, centered at where
    * the viewport is.  This makes sure all objects passing the GB test are
    * valid to the renderer, and those failing the XY clipping have a
    * better chance of passing the GB test.
    */
   const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 32768 : 16384;
   const int half_len = 8192 / 2;

   /*
    * Make sure the guardband is within the valid range, which is
    * [-max_extent, max_extent - 1].  The upper clamp must therefore leave the
    * maximum edge at max_extent - 1, not at max_extent.
    */
   if (center_x - half_len < -max_extent)
      center_x = -max_extent + half_len;
   else if (center_x + half_len > max_extent - 1)
      center_x = max_extent - 1 - half_len;

   if (center_y - half_len < -max_extent)
      center_y = -max_extent + half_len;
   else if (center_y + half_len > max_extent - 1)
      center_y = max_extent - 1 - half_len;

   /* the outputs are integers; no float round-trip is needed */
   *min_gbx = center_x - half_len;
   *max_gbx = center_x + half_len;
   *min_gby = center_y - half_len;
   *max_gby = center_y + half_len;
}
1356 ilo_gpe_set_viewport_cso(const struct ilo_dev
*dev
,
1357 const struct pipe_viewport_state
*state
,
1358 struct ilo_viewport_cso
*vp
)
1360 const float scale_x
= fabs(state
->scale
[0]);
1361 const float scale_y
= fabs(state
->scale
[1]);
1362 const float scale_z
= fabs(state
->scale
[2]);
1363 int min_gbx
, max_gbx
, min_gby
, max_gby
;
1365 ILO_DEV_ASSERT(dev
, 6, 8);
1367 viewport_get_guardband(dev
,
1368 (int) state
->translate
[0],
1369 (int) state
->translate
[1],
1370 &min_gbx
, &max_gbx
, &min_gby
, &max_gby
);
1373 vp
->m00
= state
->scale
[0];
1374 vp
->m11
= state
->scale
[1];
1375 vp
->m22
= state
->scale
[2];
1376 vp
->m30
= state
->translate
[0];
1377 vp
->m31
= state
->translate
[1];
1378 vp
->m32
= state
->translate
[2];
1380 /* guardband in NDC space */
1381 vp
->min_gbx
= ((float) min_gbx
- state
->translate
[0]) / scale_x
;
1382 vp
->max_gbx
= ((float) max_gbx
- state
->translate
[0]) / scale_x
;
1383 vp
->min_gby
= ((float) min_gby
- state
->translate
[1]) / scale_y
;
1384 vp
->max_gby
= ((float) max_gby
- state
->translate
[1]) / scale_y
;
1386 /* viewport in screen space */
1387 vp
->min_x
= scale_x
* -1.0f
+ state
->translate
[0];
1388 vp
->max_x
= scale_x
* 1.0f
+ state
->translate
[0];
1389 vp
->min_y
= scale_y
* -1.0f
+ state
->translate
[1];
1390 vp
->max_y
= scale_y
* 1.0f
+ state
->translate
[1];
1391 vp
->min_z
= scale_z
* -1.0f
+ state
->translate
[2];
1392 vp
->max_z
= scale_z
* 1.0f
+ state
->translate
[2];
1396 * Translate a pipe logicop to the matching hardware logicop.
1399 gen6_translate_pipe_logicop(unsigned logicop
)
1402 case PIPE_LOGICOP_CLEAR
: return GEN6_LOGICOP_CLEAR
;
1403 case PIPE_LOGICOP_NOR
: return GEN6_LOGICOP_NOR
;
1404 case PIPE_LOGICOP_AND_INVERTED
: return GEN6_LOGICOP_AND_INVERTED
;
1405 case PIPE_LOGICOP_COPY_INVERTED
: return GEN6_LOGICOP_COPY_INVERTED
;
1406 case PIPE_LOGICOP_AND_REVERSE
: return GEN6_LOGICOP_AND_REVERSE
;
1407 case PIPE_LOGICOP_INVERT
: return GEN6_LOGICOP_INVERT
;
1408 case PIPE_LOGICOP_XOR
: return GEN6_LOGICOP_XOR
;
1409 case PIPE_LOGICOP_NAND
: return GEN6_LOGICOP_NAND
;
1410 case PIPE_LOGICOP_AND
: return GEN6_LOGICOP_AND
;
1411 case PIPE_LOGICOP_EQUIV
: return GEN6_LOGICOP_EQUIV
;
1412 case PIPE_LOGICOP_NOOP
: return GEN6_LOGICOP_NOOP
;
1413 case PIPE_LOGICOP_OR_INVERTED
: return GEN6_LOGICOP_OR_INVERTED
;
1414 case PIPE_LOGICOP_COPY
: return GEN6_LOGICOP_COPY
;
1415 case PIPE_LOGICOP_OR_REVERSE
: return GEN6_LOGICOP_OR_REVERSE
;
1416 case PIPE_LOGICOP_OR
: return GEN6_LOGICOP_OR
;
1417 case PIPE_LOGICOP_SET
: return GEN6_LOGICOP_SET
;
1419 assert(!"unknown logicop function");
1420 return GEN6_LOGICOP_CLEAR
;
1425 * Translate a pipe blend function to the matching hardware blend function.
1428 gen6_translate_pipe_blend(unsigned blend
)
1431 case PIPE_BLEND_ADD
: return GEN6_BLENDFUNCTION_ADD
;
1432 case PIPE_BLEND_SUBTRACT
: return GEN6_BLENDFUNCTION_SUBTRACT
;
1433 case PIPE_BLEND_REVERSE_SUBTRACT
: return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT
;
1434 case PIPE_BLEND_MIN
: return GEN6_BLENDFUNCTION_MIN
;
1435 case PIPE_BLEND_MAX
: return GEN6_BLENDFUNCTION_MAX
;
1437 assert(!"unknown blend function");
1438 return GEN6_BLENDFUNCTION_ADD
;
1443 * Translate a pipe blend factor to the matching hardware blend factor.
1446 gen6_translate_pipe_blendfactor(unsigned blendfactor
)
1448 switch (blendfactor
) {
1449 case PIPE_BLENDFACTOR_ONE
: return GEN6_BLENDFACTOR_ONE
;
1450 case PIPE_BLENDFACTOR_SRC_COLOR
: return GEN6_BLENDFACTOR_SRC_COLOR
;
1451 case PIPE_BLENDFACTOR_SRC_ALPHA
: return GEN6_BLENDFACTOR_SRC_ALPHA
;
1452 case PIPE_BLENDFACTOR_DST_ALPHA
: return GEN6_BLENDFACTOR_DST_ALPHA
;
1453 case PIPE_BLENDFACTOR_DST_COLOR
: return GEN6_BLENDFACTOR_DST_COLOR
;
1454 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE
;
1455 case PIPE_BLENDFACTOR_CONST_COLOR
: return GEN6_BLENDFACTOR_CONST_COLOR
;
1456 case PIPE_BLENDFACTOR_CONST_ALPHA
: return GEN6_BLENDFACTOR_CONST_ALPHA
;
1457 case PIPE_BLENDFACTOR_SRC1_COLOR
: return GEN6_BLENDFACTOR_SRC1_COLOR
;
1458 case PIPE_BLENDFACTOR_SRC1_ALPHA
: return GEN6_BLENDFACTOR_SRC1_ALPHA
;
1459 case PIPE_BLENDFACTOR_ZERO
: return GEN6_BLENDFACTOR_ZERO
;
1460 case PIPE_BLENDFACTOR_INV_SRC_COLOR
: return GEN6_BLENDFACTOR_INV_SRC_COLOR
;
1461 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
: return GEN6_BLENDFACTOR_INV_SRC_ALPHA
;
1462 case PIPE_BLENDFACTOR_INV_DST_ALPHA
: return GEN6_BLENDFACTOR_INV_DST_ALPHA
;
1463 case PIPE_BLENDFACTOR_INV_DST_COLOR
: return GEN6_BLENDFACTOR_INV_DST_COLOR
;
1464 case PIPE_BLENDFACTOR_INV_CONST_COLOR
: return GEN6_BLENDFACTOR_INV_CONST_COLOR
;
1465 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
: return GEN6_BLENDFACTOR_INV_CONST_ALPHA
;
1466 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
: return GEN6_BLENDFACTOR_INV_SRC1_COLOR
;
1467 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
: return GEN6_BLENDFACTOR_INV_SRC1_ALPHA
;
1469 assert(!"unknown blend factor");
1470 return GEN6_BLENDFACTOR_ONE
;
1475 * Translate a pipe stencil op to the matching hardware stencil op.
1478 gen6_translate_pipe_stencil_op(unsigned stencil_op
)
1480 switch (stencil_op
) {
1481 case PIPE_STENCIL_OP_KEEP
: return GEN6_STENCILOP_KEEP
;
1482 case PIPE_STENCIL_OP_ZERO
: return GEN6_STENCILOP_ZERO
;
1483 case PIPE_STENCIL_OP_REPLACE
: return GEN6_STENCILOP_REPLACE
;
1484 case PIPE_STENCIL_OP_INCR
: return GEN6_STENCILOP_INCRSAT
;
1485 case PIPE_STENCIL_OP_DECR
: return GEN6_STENCILOP_DECRSAT
;
1486 case PIPE_STENCIL_OP_INCR_WRAP
: return GEN6_STENCILOP_INCR
;
1487 case PIPE_STENCIL_OP_DECR_WRAP
: return GEN6_STENCILOP_DECR
;
1488 case PIPE_STENCIL_OP_INVERT
: return GEN6_STENCILOP_INVERT
;
1490 assert(!"unknown stencil op");
1491 return GEN6_STENCILOP_KEEP
;
1496 gen6_blend_factor_dst_alpha_forced_one(int factor
)
1499 case GEN6_BLENDFACTOR_DST_ALPHA
:
1500 return GEN6_BLENDFACTOR_ONE
;
1501 case GEN6_BLENDFACTOR_INV_DST_ALPHA
:
1502 case GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE
:
1503 return GEN6_BLENDFACTOR_ZERO
;
1510 blend_get_rt_blend_enable_gen6(const struct ilo_dev
*dev
,
1511 const struct pipe_rt_blend_state
*rt
,
1512 bool dst_alpha_forced_one
)
1514 int rgb_src
, rgb_dst
, a_src
, a_dst
;
1517 ILO_DEV_ASSERT(dev
, 6, 7.5);
1519 if (!rt
->blend_enable
)
1522 rgb_src
= gen6_translate_pipe_blendfactor(rt
->rgb_src_factor
);
1523 rgb_dst
= gen6_translate_pipe_blendfactor(rt
->rgb_dst_factor
);
1524 a_src
= gen6_translate_pipe_blendfactor(rt
->alpha_src_factor
);
1525 a_dst
= gen6_translate_pipe_blendfactor(rt
->alpha_dst_factor
);
1527 if (dst_alpha_forced_one
) {
1528 rgb_src
= gen6_blend_factor_dst_alpha_forced_one(rgb_src
);
1529 rgb_dst
= gen6_blend_factor_dst_alpha_forced_one(rgb_dst
);
1530 a_src
= gen6_blend_factor_dst_alpha_forced_one(a_src
);
1531 a_dst
= gen6_blend_factor_dst_alpha_forced_one(a_dst
);
1534 dw
= GEN6_RT_DW0_BLEND_ENABLE
|
1535 gen6_translate_pipe_blend(rt
->alpha_func
) << 26 |
1538 gen6_translate_pipe_blend(rt
->rgb_func
) << 11 |
1542 if (rt
->rgb_func
!= rt
->alpha_func
||
1543 rgb_src
!= a_src
|| rgb_dst
!= a_dst
)
1544 dw
|= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE
;
1550 blend_get_rt_blend_enable_gen8(const struct ilo_dev
*dev
,
1551 const struct pipe_rt_blend_state
*rt
,
1552 bool dst_alpha_forced_one
,
1553 bool *independent_alpha
)
1555 int rgb_src
, rgb_dst
, a_src
, a_dst
;
1558 ILO_DEV_ASSERT(dev
, 8, 8);
1560 if (!rt
->blend_enable
) {
1561 *independent_alpha
= false;
1565 rgb_src
= gen6_translate_pipe_blendfactor(rt
->rgb_src_factor
);
1566 rgb_dst
= gen6_translate_pipe_blendfactor(rt
->rgb_dst_factor
);
1567 a_src
= gen6_translate_pipe_blendfactor(rt
->alpha_src_factor
);
1568 a_dst
= gen6_translate_pipe_blendfactor(rt
->alpha_dst_factor
);
1570 if (dst_alpha_forced_one
) {
1571 rgb_src
= gen6_blend_factor_dst_alpha_forced_one(rgb_src
);
1572 rgb_dst
= gen6_blend_factor_dst_alpha_forced_one(rgb_dst
);
1573 a_src
= gen6_blend_factor_dst_alpha_forced_one(a_src
);
1574 a_dst
= gen6_blend_factor_dst_alpha_forced_one(a_dst
);
1577 dw
= GEN8_RT_DW0_BLEND_ENABLE
|
1580 gen6_translate_pipe_blend(rt
->rgb_func
) << 18 |
1583 gen6_translate_pipe_blend(rt
->alpha_func
) << 5;
1585 *independent_alpha
= (rt
->rgb_func
!= rt
->alpha_func
||
1593 blend_init_cso_gen6(const struct ilo_dev
*dev
,
1594 const struct pipe_blend_state
*state
,
1595 struct ilo_blend_state
*blend
,
1598 const struct pipe_rt_blend_state
*rt
= &state
->rt
[index
];
1599 struct ilo_blend_cso
*cso
= &blend
->cso
[index
];
1601 ILO_DEV_ASSERT(dev
, 6, 7.5);
1603 cso
->payload
[0] = 0;
1604 cso
->payload
[1] = GEN6_RT_DW1_COLORCLAMP_RTFORMAT
|
1605 GEN6_RT_DW1_PRE_BLEND_CLAMP
|
1606 GEN6_RT_DW1_POST_BLEND_CLAMP
;
1608 if (!(rt
->colormask
& PIPE_MASK_A
))
1609 cso
->payload
[1] |= GEN6_RT_DW1_WRITE_DISABLE_A
;
1610 if (!(rt
->colormask
& PIPE_MASK_R
))
1611 cso
->payload
[1] |= GEN6_RT_DW1_WRITE_DISABLE_R
;
1612 if (!(rt
->colormask
& PIPE_MASK_G
))
1613 cso
->payload
[1] |= GEN6_RT_DW1_WRITE_DISABLE_G
;
1614 if (!(rt
->colormask
& PIPE_MASK_B
))
1615 cso
->payload
[1] |= GEN6_RT_DW1_WRITE_DISABLE_B
;
1618 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
1620 * "Color Buffer Blending and Logic Ops must not be enabled
1621 * simultaneously, or behavior is UNDEFINED."
1623 * Since state->logicop_enable takes precedence over rt->blend_enable,
1624 * no special care is needed.
1626 if (state
->logicop_enable
) {
1628 cso
->dw_blend_dst_alpha_forced_one
= 0;
1630 cso
->dw_blend
= blend_get_rt_blend_enable_gen6(dev
, rt
, false);
1631 cso
->dw_blend_dst_alpha_forced_one
=
1632 blend_get_rt_blend_enable_gen6(dev
, rt
, true);
1637 blend_init_cso_gen8(const struct ilo_dev
*dev
,
1638 const struct pipe_blend_state
*state
,
1639 struct ilo_blend_state
*blend
,
1642 const struct pipe_rt_blend_state
*rt
= &state
->rt
[index
];
1643 struct ilo_blend_cso
*cso
= &blend
->cso
[index
];
1644 bool independent_alpha
= false;
1646 ILO_DEV_ASSERT(dev
, 8, 8);
1648 cso
->payload
[0] = 0;
1649 cso
->payload
[1] = GEN8_RT_DW1_COLORCLAMP_RTFORMAT
|
1650 GEN8_RT_DW1_PRE_BLEND_CLAMP
|
1651 GEN8_RT_DW1_POST_BLEND_CLAMP
;
1653 if (!(rt
->colormask
& PIPE_MASK_A
))
1654 cso
->payload
[0] |= GEN8_RT_DW0_WRITE_DISABLE_A
;
1655 if (!(rt
->colormask
& PIPE_MASK_R
))
1656 cso
->payload
[0] |= GEN8_RT_DW0_WRITE_DISABLE_R
;
1657 if (!(rt
->colormask
& PIPE_MASK_G
))
1658 cso
->payload
[0] |= GEN8_RT_DW0_WRITE_DISABLE_G
;
1659 if (!(rt
->colormask
& PIPE_MASK_B
))
1660 cso
->payload
[0] |= GEN8_RT_DW0_WRITE_DISABLE_B
;
1662 if (state
->logicop_enable
) {
1664 cso
->dw_blend_dst_alpha_forced_one
= 0;
1668 cso
->dw_blend
= blend_get_rt_blend_enable_gen8(dev
, rt
, false, &tmp
[0]);
1669 cso
->dw_blend_dst_alpha_forced_one
=
1670 blend_get_rt_blend_enable_gen8(dev
, rt
, true, &tmp
[1]);
1672 if (tmp
[0] || tmp
[1])
1673 independent_alpha
= true;
1676 return independent_alpha
;
1680 blend_get_logicop_enable_gen6(const struct ilo_dev
*dev
,
1681 const struct pipe_blend_state
*state
)
1683 ILO_DEV_ASSERT(dev
, 6, 7.5);
1685 if (!state
->logicop_enable
)
1688 return GEN6_RT_DW1_LOGICOP_ENABLE
|
1689 gen6_translate_pipe_logicop(state
->logicop_func
) << 18;
1693 blend_get_logicop_enable_gen8(const struct ilo_dev
*dev
,
1694 const struct pipe_blend_state
*state
)
1696 ILO_DEV_ASSERT(dev
, 8, 8);
1698 if (!state
->logicop_enable
)
1701 return GEN8_RT_DW1_LOGICOP_ENABLE
|
1702 gen6_translate_pipe_logicop(state
->logicop_func
) << 27;
1706 blend_get_alpha_mod_gen6(const struct ilo_dev
*dev
,
1707 const struct pipe_blend_state
*state
,
1712 ILO_DEV_ASSERT(dev
, 6, 7.5);
1714 if (state
->alpha_to_coverage
) {
1715 dw
|= GEN6_RT_DW1_ALPHA_TO_COVERAGE
;
1716 if (ilo_dev_gen(dev
) >= ILO_GEN(7))
1717 dw
|= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER
;
1720 * From the Sandy Bridge PRM, volume 2 part 1, page 378:
1722 * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
1723 * must be disabled."
1725 if (state
->alpha_to_one
&& !dual_blend
)
1726 dw
|= GEN6_RT_DW1_ALPHA_TO_ONE
;
1732 blend_get_alpha_mod_gen8(const struct ilo_dev
*dev
,
1733 const struct pipe_blend_state
*state
,
1738 ILO_DEV_ASSERT(dev
, 8, 8);
1740 if (state
->alpha_to_coverage
) {
1741 dw
|= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE
|
1742 GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER
;
1745 if (state
->alpha_to_one
&& !dual_blend
)
1746 dw
|= GEN8_BLEND_DW0_ALPHA_TO_ONE
;
1752 blend_get_ps_blend_gen8(const struct ilo_dev
*dev
, uint32_t rt_dw0
)
1754 int rgb_src
, rgb_dst
, a_src
, a_dst
;
1757 ILO_DEV_ASSERT(dev
, 8, 8);
1759 if (!(rt_dw0
& GEN8_RT_DW0_BLEND_ENABLE
))
1762 a_src
= GEN_EXTRACT(rt_dw0
, GEN8_RT_DW0_SRC_ALPHA_FACTOR
);
1763 a_dst
= GEN_EXTRACT(rt_dw0
, GEN8_RT_DW0_DST_ALPHA_FACTOR
);
1764 rgb_src
= GEN_EXTRACT(rt_dw0
, GEN8_RT_DW0_SRC_COLOR_FACTOR
);
1765 rgb_dst
= GEN_EXTRACT(rt_dw0
, GEN8_RT_DW0_DST_COLOR_FACTOR
);
1767 dw
= GEN8_PS_BLEND_DW1_BLEND_ENABLE
;
1768 dw
|= GEN_SHIFT32(a_src
, GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR
);
1769 dw
|= GEN_SHIFT32(a_dst
, GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR
);
1770 dw
|= GEN_SHIFT32(rgb_src
, GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR
);
1771 dw
|= GEN_SHIFT32(rgb_dst
, GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR
);
1773 if (a_src
!= rgb_src
|| a_dst
!= rgb_dst
)
1774 dw
|= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE
;
1780 ilo_gpe_init_blend(const struct ilo_dev
*dev
,
1781 const struct pipe_blend_state
*state
,
1782 struct ilo_blend_state
*blend
)
1786 ILO_DEV_ASSERT(dev
, 6, 8);
1788 blend
->dual_blend
= (util_blend_state_is_dual(state
, 0) &&
1789 state
->rt
[0].blend_enable
&&
1790 !state
->logicop_enable
);
1791 blend
->alpha_to_coverage
= state
->alpha_to_coverage
;
1793 if (ilo_dev_gen(dev
) >= ILO_GEN(8)) {
1794 bool independent_alpha
;
1796 blend
->dw_alpha_mod
=
1797 blend_get_alpha_mod_gen8(dev
, state
, blend
->dual_blend
);
1798 blend
->dw_logicop
= blend_get_logicop_enable_gen8(dev
, state
);
1799 blend
->dw_shared
= (state
->dither
) ? GEN8_BLEND_DW0_DITHER_ENABLE
: 0;
1801 independent_alpha
= blend_init_cso_gen8(dev
, state
, blend
, 0);
1802 if (independent_alpha
)
1803 blend
->dw_shared
|= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE
;
1805 blend
->dw_ps_blend
= blend_get_ps_blend_gen8(dev
,
1806 blend
->cso
[0].dw_blend
);
1807 blend
->dw_ps_blend_dst_alpha_forced_one
= blend_get_ps_blend_gen8(dev
,
1808 blend
->cso
[0].dw_blend_dst_alpha_forced_one
);
1810 if (state
->independent_blend_enable
) {
1811 for (i
= 1; i
< Elements(blend
->cso
); i
++) {
1812 independent_alpha
= blend_init_cso_gen8(dev
, state
, blend
, i
);
1813 if (independent_alpha
)
1814 blend
->dw_shared
|= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE
;
1817 for (i
= 1; i
< Elements(blend
->cso
); i
++)
1818 blend
->cso
[i
] = blend
->cso
[0];
1821 blend
->dw_alpha_mod
=
1822 blend_get_alpha_mod_gen6(dev
, state
, blend
->dual_blend
);
1823 blend
->dw_logicop
= blend_get_logicop_enable_gen6(dev
, state
);
1824 blend
->dw_shared
= (state
->dither
) ? GEN6_RT_DW1_DITHER_ENABLE
: 0;
1826 blend
->dw_ps_blend
= 0;
1827 blend
->dw_ps_blend_dst_alpha_forced_one
= 0;
1829 blend_init_cso_gen6(dev
, state
, blend
, 0);
1830 if (state
->independent_blend_enable
) {
1831 for (i
= 1; i
< Elements(blend
->cso
); i
++)
1832 blend_init_cso_gen6(dev
, state
, blend
, i
);
1834 for (i
= 1; i
< Elements(blend
->cso
); i
++)
1835 blend
->cso
[i
] = blend
->cso
[0];
1841 * Translate a pipe DSA test function to the matching hardware compare
1845 gen6_translate_dsa_func(unsigned func
)
1848 case PIPE_FUNC_NEVER
: return GEN6_COMPAREFUNCTION_NEVER
;
1849 case PIPE_FUNC_LESS
: return GEN6_COMPAREFUNCTION_LESS
;
1850 case PIPE_FUNC_EQUAL
: return GEN6_COMPAREFUNCTION_EQUAL
;
1851 case PIPE_FUNC_LEQUAL
: return GEN6_COMPAREFUNCTION_LEQUAL
;
1852 case PIPE_FUNC_GREATER
: return GEN6_COMPAREFUNCTION_GREATER
;
1853 case PIPE_FUNC_NOTEQUAL
: return GEN6_COMPAREFUNCTION_NOTEQUAL
;
1854 case PIPE_FUNC_GEQUAL
: return GEN6_COMPAREFUNCTION_GEQUAL
;
1855 case PIPE_FUNC_ALWAYS
: return GEN6_COMPAREFUNCTION_ALWAYS
;
1857 assert(!"unknown depth/stencil/alpha test function");
1858 return GEN6_COMPAREFUNCTION_NEVER
;
1863 dsa_get_stencil_enable_gen6(const struct ilo_dev
*dev
,
1864 const struct pipe_stencil_state
*stencil0
,
1865 const struct pipe_stencil_state
*stencil1
)
1869 ILO_DEV_ASSERT(dev
, 6, 7.5);
1871 if (!stencil0
->enabled
)
1875 * From the Sandy Bridge PRM, volume 2 part 1, page 359:
1877 * "If the Depth Buffer is either undefined or does not have a surface
1878 * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
1879 * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
1881 * From the Sandy Bridge PRM, volume 2 part 1, page 370:
1883 * "This field (Stencil Test Enable) cannot be enabled if
1884 * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
1886 * TODO We do not check these yet.
1888 dw
= GEN6_ZS_DW0_STENCIL_TEST_ENABLE
|
1889 gen6_translate_dsa_func(stencil0
->func
) << 28 |
1890 gen6_translate_pipe_stencil_op(stencil0
->fail_op
) << 25 |
1891 gen6_translate_pipe_stencil_op(stencil0
->zfail_op
) << 22 |
1892 gen6_translate_pipe_stencil_op(stencil0
->zpass_op
) << 19;
1893 if (stencil0
->writemask
)
1894 dw
|= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE
;
1896 if (stencil1
->enabled
) {
1897 dw
|= GEN6_ZS_DW0_STENCIL1_ENABLE
|
1898 gen6_translate_dsa_func(stencil1
->func
) << 12 |
1899 gen6_translate_pipe_stencil_op(stencil1
->fail_op
) << 9 |
1900 gen6_translate_pipe_stencil_op(stencil1
->zfail_op
) << 6 |
1901 gen6_translate_pipe_stencil_op(stencil1
->zpass_op
) << 3;
1902 if (stencil1
->writemask
)
1903 dw
|= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE
;
1910 dsa_get_stencil_enable_gen8(const struct ilo_dev
*dev
,
1911 const struct pipe_stencil_state
*stencil0
,
1912 const struct pipe_stencil_state
*stencil1
)
1916 ILO_DEV_ASSERT(dev
, 8, 8);
1918 if (!stencil0
->enabled
)
1921 dw
= gen6_translate_pipe_stencil_op(stencil0
->fail_op
) << 29 |
1922 gen6_translate_pipe_stencil_op(stencil0
->zfail_op
) << 26 |
1923 gen6_translate_pipe_stencil_op(stencil0
->zpass_op
) << 23 |
1924 gen6_translate_dsa_func(stencil0
->func
) << 8 |
1925 GEN8_ZS_DW1_STENCIL_TEST_ENABLE
;
1926 if (stencil0
->writemask
)
1927 dw
|= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE
;
1929 if (stencil1
->enabled
) {
1930 dw
|= gen6_translate_dsa_func(stencil1
->func
) << 20 |
1931 gen6_translate_pipe_stencil_op(stencil1
->fail_op
) << 17 |
1932 gen6_translate_pipe_stencil_op(stencil1
->zfail_op
) << 14 |
1933 gen6_translate_pipe_stencil_op(stencil1
->zpass_op
) << 11 |
1934 GEN8_ZS_DW1_STENCIL1_ENABLE
;
1935 if (stencil1
->writemask
)
1936 dw
|= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE
;
1943 dsa_get_depth_enable_gen6(const struct ilo_dev
*dev
,
1944 const struct pipe_depth_state
*state
)
1948 ILO_DEV_ASSERT(dev
, 6, 7.5);
1951 * From the Sandy Bridge PRM, volume 2 part 1, page 360:
1953 * "Enabling the Depth Test function without defining a Depth Buffer is
1956 * From the Sandy Bridge PRM, volume 2 part 1, page 375:
1958 * "A Depth Buffer must be defined before enabling writes to it, or
1959 * operation is UNDEFINED."
1961 * TODO We do not check these yet.
1963 if (state
->enabled
) {
1964 dw
= GEN6_ZS_DW2_DEPTH_TEST_ENABLE
|
1965 gen6_translate_dsa_func(state
->func
) << 27;
1967 dw
= GEN6_COMPAREFUNCTION_ALWAYS
<< 27;
1970 if (state
->writemask
)
1971 dw
|= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE
;
1977 dsa_get_depth_enable_gen8(const struct ilo_dev
*dev
,
1978 const struct pipe_depth_state
*state
)
1982 ILO_DEV_ASSERT(dev
, 8, 8);
1984 if (state
->enabled
) {
1985 dw
= GEN8_ZS_DW1_DEPTH_TEST_ENABLE
|
1986 gen6_translate_dsa_func(state
->func
) << 5;
1988 dw
= GEN6_COMPAREFUNCTION_ALWAYS
<< 5;
1991 if (state
->writemask
)
1992 dw
|= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE
;
1998 dsa_get_alpha_enable_gen6(const struct ilo_dev
*dev
,
1999 const struct pipe_alpha_state
*state
)
2003 ILO_DEV_ASSERT(dev
, 6, 7.5);
2005 if (!state
->enabled
)
2008 /* this will be ORed to BLEND_STATE */
2009 dw
= GEN6_RT_DW1_ALPHA_TEST_ENABLE
|
2010 gen6_translate_dsa_func(state
->func
) << 13;
2016 dsa_get_alpha_enable_gen8(const struct ilo_dev
*dev
,
2017 const struct pipe_alpha_state
*state
)
2021 ILO_DEV_ASSERT(dev
, 8, 8);
2023 if (!state
->enabled
)
2026 /* this will be ORed to BLEND_STATE */
2027 dw
= GEN8_BLEND_DW0_ALPHA_TEST_ENABLE
|
2028 gen6_translate_dsa_func(state
->func
) << 24;
2034 ilo_gpe_init_dsa(const struct ilo_dev
*dev
,
2035 const struct pipe_depth_stencil_alpha_state
*state
,
2036 struct ilo_dsa_state
*dsa
)
2038 ILO_DEV_ASSERT(dev
, 6, 8);
2040 STATIC_ASSERT(Elements(dsa
->payload
) >= 3);
2042 if (ilo_dev_gen(dev
) >= ILO_GEN(8)) {
2043 const uint32_t dw_stencil
= dsa_get_stencil_enable_gen8(dev
,
2044 &state
->stencil
[0], &state
->stencil
[1]);
2045 const uint32_t dw_depth
= dsa_get_depth_enable_gen8(dev
, &state
->depth
);
2047 assert(!(dw_stencil
& dw_depth
));
2048 dsa
->payload
[0] = dw_stencil
| dw_depth
;
2050 dsa
->dw_blend_alpha
= dsa_get_alpha_enable_gen8(dev
, &state
->alpha
);
2051 dsa
->dw_ps_blend_alpha
= (state
->alpha
.enabled
) ?
2052 GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE
: 0;
2054 dsa
->payload
[0] = dsa_get_stencil_enable_gen6(dev
,
2055 &state
->stencil
[0], &state
->stencil
[1]);
2056 dsa
->payload
[2] = dsa_get_depth_enable_gen6(dev
, &state
->depth
);
2058 dsa
->dw_blend_alpha
= dsa_get_alpha_enable_gen6(dev
, &state
->alpha
);
2059 dsa
->dw_ps_blend_alpha
= 0;
2062 dsa
->payload
[1] = state
->stencil
[0].valuemask
<< 24 |
2063 state
->stencil
[0].writemask
<< 16 |
2064 state
->stencil
[1].valuemask
<< 8 |
2065 state
->stencil
[1].writemask
;
2067 dsa
->alpha_ref
= float_to_ubyte(state
->alpha
.ref_value
);
2071 ilo_gpe_set_scissor(const struct ilo_dev
*dev
,
2072 unsigned start_slot
,
2073 unsigned num_states
,
2074 const struct pipe_scissor_state
*states
,
2075 struct ilo_scissor_state
*scissor
)
2079 ILO_DEV_ASSERT(dev
, 6, 8);
2081 for (i
= 0; i
< num_states
; i
++) {
2082 uint16_t min_x
, min_y
, max_x
, max_y
;
2084 /* both max and min are inclusive in SCISSOR_RECT */
2085 if (states
[i
].minx
< states
[i
].maxx
&&
2086 states
[i
].miny
< states
[i
].maxy
) {
2087 min_x
= states
[i
].minx
;
2088 min_y
= states
[i
].miny
;
2089 max_x
= states
[i
].maxx
- 1;
2090 max_y
= states
[i
].maxy
- 1;
2093 /* we have to make min greater than max */
2100 scissor
->payload
[(start_slot
+ i
) * 2 + 0] = min_y
<< 16 | min_x
;
2101 scissor
->payload
[(start_slot
+ i
) * 2 + 1] = max_y
<< 16 | max_x
;
2104 if (!start_slot
&& num_states
)
2105 scissor
->scissor0
= states
[0];
2109 ilo_gpe_set_scissor_null(const struct ilo_dev
*dev
,
2110 struct ilo_scissor_state
*scissor
)
2114 for (i
= 0; i
< Elements(scissor
->payload
); i
+= 2) {
2115 scissor
->payload
[i
+ 0] = 1 << 16 | 1;
2116 scissor
->payload
[i
+ 1] = 0;
2121 fb_set_blend_caps(const struct ilo_dev
*dev
,
2122 enum pipe_format format
,
2123 struct ilo_fb_blend_caps
*caps
)
2125 const struct util_format_description
*desc
=
2126 util_format_description(format
);
2127 const int ch
= util_format_get_first_non_void_channel(format
);
2129 memset(caps
, 0, sizeof(*caps
));
2131 if (format
== PIPE_FORMAT_NONE
|| desc
->is_mixed
)
2135 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
2137 * "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
2138 * variants), otherwise Logic Ops must be DISABLED."
2140 * According to the classic driver, this is lifted on Gen8+.
2142 if (ilo_dev_gen(dev
) >= ILO_GEN(8)) {
2143 caps
->can_logicop
= true;
2145 caps
->can_logicop
= (ch
>= 0 && desc
->channel
[ch
].normalized
&&
2146 desc
->channel
[ch
].type
== UTIL_FORMAT_TYPE_UNSIGNED
&&
2147 desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
);
2150 /* no blending for pure integer formats */
2151 caps
->can_blend
= !util_format_is_pure_integer(format
);
2154 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
2156 * "Alpha Test can only be enabled if Pixel Shader outputs a float
2159 caps
->can_alpha_test
= !util_format_is_pure_integer(format
);
2161 caps
->dst_alpha_forced_one
=
2162 (ilo_format_translate_render(dev
, format
) !=
2163 ilo_format_translate_color(dev
, format
));
2166 if (caps
->dst_alpha_forced_one
) {
2167 enum pipe_format render_format
;
2170 case PIPE_FORMAT_B8G8R8X8_UNORM
:
2171 render_format
= PIPE_FORMAT_B8G8R8A8_UNORM
;
2174 render_format
= PIPE_FORMAT_NONE
;
2178 assert(ilo_format_translate_render(dev
, format
) ==
2179 ilo_format_translate_color(dev
, render_format
));
2184 ilo_gpe_set_fb(const struct ilo_dev
*dev
,
2185 const struct pipe_framebuffer_state
*state
,
2186 struct ilo_fb_state
*fb
)
2188 const struct pipe_surface
*first_surf
= NULL
;
2191 ILO_DEV_ASSERT(dev
, 6, 8);
2193 util_copy_framebuffer_state(&fb
->state
, state
);
2195 ilo_gpe_init_view_surface_null(dev
,
2196 (state
->width
) ? state
->width
: 1,
2197 (state
->height
) ? state
->height
: 1,
2198 1, 0, &fb
->null_rt
);
2200 for (i
= 0; i
< state
->nr_cbufs
; i
++) {
2201 if (state
->cbufs
[i
]) {
2202 fb_set_blend_caps(dev
, state
->cbufs
[i
]->format
, &fb
->blend_caps
[i
]);
2205 first_surf
= state
->cbufs
[i
];
2207 fb_set_blend_caps(dev
, PIPE_FORMAT_NONE
, &fb
->blend_caps
[i
]);
2211 if (!first_surf
&& state
->zsbuf
)
2212 first_surf
= state
->zsbuf
;
2214 fb
->num_samples
= (first_surf
) ? first_surf
->texture
->nr_samples
: 1;
2215 if (!fb
->num_samples
)
2216 fb
->num_samples
= 1;
2219 * The PRMs list several restrictions when the framebuffer has more than
2220 * one surface. It seems they are actually lifted on GEN6+.