/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2014 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */
28 #include "genhw/genhw.h"
29 #include "util/u_dual_blend.h"
30 #include "util/u_framebuffer.h"
31 #include "util/u_half.h"
33 #include "ilo_context.h"
34 #include "ilo_format.h"
35 #include "ilo_resource.h"
36 #include "ilo_shader.h"
37 #include "ilo_state.h"
38 #include "ilo_state_3d.h"
41 rasterizer_init_clip(const struct ilo_dev_info
*dev
,
42 const struct pipe_rasterizer_state
*state
,
43 struct ilo_rasterizer_clip
*clip
)
45 uint32_t dw1
, dw2
, dw3
;
47 ILO_DEV_ASSERT(dev
, 6, 8);
49 dw1
= GEN6_CLIP_DW1_STATISTICS
;
51 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
53 * From the Ivy Bridge PRM, volume 2 part 1, page 219:
55 * "Workaround : Due to Hardware issue "EarlyCull" needs to be
56 * enabled only for the cases where the incoming primitive topology
57 * into the clipper guaranteed to be Trilist."
59 * What does this mean?
62 GEN7_CLIP_DW1_EARLY_CULL_ENABLE
;
64 if (ilo_dev_gen(dev
) < ILO_GEN(8)) {
66 dw1
|= GEN7_CLIP_DW1_FRONTWINDING_CCW
;
68 switch (state
->cull_face
) {
70 dw1
|= GEN7_CLIP_DW1_CULLMODE_NONE
;
73 dw1
|= GEN7_CLIP_DW1_CULLMODE_FRONT
;
76 dw1
|= GEN7_CLIP_DW1_CULLMODE_BACK
;
78 case PIPE_FACE_FRONT_AND_BACK
:
79 dw1
|= GEN7_CLIP_DW1_CULLMODE_BOTH
;
85 dw2
= GEN6_CLIP_DW2_CLIP_ENABLE
|
86 GEN6_CLIP_DW2_XY_TEST_ENABLE
|
87 state
->clip_plane_enable
<< GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT
|
88 GEN6_CLIP_DW2_CLIPMODE_NORMAL
;
90 if (state
->clip_halfz
)
91 dw2
|= GEN6_CLIP_DW2_APIMODE_D3D
;
93 dw2
|= GEN6_CLIP_DW2_APIMODE_OGL
;
95 if (ilo_dev_gen(dev
) < ILO_GEN(8) && state
->depth_clip
)
96 dw2
|= GEN6_CLIP_DW2_Z_TEST_ENABLE
;
98 if (state
->flatshade_first
) {
99 dw2
|= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT
|
100 0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT
|
101 1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT
;
104 dw2
|= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT
|
105 1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT
|
106 2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT
;
109 dw3
= 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT
|
110 0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT
;
112 clip
->payload
[0] = dw1
;
113 clip
->payload
[1] = dw2
;
114 clip
->payload
[2] = dw3
;
116 clip
->can_enable_guardband
= true;
119 * There are several reasons that guard band test should be disabled
121 * - GL wide points (to avoid partially visibie object)
122 * - GL wide or AA lines (to avoid partially visibie object)
124 if (state
->point_size_per_vertex
|| state
->point_size
> 1.0f
)
125 clip
->can_enable_guardband
= false;
126 if (state
->line_smooth
|| state
->line_width
> 1.0f
)
127 clip
->can_enable_guardband
= false;
131 rasterizer_init_sf_depth_offset_gen6(const struct ilo_dev_info
*dev
,
132 const struct pipe_rasterizer_state
*state
,
133 struct ilo_rasterizer_sf
*sf
)
135 ILO_DEV_ASSERT(dev
, 6, 8);
138 * Scale the constant term. The minimum representable value used by the HW
139 * is not large enouch to be the minimum resolvable difference.
141 sf
->dw_depth_offset_const
= fui(state
->offset_units
* 2.0f
);
142 sf
->dw_depth_offset_scale
= fui(state
->offset_scale
);
143 sf
->dw_depth_offset_clamp
= fui(state
->offset_clamp
);
147 rasterizer_init_sf_gen6(const struct ilo_dev_info
*dev
,
148 const struct pipe_rasterizer_state
*state
,
149 struct ilo_rasterizer_sf
*sf
)
151 int line_width
, point_width
;
152 uint32_t dw1
, dw2
, dw3
;
154 ILO_DEV_ASSERT(dev
, 6, 7.5);
157 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
159 * "This bit (Statistics Enable) should be set whenever clipping is
160 * enabled and the Statistics Enable bit is set in CLIP_STATE. It
161 * should be cleared if clipping is disabled or Statistics Enable in
162 * CLIP_STATE is clear."
164 dw1
= GEN7_SF_DW1_STATISTICS
|
165 GEN7_SF_DW1_VIEWPORT_ENABLE
;
167 /* XXX GEN6 path seems to work fine for GEN7 */
168 if (false && ilo_dev_gen(dev
) >= ILO_GEN(7)) {
170 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
172 * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
173 * Enable Solid , Global Depth Offset Enable Wireframe, and Global
174 * Depth Offset Enable Point) should be set whenever non zero depth
175 * bias (Slope, Bias) values are used. Setting this bit may have
176 * some degradation of performance for some workloads."
178 if (state
->offset_tri
|| state
->offset_line
|| state
->offset_point
) {
179 /* XXX need to scale offset_const according to the depth format */
180 dw1
|= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET
;
182 dw1
|= GEN7_SF_DW1_DEPTH_OFFSET_SOLID
|
183 GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME
|
184 GEN7_SF_DW1_DEPTH_OFFSET_POINT
;
187 if (state
->offset_tri
)
188 dw1
|= GEN7_SF_DW1_DEPTH_OFFSET_SOLID
;
189 if (state
->offset_line
)
190 dw1
|= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME
;
191 if (state
->offset_point
)
192 dw1
|= GEN7_SF_DW1_DEPTH_OFFSET_POINT
;
195 switch (state
->fill_front
) {
196 case PIPE_POLYGON_MODE_FILL
:
197 dw1
|= GEN7_SF_DW1_FRONTFACE_SOLID
;
199 case PIPE_POLYGON_MODE_LINE
:
200 dw1
|= GEN7_SF_DW1_FRONTFACE_WIREFRAME
;
202 case PIPE_POLYGON_MODE_POINT
:
203 dw1
|= GEN7_SF_DW1_FRONTFACE_POINT
;
207 switch (state
->fill_back
) {
208 case PIPE_POLYGON_MODE_FILL
:
209 dw1
|= GEN7_SF_DW1_BACKFACE_SOLID
;
211 case PIPE_POLYGON_MODE_LINE
:
212 dw1
|= GEN7_SF_DW1_BACKFACE_WIREFRAME
;
214 case PIPE_POLYGON_MODE_POINT
:
215 dw1
|= GEN7_SF_DW1_BACKFACE_POINT
;
219 if (state
->front_ccw
)
220 dw1
|= GEN7_SF_DW1_FRONTWINDING_CCW
;
224 if (state
->line_smooth
) {
226 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
228 * "This field (Anti-aliasing Enable) must be disabled if any of the
229 * render targets have integer (UINT or SINT) surface format."
231 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
233 * "This field (Hierarchical Depth Buffer Enable) must be disabled
234 * if Anti-aliasing Enable in 3DSTATE_SF is enabled.
236 * TODO We do not check those yet.
238 dw2
|= GEN7_SF_DW2_AA_LINE_ENABLE
|
239 GEN7_SF_DW2_AA_LINE_CAP_1_0
;
242 switch (state
->cull_face
) {
244 dw2
|= GEN7_SF_DW2_CULLMODE_NONE
;
246 case PIPE_FACE_FRONT
:
247 dw2
|= GEN7_SF_DW2_CULLMODE_FRONT
;
250 dw2
|= GEN7_SF_DW2_CULLMODE_BACK
;
252 case PIPE_FACE_FRONT_AND_BACK
:
253 dw2
|= GEN7_SF_DW2_CULLMODE_BOTH
;
258 * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
259 * pixels in the minor direction. We have to make the lines slightly
260 * thicker, 0.5 pixel on both sides, so that they intersect that many
261 * pixels are considered into the lines.
263 * Line width is in U3.7.
266 ((state
->line_width
+ (float) state
->line_smooth
) * 128.0f
+ 0.5f
);
267 line_width
= CLAMP(line_width
, 0, 1023);
270 if (line_width
== 128 && !state
->line_smooth
)
273 dw2
|= line_width
<< GEN7_SF_DW2_LINE_WIDTH__SHIFT
;
275 if (ilo_dev_gen(dev
) == ILO_GEN(7.5) && state
->line_stipple_enable
)
276 dw2
|= GEN75_SF_DW2_LINE_STIPPLE_ENABLE
;
279 dw2
|= GEN7_SF_DW2_SCISSOR_ENABLE
;
281 dw3
= GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE
|
282 GEN7_SF_DW3_SUBPIXEL_8BITS
;
284 if (state
->line_last_pixel
)
285 dw3
|= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE
;
287 if (state
->flatshade_first
) {
288 dw3
|= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT
|
289 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT
|
290 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT
;
292 dw3
|= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT
|
293 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT
|
294 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT
;
297 if (!state
->point_size_per_vertex
)
298 dw3
|= GEN7_SF_DW3_USE_POINT_WIDTH
;
301 point_width
= (int) (state
->point_size
* 8.0f
+ 0.5f
);
302 point_width
= CLAMP(point_width
, 1, 2047);
306 STATIC_ASSERT(Elements(sf
->payload
) >= 3);
307 sf
->payload
[0] = dw1
;
308 sf
->payload
[1] = dw2
;
309 sf
->payload
[2] = dw3
;
311 if (state
->multisample
) {
312 sf
->dw_msaa
= GEN7_SF_DW2_MSRASTMODE_ON_PATTERN
;
315 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
317 * "Software must not program a value of 0.0 when running in
318 * MSRASTMODE_ON_xxx modes - zero-width lines are not available
319 * when multisampling rasterization is enabled."
322 line_width
= 128; /* 1.0f */
324 sf
->dw_msaa
|= line_width
<< GEN7_SF_DW2_LINE_WIDTH__SHIFT
;
330 rasterizer_init_sf_depth_offset_gen6(dev
, state
, sf
);
331 /* 3DSTATE_RASTER is Gen8+ only */
336 rasterizer_get_sf_raster_gen8(const struct ilo_dev_info
*dev
,
337 const struct pipe_rasterizer_state
*state
)
341 ILO_DEV_ASSERT(dev
, 8, 8);
343 if (state
->front_ccw
)
344 dw
|= GEN8_RASTER_DW1_FRONTWINDING_CCW
;
346 switch (state
->cull_face
) {
348 dw
|= GEN8_RASTER_DW1_CULLMODE_NONE
;
350 case PIPE_FACE_FRONT
:
351 dw
|= GEN8_RASTER_DW1_CULLMODE_FRONT
;
354 dw
|= GEN8_RASTER_DW1_CULLMODE_BACK
;
356 case PIPE_FACE_FRONT_AND_BACK
:
357 dw
|= GEN8_RASTER_DW1_CULLMODE_BOTH
;
361 if (state
->point_smooth
)
362 dw
|= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE
;
364 if (state
->multisample
)
365 dw
|= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE
;
367 if (state
->offset_tri
)
368 dw
|= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID
;
369 if (state
->offset_line
)
370 dw
|= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME
;
371 if (state
->offset_point
)
372 dw
|= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT
;
374 switch (state
->fill_front
) {
375 case PIPE_POLYGON_MODE_FILL
:
376 dw
|= GEN8_RASTER_DW1_FRONTFACE_SOLID
;
378 case PIPE_POLYGON_MODE_LINE
:
379 dw
|= GEN8_RASTER_DW1_FRONTFACE_WIREFRAME
;
381 case PIPE_POLYGON_MODE_POINT
:
382 dw
|= GEN8_RASTER_DW1_FRONTFACE_POINT
;
386 switch (state
->fill_back
) {
387 case PIPE_POLYGON_MODE_FILL
:
388 dw
|= GEN8_RASTER_DW1_BACKFACE_SOLID
;
390 case PIPE_POLYGON_MODE_LINE
:
391 dw
|= GEN8_RASTER_DW1_BACKFACE_WIREFRAME
;
393 case PIPE_POLYGON_MODE_POINT
:
394 dw
|= GEN8_RASTER_DW1_BACKFACE_POINT
;
398 if (state
->line_smooth
)
399 dw
|= GEN8_RASTER_DW1_AA_LINE_ENABLE
;
402 dw
|= GEN8_RASTER_DW1_SCISSOR_ENABLE
;
404 if (state
->depth_clip
)
405 dw
|= GEN8_RASTER_DW1_Z_TEST_ENABLE
;
411 rasterizer_init_sf_gen8(const struct ilo_dev_info
*dev
,
412 const struct pipe_rasterizer_state
*state
,
413 struct ilo_rasterizer_sf
*sf
)
415 int line_width
, point_width
;
416 uint32_t dw1
, dw2
, dw3
;
418 ILO_DEV_ASSERT(dev
, 8, 8);
422 ((state
->line_width
+ (float) state
->line_smooth
) * 128.0f
+ 0.5f
);
423 line_width
= CLAMP(line_width
, 0, 1023);
426 if (line_width
== 128 && !state
->line_smooth
)
430 point_width
= (int) (state
->point_size
* 8.0f
+ 0.5f
);
431 point_width
= CLAMP(point_width
, 1, 2047);
433 dw1
= GEN7_SF_DW1_STATISTICS
|
434 GEN7_SF_DW1_VIEWPORT_ENABLE
;
436 dw2
= line_width
<< GEN7_SF_DW2_LINE_WIDTH__SHIFT
;
437 if (state
->line_smooth
)
438 dw2
|= GEN7_SF_DW2_AA_LINE_CAP_1_0
;
440 dw3
= GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE
|
441 GEN7_SF_DW3_SUBPIXEL_8BITS
|
444 if (state
->line_last_pixel
)
445 dw3
|= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE
;
447 if (state
->flatshade_first
) {
448 dw3
|= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT
|
449 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT
|
450 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT
;
452 dw3
|= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT
|
453 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT
|
454 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT
;
457 if (!state
->point_size_per_vertex
)
458 dw3
|= GEN7_SF_DW3_USE_POINT_WIDTH
;
462 STATIC_ASSERT(Elements(sf
->payload
) >= 3);
463 sf
->payload
[0] = dw1
;
464 sf
->payload
[1] = dw2
;
465 sf
->payload
[2] = dw3
;
467 rasterizer_init_sf_depth_offset_gen6(dev
, state
, sf
);
470 sf
->dw_raster
= rasterizer_get_sf_raster_gen8(dev
, state
);
474 rasterizer_init_wm_gen6(const struct ilo_dev_info
*dev
,
475 const struct pipe_rasterizer_state
*state
,
476 struct ilo_rasterizer_wm
*wm
)
480 ILO_DEV_ASSERT(dev
, 6, 6);
482 /* only the FF unit states are set, as in GEN7 */
484 dw5
= GEN6_WM_DW5_AA_LINE_WIDTH_2_0
;
486 /* same value as in 3DSTATE_SF */
487 if (state
->line_smooth
)
488 dw5
|= GEN6_WM_DW5_AA_LINE_CAP_1_0
;
490 if (state
->poly_stipple_enable
)
491 dw5
|= GEN6_WM_DW5_POLY_STIPPLE_ENABLE
;
492 if (state
->line_stipple_enable
)
493 dw5
|= GEN6_WM_DW5_LINE_STIPPLE_ENABLE
;
496 * assertion that makes sure
498 * dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
502 STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL
== 0 &&
503 GEN6_WM_DW6_MSDISPMODE_PERSAMPLE
== 0);
504 dw6
= GEN6_WM_DW6_ZW_INTERP_PIXEL
;
506 if (state
->bottom_edge_rule
)
507 dw6
|= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT
;
510 (state
->multisample
) ? GEN6_WM_DW6_MSRASTMODE_ON_PATTERN
: 0;
511 wm
->dw_msaa_disp
= GEN6_WM_DW6_MSDISPMODE_PERPIXEL
;
513 STATIC_ASSERT(Elements(wm
->payload
) >= 2);
514 wm
->payload
[0] = dw5
;
515 wm
->payload
[1] = dw6
;
519 rasterizer_init_wm_gen7(const struct ilo_dev_info
*dev
,
520 const struct pipe_rasterizer_state
*state
,
521 struct ilo_rasterizer_wm
*wm
)
525 ILO_DEV_ASSERT(dev
, 7, 7.5);
528 * assertion that makes sure
530 * dw1 |= wm->dw_msaa_rast;
531 * dw2 |= wm->dw_msaa_disp;
535 STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL
== 0 &&
536 GEN7_WM_DW2_MSDISPMODE_PERSAMPLE
== 0);
537 dw1
= GEN7_WM_DW1_ZW_INTERP_PIXEL
|
538 GEN7_WM_DW1_AA_LINE_WIDTH_2_0
;
541 /* same value as in 3DSTATE_SF */
542 if (state
->line_smooth
)
543 dw1
|= GEN7_WM_DW1_AA_LINE_CAP_1_0
;
545 if (state
->poly_stipple_enable
)
546 dw1
|= GEN7_WM_DW1_POLY_STIPPLE_ENABLE
;
547 if (state
->line_stipple_enable
)
548 dw1
|= GEN7_WM_DW1_LINE_STIPPLE_ENABLE
;
550 if (state
->bottom_edge_rule
)
551 dw1
|= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT
;
554 (state
->multisample
) ? GEN7_WM_DW1_MSRASTMODE_ON_PATTERN
: 0;
555 wm
->dw_msaa_disp
= GEN7_WM_DW2_MSDISPMODE_PERPIXEL
;
557 STATIC_ASSERT(Elements(wm
->payload
) >= 2);
558 wm
->payload
[0] = dw1
;
559 wm
->payload
[1] = dw2
;
563 rasterizer_get_wm_gen8(const struct ilo_dev_info
*dev
,
564 const struct pipe_rasterizer_state
*state
)
568 ILO_DEV_ASSERT(dev
, 8, 8);
570 dw
= GEN7_WM_DW1_ZW_INTERP_PIXEL
|
571 GEN7_WM_DW1_AA_LINE_WIDTH_2_0
;
573 /* same value as in 3DSTATE_SF */
574 if (state
->line_smooth
)
575 dw
|= GEN7_WM_DW1_AA_LINE_CAP_1_0
;
577 if (state
->poly_stipple_enable
)
578 dw
|= GEN7_WM_DW1_POLY_STIPPLE_ENABLE
;
579 if (state
->line_stipple_enable
)
580 dw
|= GEN7_WM_DW1_LINE_STIPPLE_ENABLE
;
582 if (state
->bottom_edge_rule
)
583 dw
|= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT
;
589 ilo_gpe_init_rasterizer(const struct ilo_dev_info
*dev
,
590 const struct pipe_rasterizer_state
*state
,
591 struct ilo_rasterizer_state
*rasterizer
)
593 rasterizer_init_clip(dev
, state
, &rasterizer
->clip
);
595 if (ilo_dev_gen(dev
) >= ILO_GEN(8)) {
596 memset(&rasterizer
->wm
, 0, sizeof(rasterizer
->wm
));
597 rasterizer
->wm
.payload
[0] = rasterizer_get_wm_gen8(dev
, state
);
599 rasterizer_init_sf_gen8(dev
, state
, &rasterizer
->sf
);
600 } else if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
601 rasterizer_init_wm_gen7(dev
, state
, &rasterizer
->wm
);
602 rasterizer_init_sf_gen6(dev
, state
, &rasterizer
->sf
);
604 rasterizer_init_wm_gen6(dev
, state
, &rasterizer
->wm
);
605 rasterizer_init_sf_gen6(dev
, state
, &rasterizer
->sf
);
610 fs_init_cso_gen6(const struct ilo_dev_info
*dev
,
611 const struct ilo_shader_state
*fs
,
612 struct ilo_shader_cso
*cso
)
614 int start_grf
, input_count
, sampler_count
, interps
, max_threads
;
615 uint32_t dw2
, dw4
, dw5
, dw6
;
617 ILO_DEV_ASSERT(dev
, 6, 6);
619 start_grf
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_URB_DATA_START_REG
);
620 input_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
);
621 sampler_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_SAMPLER_COUNT
);
622 interps
= ilo_shader_get_kernel_param(fs
,
623 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
);
625 /* see brwCreateContext() */
626 max_threads
= (dev
->gt
== 2) ? 80 : 40;
628 dw2
= (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT
;
629 dw2
|= ((sampler_count
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
631 dw4
= start_grf
<< GEN6_WM_DW4_URB_GRF_START0__SHIFT
|
632 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT
|
633 0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT
;
635 dw5
= (max_threads
- 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT
;
638 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
640 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
641 * PS kernel or color calculator has the ability to kill (discard)
642 * pixels or samples, other than due to depth or stencil testing.
643 * This bit is required to be ENABLED in the following situations:
645 * The API pixel shader program contains "killpix" or "discard"
646 * instructions, or other code in the pixel shader kernel that can
647 * cause the final pixel mask to differ from the pixel mask received
650 * A sampler with chroma key enabled with kill pixel mode is used by
653 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
656 * The pixel shader kernel generates and outputs oMask.
658 * Note: As ClipDistance clipping is fully supported in hardware and
659 * therefore not via PS instructions, there should be no need to
660 * ENABLE this bit due to ClipDistance clipping."
662 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_USE_KILL
))
663 dw5
|= GEN6_WM_DW5_PS_KILL_PIXEL
;
666 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
668 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
669 * field must be set to disabled."
671 * TODO This is not checked yet.
673 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_OUTPUT_Z
))
674 dw5
|= GEN6_WM_DW5_PS_COMPUTE_DEPTH
;
676 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_Z
))
677 dw5
|= GEN6_WM_DW5_PS_USE_DEPTH
;
679 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_W
))
680 dw5
|= GEN6_WM_DW5_PS_USE_W
;
683 * TODO set this bit only when
685 * a) fs writes colors and color is not masked, or
686 * b) fs writes depth, or
690 dw5
|= GEN6_WM_DW5_PS_DISPATCH_ENABLE
;
692 assert(!ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_DISPATCH_16_OFFSET
));
693 dw5
|= GEN6_PS_DISPATCH_8
<< GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT
;
695 dw6
= input_count
<< GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT
|
696 GEN6_WM_DW6_PS_POSOFFSET_NONE
|
697 interps
<< GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT
;
699 STATIC_ASSERT(Elements(cso
->payload
) >= 4);
700 cso
->payload
[0] = dw2
;
701 cso
->payload
[1] = dw4
;
702 cso
->payload
[2] = dw5
;
703 cso
->payload
[3] = dw6
;
707 fs_get_wm_gen7(const struct ilo_dev_info
*dev
,
708 const struct ilo_shader_state
*fs
)
712 ILO_DEV_ASSERT(dev
, 7, 7.5);
714 dw
= ilo_shader_get_kernel_param(fs
,
715 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
) <<
716 GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT
;
719 * TODO set this bit only when
721 * a) fs writes colors and color is not masked, or
722 * b) fs writes depth, or
725 dw
|= GEN7_WM_DW1_PS_DISPATCH_ENABLE
;
728 * From the Ivy Bridge PRM, volume 2 part 1, page 278:
730 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
731 * the PS kernel or color calculator has the ability to kill
732 * (discard) pixels or samples, other than due to depth or stencil
733 * testing. This bit is required to be ENABLED in the following
736 * - The API pixel shader program contains "killpix" or "discard"
737 * instructions, or other code in the pixel shader kernel that
738 * can cause the final pixel mask to differ from the pixel mask
739 * received on dispatch.
741 * - A sampler with chroma key enabled with kill pixel mode is used
742 * by the pixel shader.
744 * - Any render target has Alpha Test Enable or AlphaToCoverage
747 * - The pixel shader kernel generates and outputs oMask.
749 * Note: As ClipDistance clipping is fully supported in hardware
750 * and therefore not via PS instructions, there should be no need
751 * to ENABLE this bit due to ClipDistance clipping."
753 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_USE_KILL
))
754 dw
|= GEN7_WM_DW1_PS_KILL_PIXEL
;
756 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_OUTPUT_Z
))
757 dw
|= GEN7_WM_DW1_PSCDEPTH_ON
;
759 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_Z
))
760 dw
|= GEN7_WM_DW1_PS_USE_DEPTH
;
762 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_W
))
763 dw
|= GEN7_WM_DW1_PS_USE_W
;
769 fs_init_cso_gen7(const struct ilo_dev_info
*dev
,
770 const struct ilo_shader_state
*fs
,
771 struct ilo_shader_cso
*cso
)
773 int start_grf
, sampler_count
, max_threads
;
774 uint32_t dw2
, dw4
, dw5
;
776 ILO_DEV_ASSERT(dev
, 7, 7.5);
778 start_grf
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_URB_DATA_START_REG
);
779 sampler_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_SAMPLER_COUNT
);
781 dw2
= (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT
;
782 dw2
|= ((sampler_count
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
784 dw4
= GEN7_PS_DW4_POSOFFSET_NONE
;
786 /* see brwCreateContext() */
787 switch (ilo_dev_gen(dev
)) {
789 max_threads
= (dev
->gt
== 3) ? 408 : (dev
->gt
== 2) ? 204 : 102;
790 dw4
|= (max_threads
- 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT
;
791 dw4
|= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT
;
795 max_threads
= (dev
->gt
== 2) ? 172 : 48;
796 dw4
|= (max_threads
- 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT
;
800 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_PCB_CBUF0_SIZE
))
801 dw4
|= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE
;
803 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
))
804 dw4
|= GEN7_PS_DW4_ATTR_ENABLE
;
806 assert(!ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_DISPATCH_16_OFFSET
));
807 dw4
|= GEN6_PS_DISPATCH_8
<< GEN7_PS_DW4_DISPATCH_MODE__SHIFT
;
809 dw5
= start_grf
<< GEN7_PS_DW5_URB_GRF_START0__SHIFT
|
810 0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT
|
811 0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT
;
813 STATIC_ASSERT(Elements(cso
->payload
) >= 4);
814 cso
->payload
[0] = dw2
;
815 cso
->payload
[1] = dw4
;
816 cso
->payload
[2] = dw5
;
817 cso
->payload
[3] = fs_get_wm_gen7(dev
, fs
);
821 fs_get_psx_gen8(const struct ilo_dev_info
*dev
,
822 const struct ilo_shader_state
*fs
)
826 ILO_DEV_ASSERT(dev
, 8, 8);
828 dw
= GEN8_PSX_DW1_DISPATCH_ENABLE
;
830 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_USE_KILL
))
831 dw
|= GEN8_PSX_DW1_KILL_PIXEL
;
832 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_OUTPUT_Z
))
833 dw
|= GEN8_PSX_DW1_PSCDEPTH_ON
;
834 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_Z
))
835 dw
|= GEN8_PSX_DW1_USE_DEPTH
;
836 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_W
))
837 dw
|= GEN8_PSX_DW1_USE_W
;
838 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
))
839 dw
|= GEN8_PSX_DW1_ATTR_ENABLE
;
845 fs_get_wm_gen8(const struct ilo_dev_info
*dev
,
846 const struct ilo_shader_state
*fs
)
848 ILO_DEV_ASSERT(dev
, 8, 8);
850 return ilo_shader_get_kernel_param(fs
,
851 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
) <<
852 GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT
;
856 fs_init_cso_gen8(const struct ilo_dev_info
*dev
,
857 const struct ilo_shader_state
*fs
,
858 struct ilo_shader_cso
*cso
)
860 int start_grf
, sampler_count
;
861 uint32_t dw3
, dw6
, dw7
;
863 ILO_DEV_ASSERT(dev
, 8, 8);
865 start_grf
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_URB_DATA_START_REG
);
866 sampler_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_SAMPLER_COUNT
);
868 dw3
= (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT
;
869 dw3
|= ((sampler_count
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
872 dw6
= (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT
|
873 GEN8_PS_DW6_POSOFFSET_NONE
;
874 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_PCB_CBUF0_SIZE
))
875 dw6
|= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE
;
877 assert(!ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_DISPATCH_16_OFFSET
));
878 dw6
|= GEN6_PS_DISPATCH_8
<< GEN8_PS_DW6_DISPATCH_MODE__SHIFT
;
880 dw7
= start_grf
<< GEN8_PS_DW7_URB_GRF_START0__SHIFT
|
881 0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT
|
882 0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT
;
884 STATIC_ASSERT(Elements(cso
->payload
) >= 5);
885 cso
->payload
[0] = dw3
;
886 cso
->payload
[1] = dw6
;
887 cso
->payload
[2] = dw7
;
888 cso
->payload
[3] = fs_get_psx_gen8(dev
, fs
);
889 cso
->payload
[4] = fs_get_wm_gen8(dev
, fs
);
/**
 * Initialize the fragment shader CSO payload for the device generation,
 * dispatching to the Gen8, Gen7 or Gen6 implementation.
 *
 * \param dev  device info
 * \param fs   fragment shader whose kernel parameters are queried
 * \param cso  output
 */
void
ilo_gpe_init_fs_cso(const struct ilo_dev_info *dev,
                    const struct ilo_shader_state *fs,
                    struct ilo_shader_cso *cso)
{
   if (ilo_dev_gen(dev) >= ILO_GEN(8))
      fs_init_cso_gen8(dev, fs, cso);
   else if (ilo_dev_gen(dev) >= ILO_GEN(7))
      fs_init_cso_gen7(dev, fs, cso);
   else
      fs_init_cso_gen6(dev, fs, cso);
}
905 struct ilo_zs_surface_info
{
913 enum intel_tiling_mode tiling
;
917 unsigned width
, height
, depth
;
918 unsigned lod
, first_layer
, num_layers
;
922 zs_init_info_null(const struct ilo_dev_info
*dev
,
923 struct ilo_zs_surface_info
*info
)
925 ILO_DEV_ASSERT(dev
, 6, 8);
927 memset(info
, 0, sizeof(*info
));
929 info
->surface_type
= GEN6_SURFTYPE_NULL
;
930 info
->format
= GEN6_ZFORMAT_D32_FLOAT
;
934 info
->num_layers
= 1;
938 zs_init_info(const struct ilo_dev_info
*dev
,
939 const struct ilo_texture
*tex
,
940 enum pipe_format format
, unsigned level
,
941 unsigned first_layer
, unsigned num_layers
,
942 struct ilo_zs_surface_info
*info
)
944 bool separate_stencil
;
946 ILO_DEV_ASSERT(dev
, 6, 8);
948 memset(info
, 0, sizeof(*info
));
950 info
->surface_type
= ilo_gpe_gen6_translate_texture(tex
->base
.target
);
952 if (info
->surface_type
== GEN6_SURFTYPE_CUBE
) {
954 * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
956 * "For Other Surfaces (Cube Surfaces):
957 * This field (Minimum Array Element) is ignored."
959 * "For Other Surfaces (Cube Surfaces):
960 * This field (Render Target View Extent) is ignored."
962 * As such, we cannot set first_layer and num_layers on cube surfaces.
963 * To work around that, treat it as a 2D surface.
965 info
->surface_type
= GEN6_SURFTYPE_2D
;
968 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
969 separate_stencil
= true;
973 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
975 * "This field (Separate Stencil Buffer Enable) must be set to the
976 * same value (enabled or disabled) as Hierarchical Depth Buffer
980 ilo_texture_can_enable_hiz(tex
, level
, first_layer
, num_layers
);
984 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
986 * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
987 * Surface Format of the depth buffer cannot be
988 * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
989 * requires the separate stencil buffer."
991 * From the Ironlake PRM, volume 2 part 1, page 330:
993 * "If this field (Separate Stencil Buffer Enable) is disabled, the
994 * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
996 * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
997 * is indeed used, the depth values output by the fragment shaders will
998 * be different when read back.
1000 * As for GEN7+, separate_stencil is always true.
1003 case PIPE_FORMAT_Z16_UNORM
:
1004 info
->format
= GEN6_ZFORMAT_D16_UNORM
;
1006 case PIPE_FORMAT_Z32_FLOAT
:
1007 info
->format
= GEN6_ZFORMAT_D32_FLOAT
;
1009 case PIPE_FORMAT_Z24X8_UNORM
:
1010 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
1011 info
->format
= (separate_stencil
) ?
1012 GEN6_ZFORMAT_D24_UNORM_X8_UINT
:
1013 GEN6_ZFORMAT_D24_UNORM_S8_UINT
;
1015 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
1016 info
->format
= (separate_stencil
) ?
1017 GEN6_ZFORMAT_D32_FLOAT
:
1018 GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT
;
1020 case PIPE_FORMAT_S8_UINT
:
1021 if (separate_stencil
) {
1022 info
->format
= GEN6_ZFORMAT_D32_FLOAT
;
1027 assert(!"unsupported depth/stencil format");
1028 zs_init_info_null(dev
, info
);
1033 if (format
!= PIPE_FORMAT_S8_UINT
) {
1034 info
->zs
.bo
= tex
->bo
;
1035 info
->zs
.stride
= tex
->layout
.bo_stride
;
1037 assert(tex
->layout
.layer_height
% 4 == 0);
1038 info
->zs
.qpitch
= tex
->layout
.layer_height
/ 4;
1040 info
->zs
.tiling
= tex
->layout
.tiling
;
1041 info
->zs
.offset
= 0;
1044 if (tex
->separate_s8
|| format
== PIPE_FORMAT_S8_UINT
) {
1045 const struct ilo_texture
*s8_tex
=
1046 (tex
->separate_s8
) ? tex
->separate_s8
: tex
;
1048 info
->stencil
.bo
= s8_tex
->bo
;
1051 * From the Sandy Bridge PRM, volume 2 part 1, page 329:
1053 * "The pitch must be set to 2x the value computed based on width,
1054 * as the stencil buffer is stored with two rows interleaved."
1056 * For GEN7, we still dobule the stride because we did not double the
1057 * slice widths when initializing the layout.
1059 info
->stencil
.stride
= s8_tex
->layout
.bo_stride
* 2;
1061 assert(s8_tex
->layout
.layer_height
% 4 == 0);
1062 info
->stencil
.qpitch
= s8_tex
->layout
.layer_height
/ 4;
1064 info
->stencil
.tiling
= s8_tex
->layout
.tiling
;
1066 if (ilo_dev_gen(dev
) == ILO_GEN(6)) {
1069 assert(s8_tex
->layout
.walk
== ILO_LAYOUT_WALK_LOD
);
1071 /* offset to the level */
1072 ilo_layout_get_slice_pos(&s8_tex
->layout
, level
, 0, &x
, &y
);
1073 ilo_layout_pos_to_mem(&s8_tex
->layout
, x
, y
, &x
, &y
);
1074 info
->stencil
.offset
= ilo_layout_mem_to_raw(&s8_tex
->layout
, x
, y
);
1078 if (ilo_texture_can_enable_hiz(tex
, level
, first_layer
, num_layers
)) {
1079 info
->hiz
.bo
= tex
->aux_bo
;
1080 info
->hiz
.stride
= tex
->layout
.aux_stride
;
1082 assert(tex
->layout
.aux_layer_height
% 4 == 0);
1083 info
->hiz
.qpitch
= tex
->layout
.aux_layer_height
/ 4;
1085 info
->hiz
.tiling
= INTEL_TILING_Y
;
1087 /* offset to the level */
1088 if (ilo_dev_gen(dev
) == ILO_GEN(6))
1089 info
->hiz
.offset
= tex
->layout
.aux_offsets
[level
];
1092 info
->width
= tex
->layout
.width0
;
1093 info
->height
= tex
->layout
.height0
;
1094 info
->depth
= (tex
->base
.target
== PIPE_TEXTURE_3D
) ?
1095 tex
->base
.depth0
: num_layers
;
1098 info
->first_layer
= first_layer
;
1099 info
->num_layers
= num_layers
;
1103 ilo_gpe_init_zs_surface(const struct ilo_dev_info
*dev
,
1104 const struct ilo_texture
*tex
,
1105 enum pipe_format format
, unsigned level
,
1106 unsigned first_layer
, unsigned num_layers
,
1107 struct ilo_zs_surface
*zs
)
1109 const int max_2d_size
= (ilo_dev_gen(dev
) >= ILO_GEN(7)) ? 16384 : 8192;
1110 const int max_array_size
= (ilo_dev_gen(dev
) >= ILO_GEN(7)) ? 2048 : 512;
1111 struct ilo_zs_surface_info info
;
1112 uint32_t dw1
, dw2
, dw3
, dw4
, dw5
, dw6
;
1113 int align_w
= 8, align_h
= 4;
1115 ILO_DEV_ASSERT(dev
, 6, 8);
1118 zs_init_info(dev
, tex
, format
, level
, first_layer
, num_layers
, &info
);
1120 switch (tex
->base
.nr_samples
) {
1140 zs_init_info_null(dev
, &info
);
1143 switch (info
.surface_type
) {
1144 case GEN6_SURFTYPE_NULL
:
1146 case GEN6_SURFTYPE_1D
:
1147 assert(info
.width
<= max_2d_size
&& info
.height
== 1 &&
1148 info
.depth
<= max_array_size
);
1149 assert(info
.first_layer
< max_array_size
- 1 &&
1150 info
.num_layers
<= max_array_size
);
1152 case GEN6_SURFTYPE_2D
:
1153 assert(info
.width
<= max_2d_size
&& info
.height
<= max_2d_size
&&
1154 info
.depth
<= max_array_size
);
1155 assert(info
.first_layer
< max_array_size
- 1 &&
1156 info
.num_layers
<= max_array_size
);
1158 case GEN6_SURFTYPE_3D
:
1159 assert(info
.width
<= 2048 && info
.height
<= 2048 && info
.depth
<= 2048);
1160 assert(info
.first_layer
< 2048 && info
.num_layers
<= max_array_size
);
1162 case GEN6_SURFTYPE_CUBE
:
1163 assert(info
.width
<= max_2d_size
&& info
.height
<= max_2d_size
&&
1165 assert(info
.first_layer
== 0 && info
.num_layers
== 1);
1166 assert(info
.width
== info
.height
);
1169 assert(!"unexpected depth surface type");
1173 dw1
= info
.surface_type
<< GEN6_DEPTH_DW1_TYPE__SHIFT
|
1174 info
.format
<< GEN6_DEPTH_DW1_FORMAT__SHIFT
;
1177 /* required for GEN6+ */
1178 assert(info
.zs
.tiling
== INTEL_TILING_Y
);
1179 assert(info
.zs
.stride
> 0 && info
.zs
.stride
< 128 * 1024 &&
1180 info
.zs
.stride
% 128 == 0);
1181 assert(info
.width
<= info
.zs
.stride
);
1183 dw1
|= (info
.zs
.stride
- 1);
1184 dw2
= info
.zs
.offset
;
1189 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
1191 dw1
|= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE
;
1193 if (info
.stencil
.bo
)
1194 dw1
|= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE
;
1197 dw1
|= GEN7_DEPTH_DW1_HIZ_ENABLE
;
1199 dw3
= (info
.height
- 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT
|
1200 (info
.width
- 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT
|
1201 info
.lod
<< GEN7_DEPTH_DW3_LOD__SHIFT
;
1203 zs
->dw_aligned_8x4
=
1204 (align(info
.height
, align_h
) - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT
|
1205 (align(info
.width
, align_w
) - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT
|
1206 info
.lod
<< GEN7_DEPTH_DW3_LOD__SHIFT
;
1208 dw4
= (info
.depth
- 1) << GEN7_DEPTH_DW4_DEPTH__SHIFT
|
1209 info
.first_layer
<< GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT
;
1213 dw6
= (info
.num_layers
- 1) << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT
;
1215 if (ilo_dev_gen(dev
) >= ILO_GEN(8))
1216 dw6
|= info
.zs
.qpitch
;
1218 /* always Y-tiled */
1219 dw1
|= GEN6_TILING_Y
<< GEN6_DEPTH_DW1_TILING__SHIFT
;
1222 dw1
|= GEN6_DEPTH_DW1_HIZ_ENABLE
|
1223 GEN6_DEPTH_DW1_SEPARATE_STENCIL
;
1226 dw3
= (info
.height
- 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT
|
1227 (info
.width
- 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT
|
1228 info
.lod
<< GEN6_DEPTH_DW3_LOD__SHIFT
|
1229 GEN6_DEPTH_DW3_MIPLAYOUT_BELOW
;
1231 zs
->dw_aligned_8x4
=
1232 (align(info
.height
, align_h
) - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT
|
1233 (align(info
.width
, align_w
) - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT
|
1234 info
.lod
<< GEN6_DEPTH_DW3_LOD__SHIFT
|
1235 GEN6_DEPTH_DW3_MIPLAYOUT_BELOW
;
1237 dw4
= (info
.depth
- 1) << GEN6_DEPTH_DW4_DEPTH__SHIFT
|
1238 info
.first_layer
<< GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT
|
1239 (info
.num_layers
- 1) << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT
;
1246 STATIC_ASSERT(Elements(zs
->payload
) >= 12);
1248 zs
->payload
[0] = dw1
;
1249 zs
->payload
[1] = dw2
;
1250 zs
->payload
[2] = dw3
;
1251 zs
->payload
[3] = dw4
;
1252 zs
->payload
[4] = dw5
;
1253 zs
->payload
[5] = dw6
;
1255 /* do not increment reference count */
1256 zs
->bo
= info
.zs
.bo
;
1258 /* separate stencil */
1259 if (info
.stencil
.bo
) {
1260 assert(info
.stencil
.stride
> 0 && info
.stencil
.stride
< 128 * 1024 &&
1261 info
.stencil
.stride
% 128 == 0);
1263 dw1
= (info
.stencil
.stride
- 1) << GEN6_STENCIL_DW1_PITCH__SHIFT
;
1264 if (ilo_dev_gen(dev
) >= ILO_GEN(7.5))
1265 dw1
|= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE
;
1267 dw2
= info
.stencil
.offset
;
1268 dw4
= info
.stencil
.qpitch
;
1275 zs
->payload
[6] = dw1
;
1276 zs
->payload
[7] = dw2
;
1277 zs
->payload
[8] = dw4
;
1278 /* do not increment reference count */
1279 zs
->separate_s8_bo
= info
.stencil
.bo
;
1283 dw1
= (info
.hiz
.stride
- 1) << GEN6_HIZ_DW1_PITCH__SHIFT
;
1284 dw2
= info
.hiz
.offset
;
1285 dw4
= info
.hiz
.qpitch
;
1292 zs
->payload
[9] = dw1
;
1293 zs
->payload
[10] = dw2
;
1294 zs
->payload
[11] = dw4
;
1295 /* do not increment reference count */
1296 zs
->hiz_bo
= info
.hiz
.bo
;
/**
 * Compute the guardband rectangle, in screen space, for a viewport centered
 * at (center_x, center_y).
 *
 * The guardband is a square with a side of 8192 pixels.  It is centered on
 * the viewport when possible, and shifted just enough to stay inside the
 * extent supported by the device otherwise.  The four edges are returned
 * through min_gbx, max_gbx, min_gby, and max_gby.
 */
static void
viewport_get_guardband(const struct ilo_dev_info *dev,
                       int center_x, int center_y,
                       int *min_gbx, int *max_gbx,
                       int *min_gby, int *max_gby)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 234:
    *
    *     "Per-Device Guardband Extents
    *
    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
    *       - Maximum Post-Clamp Delta (X or Y): 16K"
    *
    *     "In addition, in order to be correctly rendered, objects must have a
    *      screenspace bounding box not exceeding 8K in the X or Y direction.
    *      This additional restriction must also be comprehended by software,
    *      i.e., enforced by use of clipping."
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 248:
    *
    *     "Per-Device Guardband Extents
    *
    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
    *       - Maximum Post-Clamp Delta (X or Y): N/A"
    *
    *     "In addition, in order to be correctly rendered, objects must have a
    *      screenspace bounding box not exceeding 8K in the X or Y direction.
    *      This additional restriction must also be comprehended by software,
    *      i.e., enforced by use of clipping."
    *
    * Combined, the bounding box of any object can not exceed 8K in either
    * direction.
    *
    * Below we set the guardband as a square of length 8K, centered at where
    * the viewport is.  This makes sure all objects passing the GB test are
    * valid to the renderer, and those failing the XY clipping have a
    * better chance of passing the GB test.
    */
   const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 32768 : 16384;
   const int half_len = 8192 / 2;

   /*
    * Range of centers for which [center - half_len, center + half_len] lies
    * within [-max_extent, max_extent - 1].  Comparing the center against
    * these bounds, rather than adding half_len to it first, also avoids
    * signed overflow for extreme centers.
    */
   const int min_center = -max_extent + half_len;
   const int max_center = (max_extent - 1) - half_len;

   /* make sure the guardband is within the valid range */
   if (center_x < min_center)
      center_x = min_center;
   else if (center_x > max_center)
      center_x = max_center;

   if (center_y < min_center)
      center_y = min_center;
   else if (center_y > max_center)
      center_y = max_center;

   *min_gbx = center_x - half_len;
   *max_gbx = center_x + half_len;
   *min_gby = center_y - half_len;
   *max_gby = center_y + half_len;
}
1359 ilo_gpe_set_viewport_cso(const struct ilo_dev_info
*dev
,
1360 const struct pipe_viewport_state
*state
,
1361 struct ilo_viewport_cso
*vp
)
1363 const float scale_x
= fabs(state
->scale
[0]);
1364 const float scale_y
= fabs(state
->scale
[1]);
1365 const float scale_z
= fabs(state
->scale
[2]);
1366 int min_gbx
, max_gbx
, min_gby
, max_gby
;
1368 ILO_DEV_ASSERT(dev
, 6, 8);
1370 viewport_get_guardband(dev
,
1371 (int) state
->translate
[0],
1372 (int) state
->translate
[1],
1373 &min_gbx
, &max_gbx
, &min_gby
, &max_gby
);
1376 vp
->m00
= state
->scale
[0];
1377 vp
->m11
= state
->scale
[1];
1378 vp
->m22
= state
->scale
[2];
1379 vp
->m30
= state
->translate
[0];
1380 vp
->m31
= state
->translate
[1];
1381 vp
->m32
= state
->translate
[2];
1383 /* guardband in NDC space */
1384 vp
->min_gbx
= ((float) min_gbx
- state
->translate
[0]) / scale_x
;
1385 vp
->max_gbx
= ((float) max_gbx
- state
->translate
[0]) / scale_x
;
1386 vp
->min_gby
= ((float) min_gby
- state
->translate
[1]) / scale_y
;
1387 vp
->max_gby
= ((float) max_gby
- state
->translate
[1]) / scale_y
;
1389 /* viewport in screen space */
1390 vp
->min_x
= scale_x
* -1.0f
+ state
->translate
[0];
1391 vp
->max_x
= scale_x
* 1.0f
+ state
->translate
[0];
1392 vp
->min_y
= scale_y
* -1.0f
+ state
->translate
[1];
1393 vp
->max_y
= scale_y
* 1.0f
+ state
->translate
[1];
1394 vp
->min_z
= scale_z
* -1.0f
+ state
->translate
[2];
1395 vp
->max_z
= scale_z
* 1.0f
+ state
->translate
[2];
1399 * Translate a pipe logicop to the matching hardware logicop.
1402 gen6_translate_pipe_logicop(unsigned logicop
)
1405 case PIPE_LOGICOP_CLEAR
: return GEN6_LOGICOP_CLEAR
;
1406 case PIPE_LOGICOP_NOR
: return GEN6_LOGICOP_NOR
;
1407 case PIPE_LOGICOP_AND_INVERTED
: return GEN6_LOGICOP_AND_INVERTED
;
1408 case PIPE_LOGICOP_COPY_INVERTED
: return GEN6_LOGICOP_COPY_INVERTED
;
1409 case PIPE_LOGICOP_AND_REVERSE
: return GEN6_LOGICOP_AND_REVERSE
;
1410 case PIPE_LOGICOP_INVERT
: return GEN6_LOGICOP_INVERT
;
1411 case PIPE_LOGICOP_XOR
: return GEN6_LOGICOP_XOR
;
1412 case PIPE_LOGICOP_NAND
: return GEN6_LOGICOP_NAND
;
1413 case PIPE_LOGICOP_AND
: return GEN6_LOGICOP_AND
;
1414 case PIPE_LOGICOP_EQUIV
: return GEN6_LOGICOP_EQUIV
;
1415 case PIPE_LOGICOP_NOOP
: return GEN6_LOGICOP_NOOP
;
1416 case PIPE_LOGICOP_OR_INVERTED
: return GEN6_LOGICOP_OR_INVERTED
;
1417 case PIPE_LOGICOP_COPY
: return GEN6_LOGICOP_COPY
;
1418 case PIPE_LOGICOP_OR_REVERSE
: return GEN6_LOGICOP_OR_REVERSE
;
1419 case PIPE_LOGICOP_OR
: return GEN6_LOGICOP_OR
;
1420 case PIPE_LOGICOP_SET
: return GEN6_LOGICOP_SET
;
1422 assert(!"unknown logicop function");
1423 return GEN6_LOGICOP_CLEAR
;
1428 * Translate a pipe blend function to the matching hardware blend function.
1431 gen6_translate_pipe_blend(unsigned blend
)
1434 case PIPE_BLEND_ADD
: return GEN6_BLENDFUNCTION_ADD
;
1435 case PIPE_BLEND_SUBTRACT
: return GEN6_BLENDFUNCTION_SUBTRACT
;
1436 case PIPE_BLEND_REVERSE_SUBTRACT
: return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT
;
1437 case PIPE_BLEND_MIN
: return GEN6_BLENDFUNCTION_MIN
;
1438 case PIPE_BLEND_MAX
: return GEN6_BLENDFUNCTION_MAX
;
1440 assert(!"unknown blend function");
1441 return GEN6_BLENDFUNCTION_ADD
;
1446 * Translate a pipe blend factor to the matching hardware blend factor.
1449 gen6_translate_pipe_blendfactor(unsigned blendfactor
)
1451 switch (blendfactor
) {
1452 case PIPE_BLENDFACTOR_ONE
: return GEN6_BLENDFACTOR_ONE
;
1453 case PIPE_BLENDFACTOR_SRC_COLOR
: return GEN6_BLENDFACTOR_SRC_COLOR
;
1454 case PIPE_BLENDFACTOR_SRC_ALPHA
: return GEN6_BLENDFACTOR_SRC_ALPHA
;
1455 case PIPE_BLENDFACTOR_DST_ALPHA
: return GEN6_BLENDFACTOR_DST_ALPHA
;
1456 case PIPE_BLENDFACTOR_DST_COLOR
: return GEN6_BLENDFACTOR_DST_COLOR
;
1457 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE
;
1458 case PIPE_BLENDFACTOR_CONST_COLOR
: return GEN6_BLENDFACTOR_CONST_COLOR
;
1459 case PIPE_BLENDFACTOR_CONST_ALPHA
: return GEN6_BLENDFACTOR_CONST_ALPHA
;
1460 case PIPE_BLENDFACTOR_SRC1_COLOR
: return GEN6_BLENDFACTOR_SRC1_COLOR
;
1461 case PIPE_BLENDFACTOR_SRC1_ALPHA
: return GEN6_BLENDFACTOR_SRC1_ALPHA
;
1462 case PIPE_BLENDFACTOR_ZERO
: return GEN6_BLENDFACTOR_ZERO
;
1463 case PIPE_BLENDFACTOR_INV_SRC_COLOR
: return GEN6_BLENDFACTOR_INV_SRC_COLOR
;
1464 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
: return GEN6_BLENDFACTOR_INV_SRC_ALPHA
;
1465 case PIPE_BLENDFACTOR_INV_DST_ALPHA
: return GEN6_BLENDFACTOR_INV_DST_ALPHA
;
1466 case PIPE_BLENDFACTOR_INV_DST_COLOR
: return GEN6_BLENDFACTOR_INV_DST_COLOR
;
1467 case PIPE_BLENDFACTOR_INV_CONST_COLOR
: return GEN6_BLENDFACTOR_INV_CONST_COLOR
;
1468 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
: return GEN6_BLENDFACTOR_INV_CONST_ALPHA
;
1469 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
: return GEN6_BLENDFACTOR_INV_SRC1_COLOR
;
1470 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
: return GEN6_BLENDFACTOR_INV_SRC1_ALPHA
;
1472 assert(!"unknown blend factor");
1473 return GEN6_BLENDFACTOR_ONE
;
1478 * Translate a pipe stencil op to the matching hardware stencil op.
1481 gen6_translate_pipe_stencil_op(unsigned stencil_op
)
1483 switch (stencil_op
) {
1484 case PIPE_STENCIL_OP_KEEP
: return GEN6_STENCILOP_KEEP
;
1485 case PIPE_STENCIL_OP_ZERO
: return GEN6_STENCILOP_ZERO
;
1486 case PIPE_STENCIL_OP_REPLACE
: return GEN6_STENCILOP_REPLACE
;
1487 case PIPE_STENCIL_OP_INCR
: return GEN6_STENCILOP_INCRSAT
;
1488 case PIPE_STENCIL_OP_DECR
: return GEN6_STENCILOP_DECRSAT
;
1489 case PIPE_STENCIL_OP_INCR_WRAP
: return GEN6_STENCILOP_INCR
;
1490 case PIPE_STENCIL_OP_DECR_WRAP
: return GEN6_STENCILOP_DECR
;
1491 case PIPE_STENCIL_OP_INVERT
: return GEN6_STENCILOP_INVERT
;
1493 assert(!"unknown stencil op");
1494 return GEN6_STENCILOP_KEEP
;
1499 gen6_blend_factor_dst_alpha_forced_one(int factor
)
1502 case GEN6_BLENDFACTOR_DST_ALPHA
:
1503 return GEN6_BLENDFACTOR_ONE
;
1504 case GEN6_BLENDFACTOR_INV_DST_ALPHA
:
1505 case GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE
:
1506 return GEN6_BLENDFACTOR_ZERO
;
1513 blend_get_rt_blend_enable_gen6(const struct ilo_dev_info
*dev
,
1514 const struct pipe_rt_blend_state
*rt
,
1515 bool dst_alpha_forced_one
)
1517 int rgb_src
, rgb_dst
, a_src
, a_dst
;
1520 ILO_DEV_ASSERT(dev
, 6, 7.5);
1522 if (!rt
->blend_enable
)
1525 rgb_src
= gen6_translate_pipe_blendfactor(rt
->rgb_src_factor
);
1526 rgb_dst
= gen6_translate_pipe_blendfactor(rt
->rgb_dst_factor
);
1527 a_src
= gen6_translate_pipe_blendfactor(rt
->alpha_src_factor
);
1528 a_dst
= gen6_translate_pipe_blendfactor(rt
->alpha_dst_factor
);
1530 if (dst_alpha_forced_one
) {
1531 rgb_src
= gen6_blend_factor_dst_alpha_forced_one(rgb_src
);
1532 rgb_dst
= gen6_blend_factor_dst_alpha_forced_one(rgb_dst
);
1533 a_src
= gen6_blend_factor_dst_alpha_forced_one(a_src
);
1534 a_dst
= gen6_blend_factor_dst_alpha_forced_one(a_dst
);
1537 dw
= GEN6_RT_DW0_BLEND_ENABLE
|
1538 gen6_translate_pipe_blend(rt
->alpha_func
) << 26 |
1541 gen6_translate_pipe_blend(rt
->rgb_func
) << 11 |
1545 if (rt
->rgb_func
!= rt
->alpha_func
||
1546 rgb_src
!= a_src
|| rgb_dst
!= a_dst
)
1547 dw
|= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE
;
1553 blend_get_rt_blend_enable_gen8(const struct ilo_dev_info
*dev
,
1554 const struct pipe_rt_blend_state
*rt
,
1555 bool dst_alpha_forced_one
,
1556 bool *independent_alpha
)
1558 int rgb_src
, rgb_dst
, a_src
, a_dst
;
1561 ILO_DEV_ASSERT(dev
, 8, 8);
1563 if (!rt
->blend_enable
) {
1564 *independent_alpha
= false;
1568 rgb_src
= gen6_translate_pipe_blendfactor(rt
->rgb_src_factor
);
1569 rgb_dst
= gen6_translate_pipe_blendfactor(rt
->rgb_dst_factor
);
1570 a_src
= gen6_translate_pipe_blendfactor(rt
->alpha_src_factor
);
1571 a_dst
= gen6_translate_pipe_blendfactor(rt
->alpha_dst_factor
);
1573 if (dst_alpha_forced_one
) {
1574 rgb_src
= gen6_blend_factor_dst_alpha_forced_one(rgb_src
);
1575 rgb_dst
= gen6_blend_factor_dst_alpha_forced_one(rgb_dst
);
1576 a_src
= gen6_blend_factor_dst_alpha_forced_one(a_src
);
1577 a_dst
= gen6_blend_factor_dst_alpha_forced_one(a_dst
);
1580 dw
= GEN8_RT_DW0_BLEND_ENABLE
|
1583 gen6_translate_pipe_blend(rt
->rgb_func
) << 18 |
1586 gen6_translate_pipe_blend(rt
->alpha_func
) << 5;
1588 *independent_alpha
= (rt
->rgb_func
!= rt
->alpha_func
||
1596 blend_init_cso_gen6(const struct ilo_dev_info
*dev
,
1597 const struct pipe_blend_state
*state
,
1598 struct ilo_blend_state
*blend
,
1601 const struct pipe_rt_blend_state
*rt
= &state
->rt
[index
];
1602 struct ilo_blend_cso
*cso
= &blend
->cso
[index
];
1604 ILO_DEV_ASSERT(dev
, 6, 7.5);
1606 cso
->payload
[0] = 0;
1607 cso
->payload
[1] = GEN6_RT_DW1_COLORCLAMP_RTFORMAT
|
1608 GEN6_RT_DW1_PRE_BLEND_CLAMP
|
1609 GEN6_RT_DW1_POST_BLEND_CLAMP
;
1611 if (!(rt
->colormask
& PIPE_MASK_A
))
1612 cso
->payload
[1] |= GEN6_RT_DW1_WRITE_DISABLE_A
;
1613 if (!(rt
->colormask
& PIPE_MASK_R
))
1614 cso
->payload
[1] |= GEN6_RT_DW1_WRITE_DISABLE_R
;
1615 if (!(rt
->colormask
& PIPE_MASK_G
))
1616 cso
->payload
[1] |= GEN6_RT_DW1_WRITE_DISABLE_G
;
1617 if (!(rt
->colormask
& PIPE_MASK_B
))
1618 cso
->payload
[1] |= GEN6_RT_DW1_WRITE_DISABLE_B
;
1621 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
1623 * "Color Buffer Blending and Logic Ops must not be enabled
1624 * simultaneously, or behavior is UNDEFINED."
1626 * Since state->logicop_enable takes precedence over rt->blend_enable,
1627 * no special care is needed.
1629 if (state
->logicop_enable
) {
1631 cso
->dw_blend_dst_alpha_forced_one
= 0;
1633 cso
->dw_blend
= blend_get_rt_blend_enable_gen6(dev
, rt
, false);
1634 cso
->dw_blend_dst_alpha_forced_one
=
1635 blend_get_rt_blend_enable_gen6(dev
, rt
, true);
1640 blend_init_cso_gen8(const struct ilo_dev_info
*dev
,
1641 const struct pipe_blend_state
*state
,
1642 struct ilo_blend_state
*blend
,
1645 const struct pipe_rt_blend_state
*rt
= &state
->rt
[index
];
1646 struct ilo_blend_cso
*cso
= &blend
->cso
[index
];
1647 bool independent_alpha
= false;
1649 ILO_DEV_ASSERT(dev
, 8, 8);
1651 cso
->payload
[0] = 0;
1652 cso
->payload
[1] = GEN8_RT_DW1_COLORCLAMP_RTFORMAT
|
1653 GEN8_RT_DW1_PRE_BLEND_CLAMP
|
1654 GEN8_RT_DW1_POST_BLEND_CLAMP
;
1656 if (!(rt
->colormask
& PIPE_MASK_A
))
1657 cso
->payload
[0] |= GEN8_RT_DW0_WRITE_DISABLE_A
;
1658 if (!(rt
->colormask
& PIPE_MASK_R
))
1659 cso
->payload
[0] |= GEN8_RT_DW0_WRITE_DISABLE_R
;
1660 if (!(rt
->colormask
& PIPE_MASK_G
))
1661 cso
->payload
[0] |= GEN8_RT_DW0_WRITE_DISABLE_G
;
1662 if (!(rt
->colormask
& PIPE_MASK_B
))
1663 cso
->payload
[0] |= GEN8_RT_DW0_WRITE_DISABLE_B
;
1665 if (state
->logicop_enable
) {
1667 cso
->dw_blend_dst_alpha_forced_one
= 0;
1671 cso
->dw_blend
= blend_get_rt_blend_enable_gen8(dev
, rt
, false, &tmp
[0]);
1672 cso
->dw_blend_dst_alpha_forced_one
=
1673 blend_get_rt_blend_enable_gen8(dev
, rt
, true, &tmp
[1]);
1675 if (tmp
[0] || tmp
[1])
1676 independent_alpha
= true;
1679 return independent_alpha
;
1683 blend_get_logicop_enable_gen6(const struct ilo_dev_info
*dev
,
1684 const struct pipe_blend_state
*state
)
1686 ILO_DEV_ASSERT(dev
, 6, 7.5);
1688 if (!state
->logicop_enable
)
1691 return GEN6_RT_DW1_LOGICOP_ENABLE
|
1692 gen6_translate_pipe_logicop(state
->logicop_func
) << 18;
1696 blend_get_logicop_enable_gen8(const struct ilo_dev_info
*dev
,
1697 const struct pipe_blend_state
*state
)
1699 ILO_DEV_ASSERT(dev
, 8, 8);
1701 if (!state
->logicop_enable
)
1704 return GEN8_RT_DW1_LOGICOP_ENABLE
|
1705 gen6_translate_pipe_logicop(state
->logicop_func
) << 27;
1709 blend_get_alpha_mod_gen6(const struct ilo_dev_info
*dev
,
1710 const struct pipe_blend_state
*state
,
1715 ILO_DEV_ASSERT(dev
, 6, 7.5);
1717 if (state
->alpha_to_coverage
) {
1718 dw
|= GEN6_RT_DW1_ALPHA_TO_COVERAGE
;
1719 if (ilo_dev_gen(dev
) >= ILO_GEN(7))
1720 dw
|= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER
;
1723 * From the Sandy Bridge PRM, volume 2 part 1, page 378:
1725 * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
1726 * must be disabled."
1728 if (state
->alpha_to_one
&& !dual_blend
)
1729 dw
|= GEN6_RT_DW1_ALPHA_TO_ONE
;
1735 blend_get_alpha_mod_gen8(const struct ilo_dev_info
*dev
,
1736 const struct pipe_blend_state
*state
,
1741 ILO_DEV_ASSERT(dev
, 8, 8);
1743 if (state
->alpha_to_coverage
) {
1744 dw
|= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE
|
1745 GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER
;
1748 if (state
->alpha_to_one
&& !dual_blend
)
1749 dw
|= GEN8_BLEND_DW0_ALPHA_TO_ONE
;
1755 blend_get_ps_blend_gen8(const struct ilo_dev_info
*dev
, uint32_t rt_dw0
)
1757 int rgb_src
, rgb_dst
, a_src
, a_dst
;
1760 ILO_DEV_ASSERT(dev
, 8, 8);
1762 if (!(rt_dw0
& GEN8_RT_DW0_BLEND_ENABLE
))
1765 a_src
= GEN_EXTRACT(rt_dw0
, GEN8_RT_DW0_SRC_ALPHA_FACTOR
);
1766 a_dst
= GEN_EXTRACT(rt_dw0
, GEN8_RT_DW0_DST_ALPHA_FACTOR
);
1767 rgb_src
= GEN_EXTRACT(rt_dw0
, GEN8_RT_DW0_SRC_COLOR_FACTOR
);
1768 rgb_dst
= GEN_EXTRACT(rt_dw0
, GEN8_RT_DW0_DST_COLOR_FACTOR
);
1770 dw
= GEN8_PS_BLEND_DW1_BLEND_ENABLE
;
1771 dw
|= GEN_SHIFT32(a_src
, GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR
);
1772 dw
|= GEN_SHIFT32(a_dst
, GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR
);
1773 dw
|= GEN_SHIFT32(rgb_src
, GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR
);
1774 dw
|= GEN_SHIFT32(rgb_dst
, GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR
);
1776 if (a_src
!= rgb_src
|| a_dst
!= rgb_dst
)
1777 dw
|= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE
;
1783 ilo_gpe_init_blend(const struct ilo_dev_info
*dev
,
1784 const struct pipe_blend_state
*state
,
1785 struct ilo_blend_state
*blend
)
1789 ILO_DEV_ASSERT(dev
, 6, 8);
1791 blend
->dual_blend
= (util_blend_state_is_dual(state
, 0) &&
1792 state
->rt
[0].blend_enable
&&
1793 !state
->logicop_enable
);
1794 blend
->alpha_to_coverage
= state
->alpha_to_coverage
;
1796 if (ilo_dev_gen(dev
) >= ILO_GEN(8)) {
1797 bool independent_alpha
;
1799 blend
->dw_alpha_mod
=
1800 blend_get_alpha_mod_gen8(dev
, state
, blend
->dual_blend
);
1801 blend
->dw_logicop
= blend_get_logicop_enable_gen8(dev
, state
);
1802 blend
->dw_shared
= (state
->dither
) ? GEN8_BLEND_DW0_DITHER_ENABLE
: 0;
1804 independent_alpha
= blend_init_cso_gen8(dev
, state
, blend
, 0);
1805 if (independent_alpha
)
1806 blend
->dw_shared
|= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE
;
1808 blend
->dw_ps_blend
= blend_get_ps_blend_gen8(dev
,
1809 blend
->cso
[0].dw_blend
);
1810 blend
->dw_ps_blend_dst_alpha_forced_one
= blend_get_ps_blend_gen8(dev
,
1811 blend
->cso
[0].dw_blend_dst_alpha_forced_one
);
1813 if (state
->independent_blend_enable
) {
1814 for (i
= 1; i
< Elements(blend
->cso
); i
++) {
1815 independent_alpha
= blend_init_cso_gen8(dev
, state
, blend
, i
);
1816 if (independent_alpha
)
1817 blend
->dw_shared
|= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE
;
1820 for (i
= 1; i
< Elements(blend
->cso
); i
++)
1821 blend
->cso
[i
] = blend
->cso
[0];
1824 blend
->dw_alpha_mod
=
1825 blend_get_alpha_mod_gen6(dev
, state
, blend
->dual_blend
);
1826 blend
->dw_logicop
= blend_get_logicop_enable_gen6(dev
, state
);
1827 blend
->dw_shared
= (state
->dither
) ? GEN6_RT_DW1_DITHER_ENABLE
: 0;
1829 blend
->dw_ps_blend
= 0;
1830 blend
->dw_ps_blend_dst_alpha_forced_one
= 0;
1832 blend_init_cso_gen6(dev
, state
, blend
, 0);
1833 if (state
->independent_blend_enable
) {
1834 for (i
= 1; i
< Elements(blend
->cso
); i
++)
1835 blend_init_cso_gen6(dev
, state
, blend
, i
);
1837 for (i
= 1; i
< Elements(blend
->cso
); i
++)
1838 blend
->cso
[i
] = blend
->cso
[0];
1844 * Translate a pipe DSA test function to the matching hardware compare
1848 gen6_translate_dsa_func(unsigned func
)
1851 case PIPE_FUNC_NEVER
: return GEN6_COMPAREFUNCTION_NEVER
;
1852 case PIPE_FUNC_LESS
: return GEN6_COMPAREFUNCTION_LESS
;
1853 case PIPE_FUNC_EQUAL
: return GEN6_COMPAREFUNCTION_EQUAL
;
1854 case PIPE_FUNC_LEQUAL
: return GEN6_COMPAREFUNCTION_LEQUAL
;
1855 case PIPE_FUNC_GREATER
: return GEN6_COMPAREFUNCTION_GREATER
;
1856 case PIPE_FUNC_NOTEQUAL
: return GEN6_COMPAREFUNCTION_NOTEQUAL
;
1857 case PIPE_FUNC_GEQUAL
: return GEN6_COMPAREFUNCTION_GEQUAL
;
1858 case PIPE_FUNC_ALWAYS
: return GEN6_COMPAREFUNCTION_ALWAYS
;
1860 assert(!"unknown depth/stencil/alpha test function");
1861 return GEN6_COMPAREFUNCTION_NEVER
;
1866 dsa_get_stencil_enable_gen6(const struct ilo_dev_info
*dev
,
1867 const struct pipe_stencil_state
*stencil0
,
1868 const struct pipe_stencil_state
*stencil1
)
1872 ILO_DEV_ASSERT(dev
, 6, 7.5);
1874 if (!stencil0
->enabled
)
1878 * From the Sandy Bridge PRM, volume 2 part 1, page 359:
1880 * "If the Depth Buffer is either undefined or does not have a surface
1881 * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
1882 * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
1884 * From the Sandy Bridge PRM, volume 2 part 1, page 370:
1886 * "This field (Stencil Test Enable) cannot be enabled if
1887 * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
1889 * TODO We do not check these yet.
1891 dw
= GEN6_ZS_DW0_STENCIL_TEST_ENABLE
|
1892 gen6_translate_dsa_func(stencil0
->func
) << 28 |
1893 gen6_translate_pipe_stencil_op(stencil0
->fail_op
) << 25 |
1894 gen6_translate_pipe_stencil_op(stencil0
->zfail_op
) << 22 |
1895 gen6_translate_pipe_stencil_op(stencil0
->zpass_op
) << 19;
1896 if (stencil0
->writemask
)
1897 dw
|= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE
;
1899 if (stencil1
->enabled
) {
1900 dw
|= GEN6_ZS_DW0_STENCIL1_ENABLE
|
1901 gen6_translate_dsa_func(stencil1
->func
) << 12 |
1902 gen6_translate_pipe_stencil_op(stencil1
->fail_op
) << 9 |
1903 gen6_translate_pipe_stencil_op(stencil1
->zfail_op
) << 6 |
1904 gen6_translate_pipe_stencil_op(stencil1
->zpass_op
) << 3;
1905 if (stencil1
->writemask
)
1906 dw
|= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE
;
1913 dsa_get_stencil_enable_gen8(const struct ilo_dev_info
*dev
,
1914 const struct pipe_stencil_state
*stencil0
,
1915 const struct pipe_stencil_state
*stencil1
)
1919 ILO_DEV_ASSERT(dev
, 8, 8);
1921 if (!stencil0
->enabled
)
1924 dw
= gen6_translate_pipe_stencil_op(stencil0
->fail_op
) << 29 |
1925 gen6_translate_pipe_stencil_op(stencil0
->zfail_op
) << 26 |
1926 gen6_translate_pipe_stencil_op(stencil0
->zpass_op
) << 23 |
1927 gen6_translate_dsa_func(stencil0
->func
) << 8 |
1928 GEN8_ZS_DW1_STENCIL_TEST_ENABLE
;
1929 if (stencil0
->writemask
)
1930 dw
|= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE
;
1932 if (stencil1
->enabled
) {
1933 dw
|= gen6_translate_dsa_func(stencil1
->func
) << 20 |
1934 gen6_translate_pipe_stencil_op(stencil1
->fail_op
) << 17 |
1935 gen6_translate_pipe_stencil_op(stencil1
->zfail_op
) << 14 |
1936 gen6_translate_pipe_stencil_op(stencil1
->zpass_op
) << 11 |
1937 GEN8_ZS_DW1_STENCIL1_ENABLE
;
1938 if (stencil1
->writemask
)
1939 dw
|= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE
;
1946 dsa_get_depth_enable_gen6(const struct ilo_dev_info
*dev
,
1947 const struct pipe_depth_state
*state
)
1951 ILO_DEV_ASSERT(dev
, 6, 7.5);
1954 * From the Sandy Bridge PRM, volume 2 part 1, page 360:
1956 * "Enabling the Depth Test function without defining a Depth Buffer is
1959 * From the Sandy Bridge PRM, volume 2 part 1, page 375:
1961 * "A Depth Buffer must be defined before enabling writes to it, or
1962 * operation is UNDEFINED."
1964 * TODO We do not check these yet.
1966 if (state
->enabled
) {
1967 dw
= GEN6_ZS_DW2_DEPTH_TEST_ENABLE
|
1968 gen6_translate_dsa_func(state
->func
) << 27;
1970 dw
= GEN6_COMPAREFUNCTION_ALWAYS
<< 27;
1973 if (state
->writemask
)
1974 dw
|= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE
;
1980 dsa_get_depth_enable_gen8(const struct ilo_dev_info
*dev
,
1981 const struct pipe_depth_state
*state
)
1985 ILO_DEV_ASSERT(dev
, 8, 8);
1987 if (state
->enabled
) {
1988 dw
= GEN8_ZS_DW1_DEPTH_TEST_ENABLE
|
1989 gen6_translate_dsa_func(state
->func
) << 5;
1991 dw
= GEN6_COMPAREFUNCTION_ALWAYS
<< 5;
1994 if (state
->writemask
)
1995 dw
|= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE
;
2001 dsa_get_alpha_enable_gen6(const struct ilo_dev_info
*dev
,
2002 const struct pipe_alpha_state
*state
)
2006 ILO_DEV_ASSERT(dev
, 6, 8);
2008 if (!state
->enabled
)
2011 /* this will be ORed to BLEND_STATE */
2012 dw
= GEN6_RT_DW1_ALPHA_TEST_ENABLE
|
2013 gen6_translate_dsa_func(state
->func
) << 13;
2019 ilo_gpe_init_dsa(const struct ilo_dev_info
*dev
,
2020 const struct pipe_depth_stencil_alpha_state
*state
,
2021 struct ilo_dsa_state
*dsa
)
2023 ILO_DEV_ASSERT(dev
, 6, 8);
2025 STATIC_ASSERT(Elements(dsa
->payload
) >= 3);
2027 if (ilo_dev_gen(dev
) >= ILO_GEN(8)) {
2028 const uint32_t dw_stencil
= dsa_get_stencil_enable_gen8(dev
,
2029 &state
->stencil
[0], &state
->stencil
[1]);
2030 const uint32_t dw_depth
= dsa_get_depth_enable_gen8(dev
, &state
->depth
);
2032 assert(!(dw_stencil
& dw_depth
));
2033 dsa
->payload
[0] = dw_stencil
| dw_depth
;
2035 dsa
->payload
[0] = dsa_get_stencil_enable_gen6(dev
,
2036 &state
->stencil
[0], &state
->stencil
[1]);
2037 dsa
->payload
[2] = dsa_get_depth_enable_gen6(dev
, &state
->depth
);
2040 dsa
->payload
[1] = state
->stencil
[0].valuemask
<< 24 |
2041 state
->stencil
[0].writemask
<< 16 |
2042 state
->stencil
[1].valuemask
<< 8 |
2043 state
->stencil
[1].writemask
;
2045 dsa
->dw_blend_alpha
= dsa_get_alpha_enable_gen6(dev
, &state
->alpha
);
2046 dsa
->dw_ps_blend_alpha
= (state
->alpha
.enabled
) ?
2047 GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE
: 0;
2049 dsa
->alpha_ref
= float_to_ubyte(state
->alpha
.ref_value
);
2053 ilo_gpe_set_scissor(const struct ilo_dev_info
*dev
,
2054 unsigned start_slot
,
2055 unsigned num_states
,
2056 const struct pipe_scissor_state
*states
,
2057 struct ilo_scissor_state
*scissor
)
2061 ILO_DEV_ASSERT(dev
, 6, 8);
2063 for (i
= 0; i
< num_states
; i
++) {
2064 uint16_t min_x
, min_y
, max_x
, max_y
;
2066 /* both max and min are inclusive in SCISSOR_RECT */
2067 if (states
[i
].minx
< states
[i
].maxx
&&
2068 states
[i
].miny
< states
[i
].maxy
) {
2069 min_x
= states
[i
].minx
;
2070 min_y
= states
[i
].miny
;
2071 max_x
= states
[i
].maxx
- 1;
2072 max_y
= states
[i
].maxy
- 1;
2075 /* we have to make min greater than max */
2082 scissor
->payload
[(start_slot
+ i
) * 2 + 0] = min_y
<< 16 | min_x
;
2083 scissor
->payload
[(start_slot
+ i
) * 2 + 1] = max_y
<< 16 | max_x
;
2086 if (!start_slot
&& num_states
)
2087 scissor
->scissor0
= states
[0];
2091 ilo_gpe_set_scissor_null(const struct ilo_dev_info
*dev
,
2092 struct ilo_scissor_state
*scissor
)
2096 for (i
= 0; i
< Elements(scissor
->payload
); i
+= 2) {
2097 scissor
->payload
[i
+ 0] = 1 << 16 | 1;
2098 scissor
->payload
[i
+ 1] = 0;
2103 fb_set_blend_caps(const struct ilo_dev_info
*dev
,
2104 enum pipe_format format
,
2105 struct ilo_fb_blend_caps
*caps
)
2107 const struct util_format_description
*desc
=
2108 util_format_description(format
);
2109 const int ch
= util_format_get_first_non_void_channel(format
);
2111 memset(caps
, 0, sizeof(*caps
));
2113 if (format
== PIPE_FORMAT_NONE
|| desc
->is_mixed
)
2117 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
2119 * "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
2120 * variants), otherwise Logic Ops must be DISABLED."
2122 * According to the classic driver, this is lifted on Gen8+.
2124 if (ilo_dev_gen(dev
) >= ILO_GEN(8)) {
2125 caps
->can_logicop
= true;
2127 caps
->can_logicop
= (ch
>= 0 && desc
->channel
[ch
].normalized
&&
2128 desc
->channel
[ch
].type
== UTIL_FORMAT_TYPE_UNSIGNED
&&
2129 desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
);
2132 /* no blending for pure integer formats */
2133 caps
->can_blend
= !util_format_is_pure_integer(format
);
2136 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
2138 * "Alpha Test can only be enabled if Pixel Shader outputs a float
2141 caps
->can_alpha_test
= !util_format_is_pure_integer(format
);
2143 caps
->dst_alpha_forced_one
=
2144 (ilo_translate_render_format(dev
, format
) !=
2145 ilo_translate_color_format(dev
, format
));
2148 if (caps
->dst_alpha_forced_one
) {
2149 enum pipe_format render_format
;
2152 case PIPE_FORMAT_B8G8R8X8_UNORM
:
2153 render_format
= PIPE_FORMAT_B8G8R8A8_UNORM
;
2156 render_format
= PIPE_FORMAT_NONE
;
2160 assert(ilo_translate_render_format(dev
, format
) ==
2161 ilo_translate_color_format(dev
, render_format
));
2166 ilo_gpe_set_fb(const struct ilo_dev_info
*dev
,
2167 const struct pipe_framebuffer_state
*state
,
2168 struct ilo_fb_state
*fb
)
2170 const struct pipe_surface
*first_surf
= NULL
;
2173 ILO_DEV_ASSERT(dev
, 6, 8);
2175 util_copy_framebuffer_state(&fb
->state
, state
);
2177 ilo_gpe_init_view_surface_null(dev
,
2178 (state
->width
) ? state
->width
: 1,
2179 (state
->height
) ? state
->height
: 1,
2180 1, 0, &fb
->null_rt
);
2182 for (i
= 0; i
< state
->nr_cbufs
; i
++) {
2183 if (state
->cbufs
[i
]) {
2184 fb_set_blend_caps(dev
, state
->cbufs
[i
]->format
, &fb
->blend_caps
[i
]);
2187 first_surf
= state
->cbufs
[i
];
2189 fb_set_blend_caps(dev
, PIPE_FORMAT_NONE
, &fb
->blend_caps
[i
]);
2193 if (!first_surf
&& state
->zsbuf
)
2194 first_surf
= state
->zsbuf
;
2196 fb
->num_samples
= (first_surf
) ? first_surf
->texture
->nr_samples
: 1;
2197 if (!fb
->num_samples
)
2198 fb
->num_samples
= 1;
2201 * The PRMs list several restrictions when the framebuffer has more than
2202 * one surface. It seems they are actually lifted on GEN6+.