2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2014 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "genhw/genhw.h"
29 #include "util/u_framebuffer.h"
30 #include "util/u_half.h"
32 #include "ilo_format.h"
33 #include "ilo_image.h"
34 #include "ilo_state_3d.h"
35 #include "../ilo_shader.h"
/*
 * Build the Gen6 fragment shader payload and store it in cso->ps_payload.
 *
 * dev: device description; dev->gt selects the maximum thread count.
 * fs:  fragment shader state, queried with ilo_shader_get_kernel_param().
 * cso: destination; ps_payload[0..3] receive dw2, dw4, dw5 and dw6.
 *
 * The kernel parameters read are the URB data start register, the input
 * count, the sampler count, and the kill / output-Z / input-Z / input-W
 * flags.  Only 8-wide dispatch is programmed.
 *
 * NOTE(review): ILO_DEV_ASSERT(dev, 6, 6) presumably restricts this path to
 * Gen6 - confirm against its definition.
 * NOTE(review): the return type, the braces and the comment delimiters of
 * this definition are not visible in this listing - confirm before building.
 */
38 fs_init_cso_gen6(const struct ilo_dev
*dev
,
39 const struct ilo_shader_state
*fs
,
40 union ilo_shader_cso
*cso
)
42 int start_grf
, input_count
, sampler_count
, max_threads
;
43 uint32_t dw2
, dw4
, dw5
, dw6
;
45 ILO_DEV_ASSERT(dev
, 6, 6);
47 start_grf
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_URB_DATA_START_REG
);
48 input_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
);
49 sampler_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_SAMPLER_COUNT
);
51 /* see brwCreateContext() */
52 max_threads
= (dev
->gt
== 2) ? 80 : 40;
/* The condition is constant, so the FP_MODE_ALT bit is never set here. */
54 dw2
= (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT
;
/* The sampler count field holds the count divided by four, rounded up. */
55 dw2
|= ((sampler_count
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
/* Only URB_GRF_START0 comes from the shader; START1 and START2 are zero. */
57 dw4
= start_grf
<< GEN6_WM_DW4_URB_GRF_START0__SHIFT
|
58 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT
|
59 0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT
;
/* The field is programmed with the thread count minus one. */
61 dw5
= (max_threads
- 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT
;
64 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
66 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
67 * PS kernel or color calculator has the ability to kill (discard)
68 * pixels or samples, other than due to depth or stencil testing.
69 * This bit is required to be ENABLED in the following situations:
71 * The API pixel shader program contains "killpix" or "discard"
72 * instructions, or other code in the pixel shader kernel that can
73 * cause the final pixel mask to differ from the pixel mask received
76 * A sampler with chroma key enabled with kill pixel mode is used by
79 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
82 * The pixel shader kernel generates and outputs oMask.
84 * Note: As ClipDistance clipping is fully supported in hardware and
85 * therefore not via PS instructions, there should be no need to
86 * ENABLE this bit due to ClipDistance clipping."
88 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_USE_KILL
))
89 dw5
|= GEN6_WM_DW5_PS_KILL_PIXEL
;
92 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
94 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
95 * field must be set to disabled."
97 * TODO This is not checked yet.
99 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_OUTPUT_Z
))
100 dw5
|= GEN6_WM_DW5_PS_COMPUTE_DEPTH
;
102 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_Z
))
103 dw5
|= GEN6_WM_DW5_PS_USE_DEPTH
;
105 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_W
))
106 dw5
|= GEN6_WM_DW5_PS_USE_W
;
109 * TODO set this bit only when
111 * a) fs writes colors and color is not masked, or
112 * b) fs writes depth, or
116 dw5
|= GEN6_WM_DW5_PS_DISPATCH_ENABLE
;
/*
 * A kernel with a dispatch-16 offset is not expected here; the dispatch
 * mode is fixed to GEN6_PS_DISPATCH_8.
 */
118 assert(!ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_DISPATCH_16_OFFSET
));
119 dw5
|= GEN6_PS_DISPATCH_8
<< GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT
;
121 dw6
= input_count
<< GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT
|
122 GEN6_POSOFFSET_NONE
<< GEN6_WM_DW6_PS_POSOFFSET__SHIFT
;
/* Compile-time check that the payload array can hold the four dwords. */
124 STATIC_ASSERT(Elements(cso
->ps_payload
) >= 4);
125 cso
->ps_payload
[0] = dw2
;
126 cso
->ps_payload
[1] = dw4
;
127 cso
->ps_payload
[2] = dw5
;
128 cso
->ps_payload
[3] = dw6
;
/*
 * Assemble the Gen7 / Gen7.5 WM dword for a fragment shader.
 *
 * dev: device description, checked with ILO_DEV_ASSERT(dev, 7, 7.5).
 * fs:  fragment shader state, queried with ilo_shader_get_kernel_param().
 *
 * PS_DISPATCH_ENABLE is always ORed in.  KILL_PIXEL, PSCDEPTH_ON, USE_DEPTH
 * and USE_W are ORed in when the matching kernel parameter is non-zero.
 * fs_init_cso_gen7() stores the value of this call in ps_payload[3].
 *
 * NOTE(review): the declaration and initial value of dw, the return
 * statement and the return type are not visible in this listing; presumably
 * dw starts at zero and is returned - confirm before building.
 */
132 fs_get_wm_gen7(const struct ilo_dev
*dev
,
133 const struct ilo_shader_state
*fs
)
137 ILO_DEV_ASSERT(dev
, 7, 7.5);
142 * TODO set this bit only when
144 * a) fs writes colors and color is not masked, or
145 * b) fs writes depth, or
/* Dispatch is enabled unconditionally; the TODO lists the intended cases. */
148 dw
|= GEN7_WM_DW1_PS_DISPATCH_ENABLE
;
151 * From the Ivy Bridge PRM, volume 2 part 1, page 278:
153 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
154 * the PS kernel or color calculator has the ability to kill
155 * (discard) pixels or samples, other than due to depth or stencil
156 * testing. This bit is required to be ENABLED in the following
159 * - The API pixel shader program contains "killpix" or "discard"
160 * instructions, or other code in the pixel shader kernel that
161 * can cause the final pixel mask to differ from the pixel mask
162 * received on dispatch.
164 * - A sampler with chroma key enabled with kill pixel mode is used
165 * by the pixel shader.
167 * - Any render target has Alpha Test Enable or AlphaToCoverage
170 * - The pixel shader kernel generates and outputs oMask.
172 * Note: As ClipDistance clipping is fully supported in hardware
173 * and therefore not via PS instructions, there should be no need
174 * to ENABLE this bit due to ClipDistance clipping."
176 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_USE_KILL
))
177 dw
|= GEN7_WM_DW1_PS_KILL_PIXEL
;
179 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_OUTPUT_Z
))
180 dw
|= GEN7_PSCDEPTH_ON
<< GEN7_WM_DW1_PSCDEPTH__SHIFT
;
182 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_Z
))
183 dw
|= GEN7_WM_DW1_PS_USE_DEPTH
;
185 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_W
))
186 dw
|= GEN7_WM_DW1_PS_USE_W
;
/*
 * Build the Gen7 / Gen7.5 fragment shader payload and store it in
 * cso->ps_payload.
 *
 * dev: device description; ilo_dev_gen(dev) and dev->gt select the maximum
 *      thread count and which MAX_THREADS field layout is used.
 * fs:  fragment shader state, queried with ilo_shader_get_kernel_param().
 * cso: destination; ps_payload[0..2] receive dw2, dw4 and dw5, and
 *      ps_payload[3] receives the value of fs_get_wm_gen7(dev, fs).
 *
 * Push constants are enabled when ILO_KERNEL_PCB_CBUF0_SIZE is non-zero and
 * attributes are enabled when ILO_KERNEL_INPUT_COUNT is non-zero.  Only
 * 8-wide dispatch is programmed.
 *
 * NOTE(review): the case labels and break statements of the switch, the
 * return type and the braces are not visible in this listing.  Presumably
 * the GEN75_* group is the Gen7.5 case and the GEN7_* group is the Gen7 /
 * default case - confirm before building.
 */
192 fs_init_cso_gen7(const struct ilo_dev
*dev
,
193 const struct ilo_shader_state
*fs
,
194 union ilo_shader_cso
*cso
)
196 int start_grf
, sampler_count
, max_threads
;
197 uint32_t dw2
, dw4
, dw5
;
199 ILO_DEV_ASSERT(dev
, 7, 7.5);
201 start_grf
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_URB_DATA_START_REG
);
202 sampler_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_SAMPLER_COUNT
);
/* The condition is constant, so the FP_MODE_ALT bit is never set here. */
204 dw2
= (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT
;
/* The sampler count field holds the count divided by four, rounded up. */
205 dw2
|= ((sampler_count
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
207 dw4
= GEN6_POSOFFSET_NONE
<< GEN7_PS_DW4_POSOFFSET__SHIFT
;
209 /* see brwCreateContext() */
210 switch (ilo_dev_gen(dev
)) {
/* This group uses the GEN75_* field layout and also sets a sample mask of 1. */
212 max_threads
= (dev
->gt
== 3) ? 408 : (dev
->gt
== 2) ? 204 : 102;
213 dw4
|= (max_threads
- 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT
;
214 dw4
|= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT
;
/* This group uses the GEN7_* field layout. */
218 max_threads
= (dev
->gt
== 2) ? 172 : 48;
219 dw4
|= (max_threads
- 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT
;
223 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_PCB_CBUF0_SIZE
))
224 dw4
|= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE
;
226 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
))
227 dw4
|= GEN7_PS_DW4_ATTR_ENABLE
;
/*
 * A kernel with a dispatch-16 offset is not expected here; the dispatch
 * mode is fixed to GEN6_PS_DISPATCH_8.
 */
229 assert(!ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_DISPATCH_16_OFFSET
));
230 dw4
|= GEN6_PS_DISPATCH_8
<< GEN7_PS_DW4_DISPATCH_MODE__SHIFT
;
/* Only URB_GRF_START0 comes from the shader; START1 and START2 are zero. */
232 dw5
= start_grf
<< GEN7_PS_DW5_URB_GRF_START0__SHIFT
|
233 0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT
|
234 0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT
;
/* Compile-time check that the payload array can hold the four dwords. */
236 STATIC_ASSERT(Elements(cso
->ps_payload
) >= 4);
237 cso
->ps_payload
[0] = dw2
;
238 cso
->ps_payload
[1] = dw4
;
239 cso
->ps_payload
[2] = dw5
;
240 cso
->ps_payload
[3] = fs_get_wm_gen7(dev
, fs
);
/*
 * Assemble the Gen8 PSX dword for a fragment shader.
 *
 * dev: device description, checked with ILO_DEV_ASSERT(dev, 8, 8).
 * fs:  fragment shader state, queried with ilo_shader_get_kernel_param().
 *
 * dw starts as GEN8_PSX_DW1_VALID.  KILL_PIXEL, PSCDEPTH_ON, USE_DEPTH,
 * USE_W and ATTR_ENABLE are ORed in when the matching kernel parameter is
 * non-zero.  fs_init_cso_gen8() stores the value of this call in
 * ps_payload[3].
 *
 * NOTE(review): the declaration of dw, the return statement and the return
 * type are not visible in this listing; presumably dw is returned - confirm
 * before building.
 */
244 fs_get_psx_gen8(const struct ilo_dev
*dev
,
245 const struct ilo_shader_state
*fs
)
249 ILO_DEV_ASSERT(dev
, 8, 8);
251 dw
= GEN8_PSX_DW1_VALID
;
253 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_USE_KILL
))
254 dw
|= GEN8_PSX_DW1_KILL_PIXEL
;
255 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_OUTPUT_Z
))
256 dw
|= GEN7_PSCDEPTH_ON
<< GEN8_PSX_DW1_PSCDEPTH__SHIFT
;
257 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_Z
))
258 dw
|= GEN8_PSX_DW1_USE_DEPTH
;
259 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_W
))
260 dw
|= GEN8_PSX_DW1_USE_W
;
/* Attributes are enabled whenever the shader has at least one input. */
261 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
))
262 dw
|= GEN8_PSX_DW1_ATTR_ENABLE
;
/*
 * Build the Gen8 fragment shader payload and store it in cso->ps_payload.
 *
 * dev: device description, checked with ILO_DEV_ASSERT(dev, 8, 8).
 * fs:  fragment shader state, queried with ilo_shader_get_kernel_param().
 * cso: destination; ps_payload[0..2] receive dw3, dw6 and dw7, and
 *      ps_payload[3] receives the value of fs_get_psx_gen8(dev, fs).
 *
 * Push constants are enabled when ILO_KERNEL_PCB_CBUF0_SIZE is non-zero.
 * Only 8-wide dispatch is programmed.
 *
 * NOTE(review): the return type and the braces of this definition are not
 * visible in this listing - confirm before building.
 */
268 fs_init_cso_gen8(const struct ilo_dev
*dev
,
269 const struct ilo_shader_state
*fs
,
270 union ilo_shader_cso
*cso
)
272 int start_grf
, sampler_count
;
273 uint32_t dw3
, dw6
, dw7
;
275 ILO_DEV_ASSERT(dev
, 8, 8);
277 start_grf
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_URB_DATA_START_REG
);
278 sampler_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_SAMPLER_COUNT
);
/* The condition is constant, so the FP_MODE_ALT bit is never set here. */
280 dw3
= (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT
;
/* The sampler count field holds the count divided by four, rounded up. */
281 dw3
|= ((sampler_count
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
/*
 * The MAX_THREADS field is the constant 64 - 2 here; unlike the Gen6 and
 * Gen7 paths it is not derived from dev->gt.
 */
284 dw6
= (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT
|
285 GEN6_POSOFFSET_NONE
<< GEN8_PS_DW6_POSOFFSET__SHIFT
;
286 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_PCB_CBUF0_SIZE
))
287 dw6
|= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE
;
/*
 * A kernel with a dispatch-16 offset is not expected here; the dispatch
 * mode is fixed to GEN6_PS_DISPATCH_8.
 */
289 assert(!ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_DISPATCH_16_OFFSET
));
290 dw6
|= GEN6_PS_DISPATCH_8
<< GEN8_PS_DW6_DISPATCH_MODE__SHIFT
;
/* Only URB_GRF_START0 comes from the shader; START1 and START2 are zero. */
292 dw7
= start_grf
<< GEN8_PS_DW7_URB_GRF_START0__SHIFT
|
293 0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT
|
294 0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT
;
/* Compile-time check that the payload array can hold the four dwords. */
296 STATIC_ASSERT(Elements(cso
->ps_payload
) >= 4);
297 cso
->ps_payload
[0] = dw3
;
298 cso
->ps_payload
[1] = dw6
;
299 cso
->ps_payload
[2] = dw7
;
300 cso
->ps_payload
[3] = fs_get_psx_gen8(dev
, fs
);
/*
 * Initialize the fragment shader CSO for the given device generation.
 *
 * dev: device description; ilo_dev_gen(dev) selects the initializer.
 * fs:  fragment shader state, passed through unchanged.
 * cso: destination, passed through unchanged.
 *
 * Gen8 and later use fs_init_cso_gen8(), Gen7 and later use
 * fs_init_cso_gen7(), and fs_init_cso_gen6() handles the rest.
 *
 * NOTE(review): the line between the Gen7 branch and the Gen6 call is not
 * visible in this listing; presumably it is a plain else - confirm, since
 * without it the Gen6 initializer would run for every generation.
 */
304 ilo_gpe_init_fs_cso(const struct ilo_dev
*dev
,
305 const struct ilo_shader_state
*fs
,
306 union ilo_shader_cso
*cso
)
308 if (ilo_dev_gen(dev
) >= ILO_GEN(8))
309 fs_init_cso_gen8(dev
, fs
, cso
);
310 else if (ilo_dev_gen(dev
) >= ILO_GEN(7))
311 fs_init_cso_gen7(dev
, fs
, cso
);
313 fs_init_cso_gen6(dev
, fs
, cso
);
/*
 * Fill caps with the blend-related capabilities of a render target format.
 *
 * dev:    device description; used for the generation check and for the
 *         ilo_format_translate_render() / ilo_format_translate_color()
 *         lookups.
 * format: gallium format of the render target, or PIPE_FORMAT_NONE.
 * caps:   destination; zeroed with memset() before anything else is written.
 *
 * Fields written after the early check:
 *   is_unorm            - first non-void channel is normalized and unsigned,
 *                         and the colorspace is UTIL_FORMAT_COLORSPACE_RGB
 *   is_integer          - util_format_is_pure_integer(format)
 *   can_logicop         - Gen8 or later, or is_unorm
 *   can_blend           - not is_integer
 *   can_alpha_test      - not is_integer
 *   force_dst_alpha_one - the render translation of the format differs from
 *                         its color translation
 *
 * When force_dst_alpha_one is set, an assert cross-checks the render
 * translation against the color translation of a substitute format
 * (B8G8R8A8_UNORM for B8G8R8X8_UNORM, PIPE_FORMAT_NONE otherwise).
 *
 * NOTE(review): the body of the PIPE_FORMAT_NONE / is_mixed check, the
 * switch skeleton of the sanity check, the return type and the braces are
 * not visible in this listing.  Presumably the check returns early, leaving
 * caps all-zero - confirm before building.
 */
317 fb_set_blend_caps(const struct ilo_dev
*dev
,
318 enum pipe_format format
,
319 struct ilo_fb_blend_caps
*caps
)
321 const struct util_format_description
*desc
=
322 util_format_description(format
);
323 const int ch
= util_format_get_first_non_void_channel(format
);
/* Start from all-zero caps so that nothing is left uninitialized. */
325 memset(caps
, 0, sizeof(*caps
));
327 if (format
== PIPE_FORMAT_NONE
|| desc
->is_mixed
)
/* ch >= 0 guards the channel lookups against a format with no such channel. */
330 caps
->is_unorm
= (ch
>= 0 && desc
->channel
[ch
].normalized
&&
331 desc
->channel
[ch
].type
== UTIL_FORMAT_TYPE_UNSIGNED
&&
332 desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
);
333 caps
->is_integer
= util_format_is_pure_integer(format
);
336 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
338 * "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
339 * variants), otherwise Logic Ops must be DISABLED."
341 * According to the classic driver, this is lifted on Gen8+.
343 caps
->can_logicop
= (ilo_dev_gen(dev
) >= ILO_GEN(8) || caps
->is_unorm
);
345 /* no blending for pure integer formats */
346 caps
->can_blend
= !caps
->is_integer
;
349 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
351 * "Alpha Test can only be enabled if Pixel Shader outputs a float
354 caps
->can_alpha_test
= !caps
->is_integer
;
356 caps
->force_dst_alpha_one
=
357 (ilo_format_translate_render(dev
, format
) !=
358 ilo_format_translate_color(dev
, format
));
361 if (caps
->force_dst_alpha_one
) {
362 enum pipe_format render_format
;
365 case PIPE_FORMAT_B8G8R8X8_UNORM
:
366 render_format
= PIPE_FORMAT_B8G8R8A8_UNORM
;
369 render_format
= PIPE_FORMAT_NONE
;
373 assert(ilo_format_translate_render(dev
, format
) ==
374 ilo_format_translate_color(dev
, render_format
));
379 ilo_gpe_set_fb(const struct ilo_dev
*dev
,
380 const struct pipe_framebuffer_state
*state
,
381 struct ilo_fb_state
*fb
)
383 const struct pipe_surface
*first_surf
= NULL
;
386 ILO_DEV_ASSERT(dev
, 6, 8);
388 util_copy_framebuffer_state(&fb
->state
, state
);
390 fb
->has_integer_rt
= false;
391 for (i
= 0; i
< state
->nr_cbufs
; i
++) {
392 if (state
->cbufs
[i
]) {
393 fb_set_blend_caps(dev
, state
->cbufs
[i
]->format
, &fb
->blend_caps
[i
]);
395 fb
->has_integer_rt
|= fb
->blend_caps
[i
].is_integer
;
398 first_surf
= state
->cbufs
[i
];
400 fb_set_blend_caps(dev
, PIPE_FORMAT_NONE
, &fb
->blend_caps
[i
]);
404 if (!first_surf
&& state
->zsbuf
)
405 first_surf
= state
->zsbuf
;
407 fb
->num_samples
= (first_surf
) ? first_surf
->texture
->nr_samples
: 1;
408 if (!fb
->num_samples
)
412 const struct ilo_surface_cso
*cso
=
413 (const struct ilo_surface_cso
*) state
->zsbuf
;
415 fb
->has_hiz
= cso
->u
.zs
.hiz_bo
;
416 fb
->depth_offset_format
=
417 ilo_state_zs_get_depth_format(&cso
->u
.zs
, dev
);
420 fb
->depth_offset_format
= GEN6_ZFORMAT_D32_FLOAT
;
424 * The PRMs list several restrictions when the framebuffer has more than
425 * one surface. It seems they are actually lifted on GEN6+.