ilo: add ilo_state_{vs,hs,ds,gs} to ilo_shader_cso
[mesa.git] / src / gallium / drivers / ilo / core / ilo_state_3d_bottom.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2014 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "genhw/genhw.h"
29 #include "util/u_framebuffer.h"
30 #include "util/u_half.h"
31
32 #include "ilo_format.h"
33 #include "ilo_image.h"
34 #include "ilo_state_3d.h"
35 #include "../ilo_shader.h"
36
37 static void
38 fs_init_cso_gen6(const struct ilo_dev *dev,
39 const struct ilo_shader_state *fs,
40 union ilo_shader_cso *cso)
41 {
42 int start_grf, input_count, sampler_count, max_threads;
43 uint32_t dw2, dw4, dw5, dw6;
44
45 ILO_DEV_ASSERT(dev, 6, 6);
46
47 start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
48 input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
49 sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
50
51 /* see brwCreateContext() */
52 max_threads = (dev->gt == 2) ? 80 : 40;
53
54 dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
55 dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
56
57 dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
58 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
59 0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
60
61 dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
62
63 /*
64 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
65 *
66 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
67 * PS kernel or color calculator has the ability to kill (discard)
68 * pixels or samples, other than due to depth or stencil testing.
69 * This bit is required to be ENABLED in the following situations:
70 *
71 * The API pixel shader program contains "killpix" or "discard"
72 * instructions, or other code in the pixel shader kernel that can
73 * cause the final pixel mask to differ from the pixel mask received
74 * on dispatch.
75 *
76 * A sampler with chroma key enabled with kill pixel mode is used by
77 * the pixel shader.
78 *
79 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
80 * enabled.
81 *
82 * The pixel shader kernel generates and outputs oMask.
83 *
84 * Note: As ClipDistance clipping is fully supported in hardware and
85 * therefore not via PS instructions, there should be no need to
86 * ENABLE this bit due to ClipDistance clipping."
87 */
88 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
89 dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;
90
91 /*
92 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
93 *
94 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
95 * field must be set to disabled."
96 *
97 * TODO This is not checked yet.
98 */
99 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
100 dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
101
102 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
103 dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
104
105 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
106 dw5 |= GEN6_WM_DW5_PS_USE_W;
107
108 /*
109 * TODO set this bit only when
110 *
111 * a) fs writes colors and color is not masked, or
112 * b) fs writes depth, or
113 * c) fs or cc kills
114 */
115 if (true)
116 dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
117
118 assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
119 dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
120
121 dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
122 GEN6_POSOFFSET_NONE << GEN6_WM_DW6_PS_POSOFFSET__SHIFT;
123
124 STATIC_ASSERT(Elements(cso->ps_payload) >= 4);
125 cso->ps_payload[0] = dw2;
126 cso->ps_payload[1] = dw4;
127 cso->ps_payload[2] = dw5;
128 cso->ps_payload[3] = dw6;
129 }
130
131 static uint32_t
132 fs_get_wm_gen7(const struct ilo_dev *dev,
133 const struct ilo_shader_state *fs)
134 {
135 uint32_t dw;
136
137 ILO_DEV_ASSERT(dev, 7, 7.5);
138
139 dw = 0;
140
141 /*
142 * TODO set this bit only when
143 *
144 * a) fs writes colors and color is not masked, or
145 * b) fs writes depth, or
146 * c) fs or cc kills
147 */
148 dw |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
149
150 /*
151 * From the Ivy Bridge PRM, volume 2 part 1, page 278:
152 *
153 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
154 * the PS kernel or color calculator has the ability to kill
155 * (discard) pixels or samples, other than due to depth or stencil
156 * testing. This bit is required to be ENABLED in the following
157 * situations:
158 *
159 * - The API pixel shader program contains "killpix" or "discard"
160 * instructions, or other code in the pixel shader kernel that
161 * can cause the final pixel mask to differ from the pixel mask
162 * received on dispatch.
163 *
164 * - A sampler with chroma key enabled with kill pixel mode is used
165 * by the pixel shader.
166 *
167 * - Any render target has Alpha Test Enable or AlphaToCoverage
168 * Enable enabled.
169 *
170 * - The pixel shader kernel generates and outputs oMask.
171 *
172 * Note: As ClipDistance clipping is fully supported in hardware
173 * and therefore not via PS instructions, there should be no need
174 * to ENABLE this bit due to ClipDistance clipping."
175 */
176 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
177 dw |= GEN7_WM_DW1_PS_KILL_PIXEL;
178
179 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
180 dw |= GEN7_PSCDEPTH_ON << GEN7_WM_DW1_PSCDEPTH__SHIFT;
181
182 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
183 dw |= GEN7_WM_DW1_PS_USE_DEPTH;
184
185 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
186 dw |= GEN7_WM_DW1_PS_USE_W;
187
188 return dw;
189 }
190
191 static void
192 fs_init_cso_gen7(const struct ilo_dev *dev,
193 const struct ilo_shader_state *fs,
194 union ilo_shader_cso *cso)
195 {
196 int start_grf, sampler_count, max_threads;
197 uint32_t dw2, dw4, dw5;
198
199 ILO_DEV_ASSERT(dev, 7, 7.5);
200
201 start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
202 sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
203
204 dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
205 dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
206
207 dw4 = GEN6_POSOFFSET_NONE << GEN7_PS_DW4_POSOFFSET__SHIFT;
208
209 /* see brwCreateContext() */
210 switch (ilo_dev_gen(dev)) {
211 case ILO_GEN(7.5):
212 max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102;
213 dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
214 dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
215 break;
216 case ILO_GEN(7):
217 default:
218 max_threads = (dev->gt == 2) ? 172 : 48;
219 dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
220 break;
221 }
222
223 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
224 dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
225
226 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
227 dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
228
229 assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
230 dw4 |= GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
231
232 dw5 = start_grf << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
233 0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
234 0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
235
236 STATIC_ASSERT(Elements(cso->ps_payload) >= 4);
237 cso->ps_payload[0] = dw2;
238 cso->ps_payload[1] = dw4;
239 cso->ps_payload[2] = dw5;
240 cso->ps_payload[3] = fs_get_wm_gen7(dev, fs);
241 }
242
243 static uint32_t
244 fs_get_psx_gen8(const struct ilo_dev *dev,
245 const struct ilo_shader_state *fs)
246 {
247 uint32_t dw;
248
249 ILO_DEV_ASSERT(dev, 8, 8);
250
251 dw = GEN8_PSX_DW1_VALID;
252
253 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
254 dw |= GEN8_PSX_DW1_KILL_PIXEL;
255 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
256 dw |= GEN7_PSCDEPTH_ON << GEN8_PSX_DW1_PSCDEPTH__SHIFT;
257 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
258 dw |= GEN8_PSX_DW1_USE_DEPTH;
259 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
260 dw |= GEN8_PSX_DW1_USE_W;
261 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
262 dw |= GEN8_PSX_DW1_ATTR_ENABLE;
263
264 return dw;
265 }
266
267 static void
268 fs_init_cso_gen8(const struct ilo_dev *dev,
269 const struct ilo_shader_state *fs,
270 union ilo_shader_cso *cso)
271 {
272 int start_grf, sampler_count;
273 uint32_t dw3, dw6, dw7;
274
275 ILO_DEV_ASSERT(dev, 8, 8);
276
277 start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
278 sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
279
280 dw3 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
281 dw3 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
282
283 /* always 64? */
284 dw6 = (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT |
285 GEN6_POSOFFSET_NONE << GEN8_PS_DW6_POSOFFSET__SHIFT;
286 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
287 dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE;
288
289 assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
290 dw6 |= GEN6_PS_DISPATCH_8 << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;
291
292 dw7 = start_grf << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
293 0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
294 0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT;
295
296 STATIC_ASSERT(Elements(cso->ps_payload) >= 4);
297 cso->ps_payload[0] = dw3;
298 cso->ps_payload[1] = dw6;
299 cso->ps_payload[2] = dw7;
300 cso->ps_payload[3] = fs_get_psx_gen8(dev, fs);
301 }
302
/**
 * Initialize the fragment shader payload of \p cso for the generation of
 * \p dev, by dispatching to the Gen8, Gen7 or Gen6 initializer.
 */
void
ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
                    const struct ilo_shader_state *fs,
                    union ilo_shader_cso *cso)
{
   /* newest generation first */
   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
      fs_init_cso_gen8(dev, fs, cso);
      return;
   }

   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
      fs_init_cso_gen7(dev, fs, cso);
      return;
   }

   /* everything older is handled by the Gen6 path */
   fs_init_cso_gen6(dev, fs, cso);
}
315
316 static void
317 fb_set_blend_caps(const struct ilo_dev *dev,
318 enum pipe_format format,
319 struct ilo_fb_blend_caps *caps)
320 {
321 const struct util_format_description *desc =
322 util_format_description(format);
323 const int ch = util_format_get_first_non_void_channel(format);
324
325 memset(caps, 0, sizeof(*caps));
326
327 if (format == PIPE_FORMAT_NONE || desc->is_mixed)
328 return;
329
330 caps->is_unorm = (ch >= 0 && desc->channel[ch].normalized &&
331 desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED &&
332 desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
333 caps->is_integer = util_format_is_pure_integer(format);
334
335 /*
336 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
337 *
338 * "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
339 * variants), otherwise Logic Ops must be DISABLED."
340 *
341 * According to the classic driver, this is lifted on Gen8+.
342 */
343 caps->can_logicop = (ilo_dev_gen(dev) >= ILO_GEN(8) || caps->is_unorm);
344
345 /* no blending for pure integer formats */
346 caps->can_blend = !caps->is_integer;
347
348 /*
349 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
350 *
351 * "Alpha Test can only be enabled if Pixel Shader outputs a float
352 * alpha value."
353 */
354 caps->can_alpha_test = !caps->is_integer;
355
356 caps->force_dst_alpha_one =
357 (ilo_format_translate_render(dev, format) !=
358 ilo_format_translate_color(dev, format));
359
360 /* sanity check */
361 if (caps->force_dst_alpha_one) {
362 enum pipe_format render_format;
363
364 switch (format) {
365 case PIPE_FORMAT_B8G8R8X8_UNORM:
366 render_format = PIPE_FORMAT_B8G8R8A8_UNORM;
367 break;
368 default:
369 render_format = PIPE_FORMAT_NONE;
370 break;
371 }
372
373 assert(ilo_format_translate_render(dev, format) ==
374 ilo_format_translate_color(dev, render_format));
375 }
376 }
377
378 void
379 ilo_gpe_set_fb(const struct ilo_dev *dev,
380 const struct pipe_framebuffer_state *state,
381 struct ilo_fb_state *fb)
382 {
383 const struct pipe_surface *first_surf = NULL;
384 int i;
385
386 ILO_DEV_ASSERT(dev, 6, 8);
387
388 util_copy_framebuffer_state(&fb->state, state);
389
390 fb->has_integer_rt = false;
391 for (i = 0; i < state->nr_cbufs; i++) {
392 if (state->cbufs[i]) {
393 fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]);
394
395 fb->has_integer_rt |= fb->blend_caps[i].is_integer;
396
397 if (!first_surf)
398 first_surf = state->cbufs[i];
399 } else {
400 fb_set_blend_caps(dev, PIPE_FORMAT_NONE, &fb->blend_caps[i]);
401 }
402 }
403
404 if (!first_surf && state->zsbuf)
405 first_surf = state->zsbuf;
406
407 fb->num_samples = (first_surf) ? first_surf->texture->nr_samples : 1;
408 if (!fb->num_samples)
409 fb->num_samples = 1;
410
411 if (state->zsbuf) {
412 const struct ilo_surface_cso *cso =
413 (const struct ilo_surface_cso *) state->zsbuf;
414
415 fb->has_hiz = cso->u.zs.hiz_bo;
416 fb->depth_offset_format =
417 ilo_state_zs_get_depth_format(&cso->u.zs, dev);
418 } else {
419 fb->has_hiz = false;
420 fb->depth_offset_format = GEN6_ZFORMAT_D32_FLOAT;
421 }
422
423 /*
424 * The PRMs list several restrictions when the framebuffer has more than
425 * one surface. It seems they are actually lifted on GEN6+.
426 */
427 }