Merge branch 'wip/nir-vtn' into vulkan
[mesa.git] / src / gallium / drivers / ilo / core / ilo_state_shader_ps.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2015 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "ilo_debug.h"
29 #include "ilo_state_shader.h"
30
31 struct pixel_ff {
32 uint8_t dispatch_modes;
33
34 uint32_t kernel_offsets[3];
35 uint8_t grf_starts[3];
36 bool pcb_enable;
37 uint8_t scratch_space;
38
39 uint8_t sampler_count;
40 uint8_t surface_count;
41 bool has_uav;
42
43 uint16_t thread_count;
44
45 struct ilo_state_ps_dispatch_conds conds;
46
47 bool kill_pixel;
48 bool dispatch_enable;
49 bool dual_source_blending;
50 uint32_t sample_mask;
51 };
52
53 static bool
54 ps_kernel_validate_gen6(const struct ilo_dev *dev,
55 const struct ilo_state_shader_kernel_info *kernel)
56 {
57 /* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */
58 const uint8_t max_grf_start = 128;
59 /*
60 * From the Sandy Bridge PRM, volume 2 part 1, page 271:
61 *
62 * "(Per-Thread Scratch Space)
63 * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
64 */
65 const uint32_t max_scratch_size = 2 * 1024 * 1024;
66
67 ILO_DEV_ASSERT(dev, 6, 8);
68
69 /* "Kernel Start Pointer" is 64-byte aligned */
70 assert(kernel->offset % 64 == 0);
71
72 assert(kernel->grf_start < max_grf_start);
73 assert(kernel->scratch_size <= max_scratch_size);
74
75 return true;
76 }
77
78 static bool
79 ps_validate_gen6(const struct ilo_dev *dev,
80 const struct ilo_state_ps_info *info)
81 {
82 const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
83 const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
84 const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
85 const struct ilo_state_ps_io_info *io = &info->io;
86
87 ILO_DEV_ASSERT(dev, 6, 8);
88
89 if (!ps_kernel_validate_gen6(dev, kernel_8) ||
90 !ps_kernel_validate_gen6(dev, kernel_16) ||
91 !ps_kernel_validate_gen6(dev, kernel_32))
92 return false;
93
94 /* unsupported on Gen6 */
95 if (ilo_dev_gen(dev) == ILO_GEN(6))
96 assert(!io->use_coverage_mask);
97
98 /*
99 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
100 *
101 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
102 * field must be set to disabled."
103 */
104 if (ilo_dev_gen(dev) == ILO_GEN(6) && io->pscdepth != GEN7_PSCDEPTH_OFF)
105 assert(info->cv_has_depth_buffer);
106
107 if (!info->per_sample_dispatch) {
108 /*
109 * From the Sandy Bridge PRM, volume 2 part 1, page 281:
110 *
111 * "MSDISPMODE_PERSAMPLE is required in order to select
112 * POSOFFSET_SAMPLE."
113 */
114 assert(io->posoffset != GEN6_POSOFFSET_SAMPLE);
115
116 /*
117 * From the Sandy Bridge PRM, volume 2 part 1, page 282:
118 *
119 * "MSDISPMODE_PERSAMPLE is required in order to select
120 * INTERP_SAMPLE."
121 *
122 * From the Sandy Bridge PRM, volume 2 part 1, page 283:
123 *
124 * "MSDISPMODE_PERSAMPLE is required in order to select Perspective
125 * Sample or Non-perspective Sample barycentric coordinates."
126 */
127 assert(!info->cv_per_sample_interp);
128 }
129
130 /*
131 *
132 * From the Sandy Bridge PRM, volume 2 part 1, page 314:
133 *
134 * "Pixel Shader Dispatch, Alpha... must all be disabled."
135 *
136 * Simply disallow any valid kernel when there is early-z op. Also, when
137 * there is no valid kernel, io should be zeroed.
138 */
139 if (info->valid_kernels)
140 assert(!info->cv_has_earlyz_op);
141 else
142 assert(ilo_is_zeroed(io, sizeof(*io)));
143
144 return true;
145 }
146
147 static uint8_t
148 ps_get_gen6_dispatch_modes(const struct ilo_dev *dev,
149 const struct ilo_state_ps_info *info)
150 {
151 const struct ilo_state_ps_io_info *io = &info->io;
152 uint8_t dispatch_modes = info->valid_kernels;
153
154 ILO_DEV_ASSERT(dev, 6, 8);
155
156 if (!dispatch_modes)
157 return 0;
158
159 /*
160 * From the Sandy Bridge PRM, volume 2 part 1, page 334:
161 *
162 * "Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader
163 * computed depth."
164 *
165 * "Valid on all products, except when in non-1x PERSAMPLE mode
166 * (applies to [DevSNB+] only)"
167 *
168 * From the Sandy Bridge PRM, volume 4 part 1, page 239:
169 *
170 * "[DevSNB]: When Pixel Shader outputs oDepth and PS invocation mode
171 * is PERPIXEL, Message Type for Render Target Write must be SIMD8.
172 *
173 * Errata: [DevSNB+]: When Pixel Shader outputs oMask, this message
174 * type is not supported: SIMD8 (including SIMD8_DUALSRC_xx)."
175 *
176 * It is really hard to follow what combinations are valid on what
177 * platforms. Judging from the restrictions on RT write messages on Gen6,
178 * oDepth and oMask related issues should be Gen6-specific. PERSAMPLE
179 * issue should be universal, and disallows multiple dispatch modes.
180 */
181 if (ilo_dev_gen(dev) == ILO_GEN(6)) {
182 if (io->pscdepth != GEN7_PSCDEPTH_OFF && !info->per_sample_dispatch)
183 dispatch_modes &= GEN6_PS_DISPATCH_8;
184 if (io->write_omask)
185 dispatch_modes &= ~GEN6_PS_DISPATCH_8;
186 }
187 if (info->per_sample_dispatch && !info->sample_count_one) {
188 /* prefer 32 over 16 over 8 */
189 if (dispatch_modes & GEN6_PS_DISPATCH_32)
190 dispatch_modes &= GEN6_PS_DISPATCH_32;
191 else if (dispatch_modes & GEN6_PS_DISPATCH_16)
192 dispatch_modes &= GEN6_PS_DISPATCH_16;
193 else
194 dispatch_modes &= GEN6_PS_DISPATCH_8;
195 }
196
197 /*
198 * From the Broadwell PRM, volume 2b, page 149:
199 *
200 * "When Render Target Fast Clear Enable is ENABLED or Render Target
201 * Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit (8 Pixel
202 * Dispatch or Dual-8 Pixel Dispatch Enable) must be DISABLED."
203 */
204 if (info->rt_clear_enable || info->rt_resolve_enable)
205 dispatch_modes &= ~GEN6_PS_DISPATCH_8;
206
207 assert(dispatch_modes);
208
209 return dispatch_modes;
210 }
211
212 static uint16_t
213 ps_get_gen6_thread_count(const struct ilo_dev *dev,
214 const struct ilo_state_ps_info *info)
215 {
216 uint16_t thread_count;
217
218 ILO_DEV_ASSERT(dev, 6, 8);
219
220 /* Maximum Number of Threads of 3DSTATE_PS */
221 switch (ilo_dev_gen(dev)) {
222 case ILO_GEN(8):
223 /* scaled automatically */
224 thread_count = 64 - 1;
225 break;
226 case ILO_GEN(7.5):
227 thread_count = (dev->gt == 3) ? 408 :
228 (dev->gt == 2) ? 204 : 102;
229 break;
230 case ILO_GEN(7):
231 thread_count = (dev->gt == 2) ? 172 : 48;
232 break;
233 case ILO_GEN(6):
234 default:
235 /* from the classic driver instead of the PRM */
236 thread_count = (dev->gt == 2) ? 80 : 40;
237 break;
238 }
239
240 return thread_count - 1;
241 }
242
243 static bool
244 ps_params_get_gen6_kill_pixel(const struct ilo_dev *dev,
245 const struct ilo_state_ps_params_info *params,
246 const struct ilo_state_ps_dispatch_conds *conds)
247 {
248 ILO_DEV_ASSERT(dev, 6, 8);
249
250 /*
251 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
252 *
253 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
254 * PS kernel or color calculator has the ability to kill (discard)
255 * pixels or samples, other than due to depth or stencil testing.
256 * This bit is required to be ENABLED in the following situations:
257 *
258 * The API pixel shader program contains "killpix" or "discard"
259 * instructions, or other code in the pixel shader kernel that can
260 * cause the final pixel mask to differ from the pixel mask received
261 * on dispatch.
262 *
263 * A sampler with chroma key enabled with kill pixel mode is used by
264 * the pixel shader.
265 *
266 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
267 * enabled.
268 *
269 * The pixel shader kernel generates and outputs oMask.
270 *
271 * Note: As ClipDistance clipping is fully supported in hardware and
272 * therefore not via PS instructions, there should be no need to
273 * ENABLE this bit due to ClipDistance clipping."
274 */
275 return (conds->ps_may_kill || params->alpha_may_kill);
276 }
277
278 static bool
279 ps_params_get_gen6_dispatch_enable(const struct ilo_dev *dev,
280 const struct ilo_state_ps_params_info *params,
281 const struct ilo_state_ps_dispatch_conds *conds)
282 {
283 /*
284 * We want to skip dispatching when EarlyZ suffices. The conditions that
285 * require dispatching are
286 *
287 * - PS writes RTs and RTs are writeable
288 * - PS changes depth value and depth test/write is enabled
289 * - PS changes stencil value and stencil test is enabled
290 * - PS writes UAVs
291 * - PS or CC kills pixels
292 * - EDSC is PSEXEC, and depth test/write or stencil test is enabled
293 */
294 bool dispatch_required =
295 ((conds->has_rt_write && params->has_writeable_rt) ||
296 conds->write_odepth ||
297 conds->write_ostencil ||
298 conds->has_uav_write ||
299 ps_params_get_gen6_kill_pixel(dev, params, conds) ||
300 params->earlyz_control_psexec);
301
302 ILO_DEV_ASSERT(dev, 6, 8);
303
304 /*
305 * From the Ivy Bridge PRM, volume 2 part 1, page 280:
306 *
307 * "If EDSC_PSEXEC mode is selected, Thread Dispatch Enable must be
308 * set."
309 */
310 if (ilo_dev_gen(dev) < ILO_GEN(8) && params->earlyz_control_psexec)
311 dispatch_required = true;
312
313 /* assert it is valid to dispatch */
314 if (dispatch_required)
315 assert(conds->ps_valid);
316
317 return dispatch_required;
318 }
319
320 static bool
321 ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
322 const struct ilo_state_ps_info *info,
323 struct pixel_ff *ff)
324 {
325 const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
326 const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
327 const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
328 uint32_t scratch_size;
329
330 ILO_DEV_ASSERT(dev, 6, 8);
331
332 ff->dispatch_modes = ps_get_gen6_dispatch_modes(dev, info);
333
334 /* initialize kernel offsets and GRF starts */
335 if (util_is_power_of_two(ff->dispatch_modes)) {
336 if (ff->dispatch_modes & GEN6_PS_DISPATCH_8) {
337 ff->kernel_offsets[0] = kernel_8->offset;
338 ff->grf_starts[0] = kernel_8->grf_start;
339 } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_16) {
340 ff->kernel_offsets[0] = kernel_16->offset;
341 ff->grf_starts[0] = kernel_16->grf_start;
342 } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_32) {
343 ff->kernel_offsets[0] = kernel_32->offset;
344 ff->grf_starts[0] = kernel_32->grf_start;
345 }
346 } else {
347 ff->kernel_offsets[0] = kernel_8->offset;
348 ff->kernel_offsets[1] = kernel_32->offset;
349 ff->kernel_offsets[2] = kernel_16->offset;
350
351 ff->grf_starts[0] = kernel_8->grf_start;
352 ff->grf_starts[1] = kernel_32->grf_start;
353 ff->grf_starts[2] = kernel_16->grf_start;
354 }
355
356 /* we do not want to save it */
357 assert(ff->kernel_offsets[0] == 0);
358
359 ff->pcb_enable = (((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
360 kernel_8->pcb_attr_count) ||
361 ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
362 kernel_16->pcb_attr_count) ||
363 ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
364 kernel_32->pcb_attr_count));
365
366 scratch_size = 0;
367 if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
368 scratch_size < kernel_8->scratch_size)
369 scratch_size = kernel_8->scratch_size;
370 if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
371 scratch_size < kernel_16->scratch_size)
372 scratch_size = kernel_16->scratch_size;
373 if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
374 scratch_size < kernel_32->scratch_size)
375 scratch_size = kernel_32->scratch_size;
376
377 /* next power of two, starting from 1KB */
378 ff->scratch_space = (scratch_size > 1024) ?
379 (util_last_bit(scratch_size - 1) - 10): 0;
380
381 /* GPU hangs on Haswell if none of the dispatch mode bits is set */
382 if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
383 ff->dispatch_modes |= GEN6_PS_DISPATCH_8;
384
385 return true;
386 }
387
388 static bool
389 ps_get_gen6_ff(const struct ilo_dev *dev,
390 const struct ilo_state_ps_info *info,
391 struct pixel_ff *ff)
392 {
393 const struct ilo_state_shader_resource_info *resource = &info->resource;
394 const struct ilo_state_ps_io_info *io = &info->io;
395 const struct ilo_state_ps_params_info *params = &info->params;
396
397 ILO_DEV_ASSERT(dev, 6, 8);
398
399 memset(ff, 0, sizeof(*ff));
400
401 if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff))
402 return false;
403
404 ff->sampler_count = (resource->sampler_count <= 12) ?
405 (resource->sampler_count + 3) / 4 : 4;
406 ff->surface_count = resource->surface_count;
407 ff->has_uav = resource->has_uav;
408
409 ff->thread_count = ps_get_gen6_thread_count(dev, info);
410
411 ff->conds.ps_valid = (info->valid_kernels != 0x0);
412 ff->conds.has_rt_write = io->has_rt_write;
413 ff->conds.write_odepth = (io->pscdepth != GEN7_PSCDEPTH_OFF);
414 ff->conds.write_ostencil = false;
415 ff->conds.has_uav_write = resource->has_uav;
416 ff->conds.ps_may_kill = (io->write_pixel_mask || io->write_omask);
417
418 ff->kill_pixel = ps_params_get_gen6_kill_pixel(dev, params, &ff->conds);
419 ff->dispatch_enable =
420 ps_params_get_gen6_dispatch_enable(dev, params, &ff->conds);
421 ff->dual_source_blending = params->dual_source_blending;
422 ff->sample_mask = params->sample_mask;
423
424 return true;
425 }
426
427 static bool
428 ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps,
429 const struct ilo_dev *dev,
430 const struct ilo_state_ps_info *info,
431 const struct pixel_ff *ff)
432 {
433 const struct ilo_state_ps_io_info *io = &info->io;
434 uint32_t dw2, dw3, dw4, dw5, dw6;
435
436 ILO_DEV_ASSERT(dev, 6, 6);
437
438 dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
439 ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
440
441 if (false)
442 dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
443
444 dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
445
446 dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
447 ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
448 ff->grf_starts[2] << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
449
450 dw5 = ff->thread_count << GEN6_WM_DW5_MAX_THREADS__SHIFT |
451 ff->dispatch_modes << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
452
453 if (ff->kill_pixel)
454 dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;
455
456 if (io->pscdepth != GEN7_PSCDEPTH_OFF)
457 dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
458 if (io->use_z)
459 dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
460
461 if (ff->dispatch_enable)
462 dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
463
464 if (io->write_omask)
465 dw5 |= GEN6_WM_DW5_PS_COMPUTE_OMASK;
466 if (io->use_w)
467 dw5 |= GEN6_WM_DW5_PS_USE_W;
468
469 if (ff->dual_source_blending)
470 dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
471
472 dw6 = io->attr_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
473 io->posoffset << GEN6_WM_DW6_PS_POSOFFSET__SHIFT;
474
475 dw6 |= (info->per_sample_dispatch) ?
476 GEN6_WM_DW6_MSDISPMODE_PERSAMPLE : GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
477
478 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 7);
479 ps->ps[0] = dw2;
480 ps->ps[1] = dw3;
481 ps->ps[2] = dw4;
482 ps->ps[3] = dw5;
483 ps->ps[4] = dw6;
484 ps->ps[5] = ff->kernel_offsets[1];
485 ps->ps[6] = ff->kernel_offsets[2];
486
487 return true;
488 }
489
490 static bool
491 ps_set_gen7_3dstate_wm(struct ilo_state_ps *ps,
492 const struct ilo_dev *dev,
493 const struct ilo_state_ps_info *info,
494 const struct pixel_ff *ff)
495 {
496 const struct ilo_state_ps_io_info *io = &info->io;
497 uint32_t dw1, dw2;
498
499 ILO_DEV_ASSERT(dev, 7, 7.5);
500
501 dw1 = io->pscdepth << GEN7_WM_DW1_PSCDEPTH__SHIFT;
502
503 if (ff->dispatch_enable)
504 dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
505 if (ff->kill_pixel)
506 dw1 |= GEN7_WM_DW1_PS_KILL_PIXEL;
507
508 if (io->use_z)
509 dw1 |= GEN7_WM_DW1_PS_USE_DEPTH;
510 if (io->use_w)
511 dw1 |= GEN7_WM_DW1_PS_USE_W;
512 if (io->use_coverage_mask)
513 dw1 |= GEN7_WM_DW1_PS_USE_COVERAGE_MASK;
514
515 dw2 = (info->per_sample_dispatch) ?
516 GEN7_WM_DW2_MSDISPMODE_PERSAMPLE : GEN7_WM_DW2_MSDISPMODE_PERPIXEL;
517
518 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 2);
519 ps->ps[0] = dw1;
520 ps->ps[1] = dw2;
521
522 return true;
523 }
524
525 static bool
526 ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps,
527 const struct ilo_dev *dev,
528 const struct ilo_state_ps_info *info,
529 const struct pixel_ff *ff)
530 {
531 const struct ilo_state_ps_io_info *io = &info->io;
532 uint32_t dw2, dw3, dw4, dw5;
533
534 ILO_DEV_ASSERT(dev, 7, 7.5);
535
536 dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
537 ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
538
539 if (false)
540 dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
541
542 dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
543
544 dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT |
545 ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
546
547 if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
548 dw4 |= ff->thread_count << GEN75_PS_DW4_MAX_THREADS__SHIFT |
549 (ff->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
550 } else {
551 dw4 |= ff->thread_count << GEN7_PS_DW4_MAX_THREADS__SHIFT;
552 }
553
554 if (ff->pcb_enable)
555 dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
556 if (io->attr_count)
557 dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
558 if (io->write_omask)
559 dw4 |= GEN7_PS_DW4_COMPUTE_OMASK;
560 if (info->rt_clear_enable)
561 dw4 |= GEN7_PS_DW4_RT_FAST_CLEAR;
562 if (ff->dual_source_blending)
563 dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
564 if (info->rt_resolve_enable)
565 dw4 |= GEN7_PS_DW4_RT_RESOLVE;
566 if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff->has_uav)
567 dw4 |= GEN75_PS_DW4_ACCESS_UAV;
568
569 dw5 = ff->grf_starts[0] << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
570 ff->grf_starts[1] << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
571 ff->grf_starts[2] << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
572
573 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 8);
574 ps->ps[2] = dw2;
575 ps->ps[3] = dw3;
576 ps->ps[4] = dw4;
577 ps->ps[5] = dw5;
578 ps->ps[6] = ff->kernel_offsets[1];
579 ps->ps[7] = ff->kernel_offsets[2];
580
581 return true;
582 }
583
584 static bool
585 ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps,
586 const struct ilo_dev *dev,
587 const struct ilo_state_ps_info *info,
588 const struct pixel_ff *ff)
589 {
590 const struct ilo_state_ps_io_info *io = &info->io;
591 uint32_t dw3, dw4, dw6, dw7;
592
593 ILO_DEV_ASSERT(dev, 8, 8);
594
595 dw3 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
596 ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
597
598 if (false)
599 dw3 |= GEN6_THREADDISP_FP_MODE_ALT;
600
601 dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
602
603 dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT |
604 io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT |
605 ff->dispatch_modes << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;
606
607 if (ff->pcb_enable)
608 dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE;
609
610 if (info->rt_clear_enable)
611 dw6 |= GEN8_PS_DW6_RT_FAST_CLEAR;
612 if (info->rt_resolve_enable)
613 dw6 |= GEN8_PS_DW6_RT_RESOLVE;
614
615 dw7 = ff->grf_starts[0] << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
616 ff->grf_starts[1] << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
617 ff->grf_starts[2] << GEN8_PS_DW7_URB_GRF_START2__SHIFT;
618
619 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 6);
620 ps->ps[0] = dw3;
621 ps->ps[1] = dw4;
622 ps->ps[2] = dw6;
623 ps->ps[3] = dw7;
624 ps->ps[4] = ff->kernel_offsets[1];
625 ps->ps[5] = ff->kernel_offsets[2];
626
627 return true;
628 }
629
630 static bool
631 ps_set_gen8_3DSTATE_PS_EXTRA(struct ilo_state_ps *ps,
632 const struct ilo_dev *dev,
633 const struct ilo_state_ps_info *info,
634 const struct pixel_ff *ff)
635 {
636 const struct ilo_state_ps_io_info *io = &info->io;
637 uint32_t dw1;
638
639 ILO_DEV_ASSERT(dev, 8, 8);
640
641 dw1 = io->pscdepth << GEN8_PSX_DW1_PSCDEPTH__SHIFT;
642
643 if (info->valid_kernels)
644 dw1 |= GEN8_PSX_DW1_VALID;
645 if (!io->has_rt_write)
646 dw1 |= GEN8_PSX_DW1_UAV_ONLY;
647 if (io->write_omask)
648 dw1 |= GEN8_PSX_DW1_COMPUTE_OMASK;
649 if (io->write_pixel_mask)
650 dw1 |= GEN8_PSX_DW1_KILL_PIXEL;
651
652 if (io->use_z)
653 dw1 |= GEN8_PSX_DW1_USE_DEPTH;
654 if (io->use_w)
655 dw1 |= GEN8_PSX_DW1_USE_W;
656 if (io->attr_count)
657 dw1 |= GEN8_PSX_DW1_ATTR_ENABLE;
658
659 if (info->per_sample_dispatch)
660 dw1 |= GEN8_PSX_DW1_PER_SAMPLE;
661 if (ff->has_uav)
662 dw1 |= GEN8_PSX_DW1_ACCESS_UAV;
663 if (io->use_coverage_mask)
664 dw1 |= GEN8_PSX_DW1_USE_COVERAGE_MASK;
665
666 /*
667 * From the Broadwell PRM, volume 2b, page 151:
668 *
669 * "When this bit (Pixel Shader Valid) clear the rest of this command
670 * should also be clear.
671 */
672 if (!info->valid_kernels)
673 dw1 = 0;
674
675 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 5);
676 ps->ps[4] = dw1;
677
678 return true;
679 }
680
681 bool
682 ilo_state_ps_init(struct ilo_state_ps *ps,
683 const struct ilo_dev *dev,
684 const struct ilo_state_ps_info *info)
685 {
686 struct pixel_ff ff;
687 bool ret = true;
688
689 assert(ilo_is_zeroed(ps, sizeof(*ps)));
690
691 ret &= ps_get_gen6_ff(dev, info, &ff);
692
693 if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
694 ret &= ps_set_gen8_3DSTATE_PS(ps, dev, info, &ff);
695 ret &= ps_set_gen8_3DSTATE_PS_EXTRA(ps, dev, info, &ff);
696 } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
697 ret &= ps_set_gen7_3dstate_wm(ps, dev, info, &ff);
698 ret &= ps_set_gen7_3DSTATE_PS(ps, dev, info, &ff);
699 } else {
700 ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff);
701 }
702
703 /* save conditions */
704 ps->conds = ff.conds;
705
706 assert(ret);
707
708 return ret;
709 }
710
711 bool
712 ilo_state_ps_init_disabled(struct ilo_state_ps *ps,
713 const struct ilo_dev *dev)
714 {
715 struct ilo_state_ps_info info;
716
717 memset(&info, 0, sizeof(info));
718
719 return ilo_state_ps_init(ps, dev, &info);
720 }
721
722 bool
723 ilo_state_ps_set_params(struct ilo_state_ps *ps,
724 const struct ilo_dev *dev,
725 const struct ilo_state_ps_params_info *params)
726 {
727 ILO_DEV_ASSERT(dev, 6, 8);
728
729 /* modify sample mask */
730 if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
731 ps->ps[4] = (ps->ps[4] & ~GEN75_PS_DW4_SAMPLE_MASK__MASK) |
732 (params->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
733 }
734
735 /* modify dispatch enable, pixel kill, and dual source blending */
736 if (ilo_dev_gen(dev) < ILO_GEN(8)) {
737 if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
738 if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds))
739 ps->ps[0] |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
740 else
741 ps->ps[0] &= ~GEN7_WM_DW1_PS_DISPATCH_ENABLE;
742
743 if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds))
744 ps->ps[0] |= GEN7_WM_DW1_PS_KILL_PIXEL;
745 else
746 ps->ps[0] &= ~GEN7_WM_DW1_PS_KILL_PIXEL;
747
748 if (params->dual_source_blending)
749 ps->ps[4] |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
750 else
751 ps->ps[4] &= ~GEN7_PS_DW4_DUAL_SOURCE_BLEND;
752 } else {
753 if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds))
754 ps->ps[3] |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
755 else
756 ps->ps[3] &= ~GEN6_WM_DW5_PS_DISPATCH_ENABLE;
757
758 if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds))
759 ps->ps[3] |= GEN6_WM_DW5_PS_KILL_PIXEL;
760 else
761 ps->ps[3] &= ~GEN6_WM_DW5_PS_KILL_PIXEL;
762
763 if (params->dual_source_blending)
764 ps->ps[3] |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
765 else
766 ps->ps[3] &= ~GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
767 }
768 }
769
770 return true;
771 }