Merge remote-tracking branch 'mesa-public/master' into vulkan
[mesa.git] / src / gallium / drivers / ilo / core / ilo_state_shader_ps.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2015 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "ilo_debug.h"
29 #include "ilo_state_shader.h"
30
31 struct pixel_ff {
32 uint8_t dispatch_modes;
33
34 uint32_t kernel_offsets[3];
35 uint8_t grf_starts[3];
36 bool pcb_enable;
37 uint8_t per_thread_scratch_space;
38 uint32_t per_thread_scratch_size;
39
40 uint8_t sampler_count;
41 uint8_t surface_count;
42 bool has_uav;
43
44 uint16_t thread_count;
45
46 struct ilo_state_ps_dispatch_conds conds;
47
48 bool kill_pixel;
49 bool dispatch_enable;
50 bool dual_source_blending;
51 uint32_t sample_mask;
52 };
53
54 static bool
55 ps_kernel_validate_gen6(const struct ilo_dev *dev,
56 const struct ilo_state_shader_kernel_info *kernel)
57 {
58 /* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */
59 const uint8_t max_grf_start = 128;
60
61 ILO_DEV_ASSERT(dev, 6, 8);
62
63 /* "Kernel Start Pointer" is 64-byte aligned */
64 assert(kernel->offset % 64 == 0);
65
66 assert(kernel->grf_start < max_grf_start);
67
68 return true;
69 }
70
71 static bool
72 ps_validate_gen6(const struct ilo_dev *dev,
73 const struct ilo_state_ps_info *info)
74 {
75 const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
76 const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
77 const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
78 const struct ilo_state_ps_io_info *io = &info->io;
79
80 ILO_DEV_ASSERT(dev, 6, 8);
81
82 if (!ps_kernel_validate_gen6(dev, kernel_8) ||
83 !ps_kernel_validate_gen6(dev, kernel_16) ||
84 !ps_kernel_validate_gen6(dev, kernel_32))
85 return false;
86
87 /* unsupported on Gen6 */
88 if (ilo_dev_gen(dev) == ILO_GEN(6))
89 assert(!io->use_coverage_mask);
90
91 /*
92 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
93 *
94 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
95 * field must be set to disabled."
96 */
97 if (ilo_dev_gen(dev) == ILO_GEN(6) && io->pscdepth != GEN7_PSCDEPTH_OFF)
98 assert(info->cv_has_depth_buffer);
99
100 if (!info->per_sample_dispatch) {
101 /*
102 * From the Sandy Bridge PRM, volume 2 part 1, page 281:
103 *
104 * "MSDISPMODE_PERSAMPLE is required in order to select
105 * POSOFFSET_SAMPLE."
106 */
107 assert(io->posoffset != GEN6_POSOFFSET_SAMPLE);
108
109 /*
110 * From the Sandy Bridge PRM, volume 2 part 1, page 282:
111 *
112 * "MSDISPMODE_PERSAMPLE is required in order to select
113 * INTERP_SAMPLE."
114 *
115 * From the Sandy Bridge PRM, volume 2 part 1, page 283:
116 *
117 * "MSDISPMODE_PERSAMPLE is required in order to select Perspective
118 * Sample or Non-perspective Sample barycentric coordinates."
119 */
120 assert(!info->cv_per_sample_interp);
121 }
122
123 /*
124 *
125 * From the Sandy Bridge PRM, volume 2 part 1, page 314:
126 *
127 * "Pixel Shader Dispatch, Alpha... must all be disabled."
128 *
129 * Simply disallow any valid kernel when there is early-z op. Also, when
130 * there is no valid kernel, io should be zeroed.
131 */
132 if (info->valid_kernels)
133 assert(!info->cv_has_earlyz_op);
134 else
135 assert(ilo_is_zeroed(io, sizeof(*io)));
136
137 return true;
138 }
139
140 static uint8_t
141 ps_get_gen6_dispatch_modes(const struct ilo_dev *dev,
142 const struct ilo_state_ps_info *info)
143 {
144 const struct ilo_state_ps_io_info *io = &info->io;
145 uint8_t dispatch_modes = info->valid_kernels;
146
147 ILO_DEV_ASSERT(dev, 6, 8);
148
149 if (!dispatch_modes)
150 return 0;
151
152 /*
153 * From the Sandy Bridge PRM, volume 2 part 1, page 334:
154 *
155 * "Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader
156 * computed depth."
157 *
158 * "Valid on all products, except when in non-1x PERSAMPLE mode
159 * (applies to [DevSNB+] only)"
160 *
161 * From the Sandy Bridge PRM, volume 4 part 1, page 239:
162 *
163 * "[DevSNB]: When Pixel Shader outputs oDepth and PS invocation mode
164 * is PERPIXEL, Message Type for Render Target Write must be SIMD8.
165 *
166 * Errata: [DevSNB+]: When Pixel Shader outputs oMask, this message
167 * type is not supported: SIMD8 (including SIMD8_DUALSRC_xx)."
168 *
169 * It is really hard to follow what combinations are valid on what
170 * platforms. Judging from the restrictions on RT write messages on Gen6,
171 * oDepth and oMask related issues should be Gen6-specific. PERSAMPLE
172 * issue should be universal, and disallows multiple dispatch modes.
173 */
174 if (ilo_dev_gen(dev) == ILO_GEN(6)) {
175 if (io->pscdepth != GEN7_PSCDEPTH_OFF && !info->per_sample_dispatch)
176 dispatch_modes &= GEN6_PS_DISPATCH_8;
177 if (io->write_omask)
178 dispatch_modes &= ~GEN6_PS_DISPATCH_8;
179 }
180 if (info->per_sample_dispatch && !info->sample_count_one) {
181 /* prefer 32 over 16 over 8 */
182 if (dispatch_modes & GEN6_PS_DISPATCH_32)
183 dispatch_modes &= GEN6_PS_DISPATCH_32;
184 else if (dispatch_modes & GEN6_PS_DISPATCH_16)
185 dispatch_modes &= GEN6_PS_DISPATCH_16;
186 else
187 dispatch_modes &= GEN6_PS_DISPATCH_8;
188 }
189
190 /*
191 * From the Broadwell PRM, volume 2b, page 149:
192 *
193 * "When Render Target Fast Clear Enable is ENABLED or Render Target
194 * Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit (8 Pixel
195 * Dispatch or Dual-8 Pixel Dispatch Enable) must be DISABLED."
196 */
197 if (info->rt_clear_enable || info->rt_resolve_enable)
198 dispatch_modes &= ~GEN6_PS_DISPATCH_8;
199
200 assert(dispatch_modes);
201
202 return dispatch_modes;
203 }
204
205 static uint16_t
206 ps_get_gen6_thread_count(const struct ilo_dev *dev,
207 const struct ilo_state_ps_info *info)
208 {
209 uint16_t thread_count;
210
211 ILO_DEV_ASSERT(dev, 6, 8);
212
213 /* Maximum Number of Threads of 3DSTATE_PS */
214 switch (ilo_dev_gen(dev)) {
215 case ILO_GEN(8):
216 /* scaled automatically */
217 thread_count = 64 - 1;
218 break;
219 case ILO_GEN(7.5):
220 thread_count = (dev->gt == 3) ? 408 :
221 (dev->gt == 2) ? 204 : 102;
222 break;
223 case ILO_GEN(7):
224 thread_count = (dev->gt == 2) ? 172 : 48;
225 break;
226 case ILO_GEN(6):
227 default:
228 /* from the classic driver instead of the PRM */
229 thread_count = (dev->gt == 2) ? 80 : 40;
230 break;
231 }
232
233 return thread_count - 1;
234 }
235
236 static bool
237 ps_params_get_gen6_kill_pixel(const struct ilo_dev *dev,
238 const struct ilo_state_ps_params_info *params,
239 const struct ilo_state_ps_dispatch_conds *conds)
240 {
241 ILO_DEV_ASSERT(dev, 6, 8);
242
243 /*
244 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
245 *
246 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
247 * PS kernel or color calculator has the ability to kill (discard)
248 * pixels or samples, other than due to depth or stencil testing.
249 * This bit is required to be ENABLED in the following situations:
250 *
251 * The API pixel shader program contains "killpix" or "discard"
252 * instructions, or other code in the pixel shader kernel that can
253 * cause the final pixel mask to differ from the pixel mask received
254 * on dispatch.
255 *
256 * A sampler with chroma key enabled with kill pixel mode is used by
257 * the pixel shader.
258 *
259 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
260 * enabled.
261 *
262 * The pixel shader kernel generates and outputs oMask.
263 *
264 * Note: As ClipDistance clipping is fully supported in hardware and
265 * therefore not via PS instructions, there should be no need to
266 * ENABLE this bit due to ClipDistance clipping."
267 */
268 return (conds->ps_may_kill || params->alpha_may_kill);
269 }
270
271 static bool
272 ps_params_get_gen6_dispatch_enable(const struct ilo_dev *dev,
273 const struct ilo_state_ps_params_info *params,
274 const struct ilo_state_ps_dispatch_conds *conds)
275 {
276 /*
277 * We want to skip dispatching when EarlyZ suffices. The conditions that
278 * require dispatching are
279 *
280 * - PS writes RTs and RTs are writeable
281 * - PS changes depth value and depth test/write is enabled
282 * - PS changes stencil value and stencil test is enabled
283 * - PS writes UAVs
284 * - PS or CC kills pixels
285 * - EDSC is PSEXEC, and depth test/write or stencil test is enabled
286 */
287 bool dispatch_required =
288 ((conds->has_rt_write && params->has_writeable_rt) ||
289 conds->write_odepth ||
290 conds->write_ostencil ||
291 conds->has_uav_write ||
292 ps_params_get_gen6_kill_pixel(dev, params, conds) ||
293 params->earlyz_control_psexec);
294
295 ILO_DEV_ASSERT(dev, 6, 8);
296
297 /*
298 * From the Ivy Bridge PRM, volume 2 part 1, page 280:
299 *
300 * "If EDSC_PSEXEC mode is selected, Thread Dispatch Enable must be
301 * set."
302 */
303 if (ilo_dev_gen(dev) < ILO_GEN(8) && params->earlyz_control_psexec)
304 dispatch_required = true;
305
306 /* assert it is valid to dispatch */
307 if (dispatch_required)
308 assert(conds->ps_valid);
309
310 return dispatch_required;
311 }
312
313 static bool
314 ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
315 const struct ilo_state_ps_info *info,
316 struct pixel_ff *ff)
317 {
318 const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
319 const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
320 const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
321
322 ILO_DEV_ASSERT(dev, 6, 8);
323
324 ff->dispatch_modes = ps_get_gen6_dispatch_modes(dev, info);
325
326 /* initialize kernel offsets and GRF starts */
327 if (util_is_power_of_two(ff->dispatch_modes)) {
328 if (ff->dispatch_modes & GEN6_PS_DISPATCH_8) {
329 ff->kernel_offsets[0] = kernel_8->offset;
330 ff->grf_starts[0] = kernel_8->grf_start;
331 } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_16) {
332 ff->kernel_offsets[0] = kernel_16->offset;
333 ff->grf_starts[0] = kernel_16->grf_start;
334 } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_32) {
335 ff->kernel_offsets[0] = kernel_32->offset;
336 ff->grf_starts[0] = kernel_32->grf_start;
337 }
338 } else {
339 ff->kernel_offsets[0] = kernel_8->offset;
340 ff->kernel_offsets[1] = kernel_32->offset;
341 ff->kernel_offsets[2] = kernel_16->offset;
342
343 ff->grf_starts[0] = kernel_8->grf_start;
344 ff->grf_starts[1] = kernel_32->grf_start;
345 ff->grf_starts[2] = kernel_16->grf_start;
346 }
347
348 /* we do not want to save it */
349 assert(ff->kernel_offsets[0] == 0);
350
351 ff->pcb_enable = (((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
352 kernel_8->pcb_attr_count) ||
353 ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
354 kernel_16->pcb_attr_count) ||
355 ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
356 kernel_32->pcb_attr_count));
357
358 /* GPU hangs on Haswell if none of the dispatch mode bits is set */
359 if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
360 ff->dispatch_modes |= GEN6_PS_DISPATCH_8;
361
362 return true;
363 }
364
365 static bool
366 ps_get_gen6_ff(const struct ilo_dev *dev,
367 const struct ilo_state_ps_info *info,
368 struct pixel_ff *ff)
369 {
370 const struct ilo_state_shader_resource_info *resource = &info->resource;
371 const struct ilo_state_ps_io_info *io = &info->io;
372 const struct ilo_state_ps_params_info *params = &info->params;
373
374 ILO_DEV_ASSERT(dev, 6, 8);
375
376 memset(ff, 0, sizeof(*ff));
377
378 if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff))
379 return false;
380
381 if (info->per_thread_scratch_size) {
382 /*
383 * From the Sandy Bridge PRM, volume 2 part 1, page 271:
384 *
385 * "(Per-Thread Scratch Space)
386 * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
387 */
388 assert(info->per_thread_scratch_size <= 2 * 1024 * 1024);
389
390 /* next power of two, starting from 1KB */
391 ff->per_thread_scratch_space = (info->per_thread_scratch_size > 1024) ?
392 (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
393 ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
394 }
395
396 ff->sampler_count = (resource->sampler_count <= 12) ?
397 (resource->sampler_count + 3) / 4 : 4;
398 ff->surface_count = resource->surface_count;
399 ff->has_uav = resource->has_uav;
400
401 ff->thread_count = ps_get_gen6_thread_count(dev, info);
402
403 ff->conds.ps_valid = (info->valid_kernels != 0x0);
404 ff->conds.has_rt_write = io->has_rt_write;
405 ff->conds.write_odepth = (io->pscdepth != GEN7_PSCDEPTH_OFF);
406 ff->conds.write_ostencil = false;
407 ff->conds.has_uav_write = resource->has_uav;
408 ff->conds.ps_may_kill = (io->write_pixel_mask || io->write_omask);
409
410 ff->kill_pixel = ps_params_get_gen6_kill_pixel(dev, params, &ff->conds);
411 ff->dispatch_enable =
412 ps_params_get_gen6_dispatch_enable(dev, params, &ff->conds);
413 ff->dual_source_blending = params->dual_source_blending;
414 ff->sample_mask = params->sample_mask;
415
416 return true;
417 }
418
419 static bool
420 ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps,
421 const struct ilo_dev *dev,
422 const struct ilo_state_ps_info *info,
423 const struct pixel_ff *ff)
424 {
425 const struct ilo_state_ps_io_info *io = &info->io;
426 uint32_t dw2, dw3, dw4, dw5, dw6;
427
428 ILO_DEV_ASSERT(dev, 6, 6);
429
430 dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
431 ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
432
433 if (false)
434 dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
435
436 dw3 = ff->per_thread_scratch_space <<
437 GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
438
439 dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
440 ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
441 ff->grf_starts[2] << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
442
443 dw5 = ff->thread_count << GEN6_WM_DW5_MAX_THREADS__SHIFT |
444 ff->dispatch_modes << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
445
446 if (ff->kill_pixel)
447 dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;
448
449 if (io->pscdepth != GEN7_PSCDEPTH_OFF)
450 dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
451 if (io->use_z)
452 dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
453
454 if (ff->dispatch_enable)
455 dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
456
457 if (io->write_omask)
458 dw5 |= GEN6_WM_DW5_PS_COMPUTE_OMASK;
459 if (io->use_w)
460 dw5 |= GEN6_WM_DW5_PS_USE_W;
461
462 if (ff->dual_source_blending)
463 dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
464
465 dw6 = io->attr_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
466 io->posoffset << GEN6_WM_DW6_PS_POSOFFSET__SHIFT;
467
468 dw6 |= (info->per_sample_dispatch) ?
469 GEN6_WM_DW6_MSDISPMODE_PERSAMPLE : GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
470
471 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 7);
472 ps->ps[0] = dw2;
473 ps->ps[1] = dw3;
474 ps->ps[2] = dw4;
475 ps->ps[3] = dw5;
476 ps->ps[4] = dw6;
477 ps->ps[5] = ff->kernel_offsets[1];
478 ps->ps[6] = ff->kernel_offsets[2];
479
480 return true;
481 }
482
483 static bool
484 ps_set_gen7_3dstate_wm(struct ilo_state_ps *ps,
485 const struct ilo_dev *dev,
486 const struct ilo_state_ps_info *info,
487 const struct pixel_ff *ff)
488 {
489 const struct ilo_state_ps_io_info *io = &info->io;
490 uint32_t dw1, dw2;
491
492 ILO_DEV_ASSERT(dev, 7, 7.5);
493
494 dw1 = io->pscdepth << GEN7_WM_DW1_PSCDEPTH__SHIFT;
495
496 if (ff->dispatch_enable)
497 dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
498 if (ff->kill_pixel)
499 dw1 |= GEN7_WM_DW1_PS_KILL_PIXEL;
500
501 if (io->use_z)
502 dw1 |= GEN7_WM_DW1_PS_USE_DEPTH;
503 if (io->use_w)
504 dw1 |= GEN7_WM_DW1_PS_USE_W;
505 if (io->use_coverage_mask)
506 dw1 |= GEN7_WM_DW1_PS_USE_COVERAGE_MASK;
507
508 dw2 = (info->per_sample_dispatch) ?
509 GEN7_WM_DW2_MSDISPMODE_PERSAMPLE : GEN7_WM_DW2_MSDISPMODE_PERPIXEL;
510
511 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 2);
512 ps->ps[0] = dw1;
513 ps->ps[1] = dw2;
514
515 return true;
516 }
517
518 static bool
519 ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps,
520 const struct ilo_dev *dev,
521 const struct ilo_state_ps_info *info,
522 const struct pixel_ff *ff)
523 {
524 const struct ilo_state_ps_io_info *io = &info->io;
525 uint32_t dw2, dw3, dw4, dw5;
526
527 ILO_DEV_ASSERT(dev, 7, 7.5);
528
529 dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
530 ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
531
532 if (false)
533 dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
534
535 dw3 = ff->per_thread_scratch_space <<
536 GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
537
538 dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT |
539 ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
540
541 if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
542 dw4 |= ff->thread_count << GEN75_PS_DW4_MAX_THREADS__SHIFT |
543 (ff->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
544 } else {
545 dw4 |= ff->thread_count << GEN7_PS_DW4_MAX_THREADS__SHIFT;
546 }
547
548 if (ff->pcb_enable)
549 dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
550 if (io->attr_count)
551 dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
552 if (io->write_omask)
553 dw4 |= GEN7_PS_DW4_COMPUTE_OMASK;
554 if (info->rt_clear_enable)
555 dw4 |= GEN7_PS_DW4_RT_FAST_CLEAR;
556 if (ff->dual_source_blending)
557 dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
558 if (info->rt_resolve_enable)
559 dw4 |= GEN7_PS_DW4_RT_RESOLVE;
560 if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff->has_uav)
561 dw4 |= GEN75_PS_DW4_ACCESS_UAV;
562
563 dw5 = ff->grf_starts[0] << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
564 ff->grf_starts[1] << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
565 ff->grf_starts[2] << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
566
567 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 8);
568 ps->ps[2] = dw2;
569 ps->ps[3] = dw3;
570 ps->ps[4] = dw4;
571 ps->ps[5] = dw5;
572 ps->ps[6] = ff->kernel_offsets[1];
573 ps->ps[7] = ff->kernel_offsets[2];
574
575 return true;
576 }
577
578 static bool
579 ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps,
580 const struct ilo_dev *dev,
581 const struct ilo_state_ps_info *info,
582 const struct pixel_ff *ff)
583 {
584 const struct ilo_state_ps_io_info *io = &info->io;
585 uint32_t dw3, dw4, dw6, dw7;
586
587 ILO_DEV_ASSERT(dev, 8, 8);
588
589 /*
590 * Set VME here for correct computation of LODs and others. Not sure why
591 * it is needed now.
592 */
593 dw3 = GEN6_THREADDISP_VME |
594 ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
595 ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
596
597 if (false)
598 dw3 |= GEN6_THREADDISP_FP_MODE_ALT;
599
600 dw4 = ff->per_thread_scratch_space <<
601 GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
602
603 dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT |
604 io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT |
605 ff->dispatch_modes << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;
606
607 if (ff->pcb_enable)
608 dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE;
609
610 if (info->rt_clear_enable)
611 dw6 |= GEN8_PS_DW6_RT_FAST_CLEAR;
612 if (info->rt_resolve_enable)
613 dw6 |= GEN8_PS_DW6_RT_RESOLVE;
614
615 dw7 = ff->grf_starts[0] << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
616 ff->grf_starts[1] << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
617 ff->grf_starts[2] << GEN8_PS_DW7_URB_GRF_START2__SHIFT;
618
619 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 6);
620 ps->ps[0] = dw3;
621 ps->ps[1] = dw4;
622 ps->ps[2] = dw6;
623 ps->ps[3] = dw7;
624 ps->ps[4] = ff->kernel_offsets[1];
625 ps->ps[5] = ff->kernel_offsets[2];
626
627 return true;
628 }
629
630 static bool
631 ps_set_gen8_3DSTATE_PS_EXTRA(struct ilo_state_ps *ps,
632 const struct ilo_dev *dev,
633 const struct ilo_state_ps_info *info,
634 const struct pixel_ff *ff)
635 {
636 const struct ilo_state_ps_io_info *io = &info->io;
637 uint32_t dw1;
638
639 ILO_DEV_ASSERT(dev, 8, 8);
640
641 dw1 = io->pscdepth << GEN8_PSX_DW1_PSCDEPTH__SHIFT;
642
643 if (info->valid_kernels)
644 dw1 |= GEN8_PSX_DW1_VALID;
645 if (!io->has_rt_write)
646 dw1 |= GEN8_PSX_DW1_UAV_ONLY;
647 if (io->write_omask)
648 dw1 |= GEN8_PSX_DW1_COMPUTE_OMASK;
649 if (io->write_pixel_mask)
650 dw1 |= GEN8_PSX_DW1_KILL_PIXEL;
651
652 if (io->use_z)
653 dw1 |= GEN8_PSX_DW1_USE_DEPTH;
654 if (io->use_w)
655 dw1 |= GEN8_PSX_DW1_USE_W;
656 if (io->attr_count)
657 dw1 |= GEN8_PSX_DW1_ATTR_ENABLE;
658
659 if (info->per_sample_dispatch)
660 dw1 |= GEN8_PSX_DW1_PER_SAMPLE;
661 if (ff->has_uav)
662 dw1 |= GEN8_PSX_DW1_ACCESS_UAV;
663 if (io->use_coverage_mask)
664 dw1 |= GEN8_PSX_DW1_USE_COVERAGE_MASK;
665
666 /*
667 * From the Broadwell PRM, volume 2b, page 151:
668 *
669 * "When this bit (Pixel Shader Valid) clear the rest of this command
670 * should also be clear.
671 */
672 if (!info->valid_kernels)
673 dw1 = 0;
674
675 STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 5);
676 ps->ps[4] = dw1;
677
678 return true;
679 }
680
681 bool
682 ilo_state_ps_init(struct ilo_state_ps *ps,
683 const struct ilo_dev *dev,
684 const struct ilo_state_ps_info *info)
685 {
686 struct pixel_ff ff;
687 bool ret = true;
688
689 assert(ilo_is_zeroed(ps, sizeof(*ps)));
690
691 ret &= ps_get_gen6_ff(dev, info, &ff);
692
693 if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
694 ret &= ps_set_gen8_3DSTATE_PS(ps, dev, info, &ff);
695 ret &= ps_set_gen8_3DSTATE_PS_EXTRA(ps, dev, info, &ff);
696 } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
697 ret &= ps_set_gen7_3dstate_wm(ps, dev, info, &ff);
698 ret &= ps_set_gen7_3DSTATE_PS(ps, dev, info, &ff);
699 } else {
700 ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff);
701 }
702
703 ps->scratch_size = ff.per_thread_scratch_size * ff.thread_count;
704 /* save conditions */
705 ps->conds = ff.conds;
706
707 assert(ret);
708
709 return ret;
710 }
711
712 bool
713 ilo_state_ps_init_disabled(struct ilo_state_ps *ps,
714 const struct ilo_dev *dev)
715 {
716 struct ilo_state_ps_info info;
717
718 memset(&info, 0, sizeof(info));
719
720 return ilo_state_ps_init(ps, dev, &info);
721 }
722
723 bool
724 ilo_state_ps_set_params(struct ilo_state_ps *ps,
725 const struct ilo_dev *dev,
726 const struct ilo_state_ps_params_info *params)
727 {
728 ILO_DEV_ASSERT(dev, 6, 8);
729
730 /* modify sample mask */
731 if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
732 ps->ps[4] = (ps->ps[4] & ~GEN75_PS_DW4_SAMPLE_MASK__MASK) |
733 (params->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
734 }
735
736 /* modify dispatch enable, pixel kill, and dual source blending */
737 if (ilo_dev_gen(dev) < ILO_GEN(8)) {
738 if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
739 if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds))
740 ps->ps[0] |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
741 else
742 ps->ps[0] &= ~GEN7_WM_DW1_PS_DISPATCH_ENABLE;
743
744 if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds))
745 ps->ps[0] |= GEN7_WM_DW1_PS_KILL_PIXEL;
746 else
747 ps->ps[0] &= ~GEN7_WM_DW1_PS_KILL_PIXEL;
748
749 if (params->dual_source_blending)
750 ps->ps[4] |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
751 else
752 ps->ps[4] &= ~GEN7_PS_DW4_DUAL_SOURCE_BLEND;
753 } else {
754 if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds))
755 ps->ps[3] |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
756 else
757 ps->ps[3] &= ~GEN6_WM_DW5_PS_DISPATCH_ENABLE;
758
759 if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds))
760 ps->ps[3] |= GEN6_WM_DW5_PS_KILL_PIXEL;
761 else
762 ps->ps[3] &= ~GEN6_WM_DW5_PS_KILL_PIXEL;
763
764 if (params->dual_source_blending)
765 ps->ps[3] |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
766 else
767 ps->ps[3] &= ~GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
768 }
769 }
770
771 return true;
772 }