nv50/ir: silence unsupported TGSI_PROPERTY_CS_FIXED_BLOCK_*
[mesa.git] / src / intel / vulkan / gen8_pipeline.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31
32 #include "genxml/gen_macros.h"
33 #include "genxml/genX_pack.h"
34
35 #include "genX_pipeline_util.h"
36
37 static void
38 emit_ia_state(struct anv_pipeline *pipeline,
39 const VkPipelineInputAssemblyStateCreateInfo *info,
40 const struct anv_graphics_pipeline_create_info *extra)
41 {
42 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
43 vft.PrimitiveTopologyType = pipeline->topology;
44 }
45 }
46
47 static void
48 emit_rs_state(struct anv_pipeline *pipeline,
49 const VkPipelineRasterizationStateCreateInfo *info,
50 const VkPipelineMultisampleStateCreateInfo *ms_info,
51 const struct anv_graphics_pipeline_create_info *extra)
52 {
53 uint32_t samples = 1;
54
55 if (ms_info)
56 samples = ms_info->rasterizationSamples;
57
58 struct GENX(3DSTATE_SF) sf = {
59 GENX(3DSTATE_SF_header),
60 .ViewportTransformEnable = !(extra && extra->use_rectlist),
61 .TriangleStripListProvokingVertexSelect = 0,
62 .LineStripListProvokingVertexSelect = 0,
63 .TriangleFanProvokingVertexSelect = 1,
64 .PointWidthSource = Vertex,
65 .PointWidth = 1.0,
66 };
67
68 /* FINISHME: VkBool32 rasterizerDiscardEnable; */
69
70 GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf);
71
72 struct GENX(3DSTATE_RASTER) raster = {
73 GENX(3DSTATE_RASTER_header),
74
75 /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
76 * "Multisample Modes State".
77 */
78 .DXMultisampleRasterizationEnable = samples > 1,
79 .ForcedSampleCount = FSC_NUMRASTSAMPLES_0,
80 .ForceMultisampling = false,
81
82 .FrontWinding = vk_to_gen_front_face[info->frontFace],
83 .CullMode = vk_to_gen_cullmode[info->cullMode],
84 .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
85 .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
86 .ScissorRectangleEnable = !(extra && extra->use_rectlist),
87 #if GEN_GEN == 8
88 .ViewportZClipTestEnable = true,
89 #else
90 /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
91 .ViewportZFarClipTestEnable = true,
92 .ViewportZNearClipTestEnable = true,
93 #endif
94 .GlobalDepthOffsetEnableSolid = info->depthBiasEnable,
95 .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable,
96 .GlobalDepthOffsetEnablePoint = info->depthBiasEnable,
97 };
98
99 GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster);
100 }
101
102 static void
103 emit_cb_state(struct anv_pipeline *pipeline,
104 const VkPipelineColorBlendStateCreateInfo *info,
105 const VkPipelineMultisampleStateCreateInfo *ms_info)
106 {
107 struct anv_device *device = pipeline->device;
108
109 uint32_t num_dwords = GENX(BLEND_STATE_length);
110 pipeline->blend_state =
111 anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
112
113 struct GENX(BLEND_STATE) blend_state = {
114 .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
115 .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
116 };
117
118 /* Default everything to disabled */
119 for (uint32_t i = 0; i < 8; i++) {
120 blend_state.Entry[i].WriteDisableAlpha = true;
121 blend_state.Entry[i].WriteDisableRed = true;
122 blend_state.Entry[i].WriteDisableGreen = true;
123 blend_state.Entry[i].WriteDisableBlue = true;
124 }
125
126 struct anv_pipeline_bind_map *map =
127 &pipeline->bindings[MESA_SHADER_FRAGMENT];
128
129 bool has_writeable_rt = false;
130 for (unsigned i = 0; i < map->surface_count; i++) {
131 struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
132
133 /* All color attachments are at the beginning of the binding table */
134 if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
135 break;
136
137 /* We can have at most 8 attachments */
138 assert(i < 8);
139
140 if (binding->offset >= info->attachmentCount)
141 continue;
142
143 const VkPipelineColorBlendAttachmentState *a =
144 &info->pAttachments[binding->offset];
145
146 if (a->srcColorBlendFactor != a->srcAlphaBlendFactor ||
147 a->dstColorBlendFactor != a->dstAlphaBlendFactor ||
148 a->colorBlendOp != a->alphaBlendOp) {
149 blend_state.IndependentAlphaBlendEnable = true;
150 }
151
152 blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) {
153 .LogicOpEnable = info->logicOpEnable,
154 .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
155 .ColorBufferBlendEnable = a->blendEnable,
156 .PreBlendSourceOnlyClampEnable = false,
157 .ColorClampRange = COLORCLAMP_RTFORMAT,
158 .PreBlendColorClampEnable = true,
159 .PostBlendColorClampEnable = true,
160 .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
161 .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
162 .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
163 .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
164 .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
165 .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
166 .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
167 .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
168 .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
169 .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
170 };
171
172 if (a->colorWriteMask != 0)
173 has_writeable_rt = true;
174
175 /* Our hardware applies the blend factor prior to the blend function
176 * regardless of what function is used. Technically, this means the
177 * hardware can do MORE than GL or Vulkan specify. However, it also
178 * means that, for MIN and MAX, we have to stomp the blend factor to
179 * ONE to make it a no-op.
180 */
181 if (a->colorBlendOp == VK_BLEND_OP_MIN ||
182 a->colorBlendOp == VK_BLEND_OP_MAX) {
183 blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE;
184 blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE;
185 }
186 if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
187 a->alphaBlendOp == VK_BLEND_OP_MAX) {
188 blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE;
189 blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
190 }
191 }
192
193 struct GENX(BLEND_STATE_ENTRY) *bs0 = &blend_state.Entry[0];
194
195 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), blend) {
196 blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
197 blend.HasWriteableRT = has_writeable_rt;
198 blend.ColorBufferBlendEnable = bs0->ColorBufferBlendEnable;
199 blend.SourceAlphaBlendFactor = bs0->SourceAlphaBlendFactor;
200 blend.DestinationAlphaBlendFactor = bs0->DestinationAlphaBlendFactor;
201 blend.SourceBlendFactor = bs0->SourceBlendFactor;
202 blend.DestinationBlendFactor = bs0->DestinationBlendFactor;
203 blend.AlphaTestEnable = false;
204 blend.IndependentAlphaBlendEnable =
205 blend_state.IndependentAlphaBlendEnable;
206 }
207
208 GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
209 if (!device->info.has_llc)
210 anv_state_clflush(pipeline->blend_state);
211
212 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
213 bsp.BlendStatePointer = pipeline->blend_state.offset;
214 bsp.BlendStatePointerValid = true;
215 }
216 }
217
218 static void
219 emit_ds_state(struct anv_pipeline *pipeline,
220 const VkPipelineDepthStencilStateCreateInfo *info)
221 {
222 uint32_t *dw = GEN_GEN == 8 ?
223 pipeline->gen8.wm_depth_stencil : pipeline->gen9.wm_depth_stencil;
224
225 if (info == NULL) {
226 /* We're going to OR this together with the dynamic state. We need
227 * to make sure it's initialized to something useful.
228 */
229 memset(pipeline->gen8.wm_depth_stencil, 0,
230 sizeof(pipeline->gen8.wm_depth_stencil));
231 memset(pipeline->gen9.wm_depth_stencil, 0,
232 sizeof(pipeline->gen9.wm_depth_stencil));
233 return;
234 }
235
236 /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */
237
238 struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = {
239 .DepthTestEnable = info->depthTestEnable,
240 .DepthBufferWriteEnable = info->depthWriteEnable,
241 .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp],
242 .DoubleSidedStencilEnable = true,
243
244 .StencilTestEnable = info->stencilTestEnable,
245 .StencilBufferWriteEnable = info->stencilTestEnable,
246 .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp],
247 .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp],
248 .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp],
249 .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp],
250 .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp],
251 .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp],
252 .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.depthFailOp],
253 .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp],
254 };
255
256 /* From the Broadwell PRM:
257 *
258 * "If Depth_Test_Enable = 1 AND Depth_Test_func = EQUAL, the
259 * Depth_Write_Enable must be set to 0."
260 */
261 if (info->depthTestEnable && info->depthCompareOp == VK_COMPARE_OP_EQUAL)
262 wm_depth_stencil.DepthBufferWriteEnable = false;
263
264 GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &wm_depth_stencil);
265 }
266
267 static void
268 emit_ms_state(struct anv_pipeline *pipeline,
269 const VkPipelineMultisampleStateCreateInfo *info)
270 {
271 uint32_t samples = 1;
272 uint32_t log2_samples = 0;
273
274 /* From the Vulkan 1.0 spec:
275 * If pSampleMask is NULL, it is treated as if the mask has all bits
276 * enabled, i.e. no coverage is removed from fragments.
277 *
278 * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
279 */
280 uint32_t sample_mask = 0xffff;
281
282 if (info) {
283 samples = info->rasterizationSamples;
284 log2_samples = __builtin_ffs(samples) - 1;
285 }
286
287 if (info && info->pSampleMask)
288 sample_mask &= info->pSampleMask[0];
289
290 if (info && info->sampleShadingEnable)
291 anv_finishme("VkPipelineMultisampleStateCreateInfo::sampleShadingEnable");
292
293 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
294 /* The PRM says that this bit is valid only for DX9:
295 *
296 * SW can choose to set this bit only for DX9 API. DX10/OGL API's
297 * should not have any effect by setting or not setting this bit.
298 */
299 ms.PixelPositionOffsetEnable = false;
300
301 ms.PixelLocation = CENTER;
302 ms.NumberofMultisamples = log2_samples;
303 }
304
305 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
306 sm.SampleMask = sample_mask;
307 }
308 }
309
310 VkResult
311 genX(graphics_pipeline_create)(
312 VkDevice _device,
313 struct anv_pipeline_cache * cache,
314 const VkGraphicsPipelineCreateInfo* pCreateInfo,
315 const struct anv_graphics_pipeline_create_info *extra,
316 const VkAllocationCallbacks* pAllocator,
317 VkPipeline* pPipeline)
318 {
319 ANV_FROM_HANDLE(anv_device, device, _device);
320 struct anv_pipeline *pipeline;
321 VkResult result;
322 uint32_t offset, length;
323
324 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
325
326 pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
327 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
328 if (pipeline == NULL)
329 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
330
331 result = anv_pipeline_init(pipeline, device, cache,
332 pCreateInfo, extra, pAllocator);
333 if (result != VK_SUCCESS) {
334 anv_free2(&device->alloc, pAllocator, pipeline);
335 return result;
336 }
337
338 assert(pCreateInfo->pVertexInputState);
339 emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra);
340 assert(pCreateInfo->pInputAssemblyState);
341 emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra);
342 assert(pCreateInfo->pRasterizationState);
343 emit_rs_state(pipeline, pCreateInfo->pRasterizationState,
344 pCreateInfo->pMultisampleState, extra);
345 emit_ms_state(pipeline, pCreateInfo->pMultisampleState);
346 emit_ds_state(pipeline, pCreateInfo->pDepthStencilState);
347 emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
348 pCreateInfo->pMultisampleState);
349
350 emit_urb_setup(pipeline);
351
352 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
353 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
354 clip.ClipEnable = !(extra && extra->use_rectlist);
355 clip.EarlyCullEnable = true;
356 clip.APIMode = 1; /* D3D */
357 clip.ViewportXYClipTestEnable = true;
358
359 clip.ClipMode =
360 pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
361 REJECT_ALL : NORMAL;
362
363 clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
364 (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0;
365
366 clip.TriangleStripListProvokingVertexSelect = 0;
367 clip.LineStripListProvokingVertexSelect = 0;
368 clip.TriangleFanProvokingVertexSelect = 1;
369
370 clip.MinimumPointWidth = 0.125;
371 clip.MaximumPointWidth = 255.875;
372 clip.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1;
373 }
374
375 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
376 wm.StatisticsEnable = true;
377 wm.LineEndCapAntialiasingRegionWidth = _05pixels;
378 wm.LineAntialiasingRegionWidth = _10pixels;
379 wm.EarlyDepthStencilControl = NORMAL;
380 wm.ForceThreadDispatchEnable = NORMAL;
381 wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
382
383 wm.BarycentricInterpolationMode = pipeline->ps_ksp0 == NO_KERNEL ?
384 0 : wm_prog_data->barycentric_interp_modes;
385 }
386
387 if (pipeline->gs_kernel == NO_KERNEL) {
388 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs);
389 } else {
390 const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
391 offset = 1;
392 length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
393
394 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) {
395 gs.SingleProgramFlow = false;
396 gs.KernelStartPointer = pipeline->gs_kernel;
397 gs.VectorMaskEnable = false;
398 gs.SamplerCount = 0;
399 gs.BindingTableEntryCount = 0;
400 gs.ExpectedVertexCount = gs_prog_data->vertices_in;
401
402 gs.ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY];
403 gs.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base);
404 gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
405 gs.OutputTopology = gs_prog_data->output_topology;
406 gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
407 gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles;
408
409 gs.DispatchGRFStartRegisterForURBData =
410 gs_prog_data->base.base.dispatch_grf_start_reg;
411
412 gs.MaximumNumberofThreads = device->info.max_gs_threads / 2 - 1;
413 gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords;
414 gs.DispatchMode = gs_prog_data->base.dispatch_mode;
415 gs.StatisticsEnable = true;
416 gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
417 gs.ReorderMode = TRAILING;
418 gs.Enable = true;
419
420 gs.ControlDataFormat = gs_prog_data->control_data_format;
421
422 gs.StaticOutput = gs_prog_data->static_vertex_count >= 0;
423 gs.StaticOutputVertexCount =
424 gs_prog_data->static_vertex_count >= 0 ?
425 gs_prog_data->static_vertex_count : 0;
426
427 /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled:
428 * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v)
429 * UserClipDistanceCullTestEnableBitmask(v)
430 */
431
432 gs.VertexURBEntryOutputReadOffset = offset;
433 gs.VertexURBEntryOutputLength = length;
434 }
435 }
436
437 const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
438 /* Skip the VUE header and position slots */
439 offset = 1;
440 length = (vs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
441
442 uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 :
443 pipeline->vs_vec4;
444
445 if (vs_start == NO_KERNEL || (extra && extra->disable_vs)) {
446 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs) {
447 vs.FunctionEnable = false;
448 /* Even if VS is disabled, SBE still gets the amount of
449 * vertex data to read from this field. */
450 vs.VertexURBEntryOutputReadOffset = offset;
451 vs.VertexURBEntryOutputLength = length;
452 }
453 } else {
454 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs) {
455 vs.KernelStartPointer = vs_start;
456 vs.SingleVertexDispatch = false;
457 vs.VectorMaskEnable = false;
458 vs.SamplerCount = 0;
459
460 vs.BindingTableEntryCount =
461 vs_prog_data->base.base.binding_table.size_bytes / 4,
462
463 vs.ThreadDispatchPriority = false;
464 vs.FloatingPointMode = IEEE754;
465 vs.IllegalOpcodeExceptionEnable = false;
466 vs.AccessesUAV = false;
467 vs.SoftwareExceptionEnable = false;
468
469 vs.ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX],
470 vs.PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base);
471
472 vs.DispatchGRFStartRegisterForURBData =
473 vs_prog_data->base.base.dispatch_grf_start_reg;
474
475 vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length;
476 vs.VertexURBEntryReadOffset = 0;
477
478 vs.MaximumNumberofThreads = device->info.max_vs_threads - 1;
479 vs.StatisticsEnable = false;
480 vs.SIMD8DispatchEnable = pipeline->vs_simd8 != NO_KERNEL;
481 vs.VertexCacheDisable = false;
482 vs.FunctionEnable = true;
483
484 vs.VertexURBEntryOutputReadOffset = offset;
485 vs.VertexURBEntryOutputLength = length;
486
487 /* TODO */
488 vs.UserClipDistanceClipTestEnableBitmask = 0;
489 vs.UserClipDistanceCullTestEnableBitmask = 0;
490 }
491 }
492
493 const int num_thread_bias = GEN_GEN == 8 ? 2 : 1;
494 if (pipeline->ps_ksp0 == NO_KERNEL) {
495 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps);
496 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), extra) {
497 extra.PixelShaderValid = false;
498 }
499 } else {
500 emit_3dstate_sbe(pipeline);
501
502 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
503 ps.KernelStartPointer0 = pipeline->ps_ksp0;
504 ps.KernelStartPointer1 = 0;
505 ps.KernelStartPointer2 = pipeline->ps_ksp2;
506 ps._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL;
507 ps._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL;
508 ps._32PixelDispatchEnable = false;
509 ps.SingleProgramFlow = false;
510 ps.VectorMaskEnable = true;
511 ps.SamplerCount = 1;
512 ps.PushConstantEnable = wm_prog_data->base.nr_params > 0;
513 ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
514 POSOFFSET_SAMPLE: POSOFFSET_NONE;
515
516 ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias;
517
518 ps.ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT];
519 ps.PerThreadScratchSpace = scratch_space(&wm_prog_data->base);
520
521 ps.DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0;
522 ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
523 ps.DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2;
524 }
525
526 bool per_sample_ps = pCreateInfo->pMultisampleState &&
527 pCreateInfo->pMultisampleState->sampleShadingEnable;
528
529 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps) {
530 ps.PixelShaderValid = true;
531 ps.PixelShaderKillsPixel = wm_prog_data->uses_kill;
532 ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
533 ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
534 ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
535 ps.PixelShaderIsPerSample = per_sample_ps;
536 ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
537 ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
538 #if GEN_GEN >= 9
539 ps.PixelShaderPullsBary = wm_prog_data->pulls_bary;
540 ps.InputCoverageMaskState = wm_prog_data->uses_sample_mask ?
541 ICMS_INNER_CONSERVATIVE : ICMS_NONE;
542 #else
543 ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
544 #endif
545 }
546 }
547
548 *pPipeline = anv_pipeline_to_handle(pipeline);
549
550 return VK_SUCCESS;
551 }