4e04aadfe898beacc7e355ca5ab083afaf541185
[mesa.git] / src / intel / vulkan / gen8_pipeline.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31
32 #include "genxml/gen_macros.h"
33 #include "genxml/genX_pack.h"
34
35 #include "genX_pipeline_util.h"
36
37 static void
38 emit_ia_state(struct anv_pipeline *pipeline,
39 const VkPipelineInputAssemblyStateCreateInfo *info,
40 const struct anv_graphics_pipeline_create_info *extra)
41 {
42 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
43 vft.PrimitiveTopologyType = pipeline->topology;
44 }
45 }
46
47 static void
48 emit_rs_state(struct anv_pipeline *pipeline,
49 const VkPipelineRasterizationStateCreateInfo *info,
50 const VkPipelineMultisampleStateCreateInfo *ms_info,
51 const struct anv_graphics_pipeline_create_info *extra)
52 {
53 uint32_t samples = 1;
54
55 if (ms_info)
56 samples = ms_info->rasterizationSamples;
57
58 struct GENX(3DSTATE_SF) sf = {
59 GENX(3DSTATE_SF_header),
60 .ViewportTransformEnable = !(extra && extra->use_rectlist),
61 .TriangleStripListProvokingVertexSelect = 0,
62 .LineStripListProvokingVertexSelect = 0,
63 .TriangleFanProvokingVertexSelect = 1,
64 .PointWidthSource = Vertex,
65 .PointWidth = 1.0,
66 };
67
68 /* FINISHME: VkBool32 rasterizerDiscardEnable; */
69
70 GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf);
71
72 struct GENX(3DSTATE_RASTER) raster = {
73 GENX(3DSTATE_RASTER_header),
74
75 /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
76 * "Multisample Modes State".
77 */
78 .DXMultisampleRasterizationEnable = samples > 1,
79 .ForcedSampleCount = FSC_NUMRASTSAMPLES_0,
80 .ForceMultisampling = false,
81
82 .FrontWinding = vk_to_gen_front_face[info->frontFace],
83 .CullMode = vk_to_gen_cullmode[info->cullMode],
84 .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
85 .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
86 .ScissorRectangleEnable = !(extra && extra->use_rectlist),
87 #if GEN_GEN == 8
88 .ViewportZClipTestEnable = !pipeline->depth_clamp_enable,
89 #else
90 /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
91 .ViewportZFarClipTestEnable = !pipeline->depth_clamp_enable,
92 .ViewportZNearClipTestEnable = !pipeline->depth_clamp_enable,
93 #endif
94 .GlobalDepthOffsetEnableSolid = info->depthBiasEnable,
95 .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable,
96 .GlobalDepthOffsetEnablePoint = info->depthBiasEnable,
97 };
98
99 GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster);
100 }
101
102 static void
103 emit_ms_state(struct anv_pipeline *pipeline,
104 const VkPipelineMultisampleStateCreateInfo *info)
105 {
106 uint32_t samples = 1;
107 uint32_t log2_samples = 0;
108
109 /* From the Vulkan 1.0 spec:
110 * If pSampleMask is NULL, it is treated as if the mask has all bits
111 * enabled, i.e. no coverage is removed from fragments.
112 *
113 * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
114 */
115 uint32_t sample_mask = 0xffff;
116
117 if (info) {
118 samples = info->rasterizationSamples;
119 log2_samples = __builtin_ffs(samples) - 1;
120 }
121
122 if (info && info->pSampleMask)
123 sample_mask &= info->pSampleMask[0];
124
125 if (info && info->sampleShadingEnable)
126 anv_finishme("VkPipelineMultisampleStateCreateInfo::sampleShadingEnable");
127
128 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
129 /* The PRM says that this bit is valid only for DX9:
130 *
131 * SW can choose to set this bit only for DX9 API. DX10/OGL API's
132 * should not have any effect by setting or not setting this bit.
133 */
134 ms.PixelPositionOffsetEnable = false;
135
136 ms.PixelLocation = CENTER;
137 ms.NumberofMultisamples = log2_samples;
138 }
139
140 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
141 sm.SampleMask = sample_mask;
142 }
143 }
144
145 VkResult
146 genX(graphics_pipeline_create)(
147 VkDevice _device,
148 struct anv_pipeline_cache * cache,
149 const VkGraphicsPipelineCreateInfo* pCreateInfo,
150 const struct anv_graphics_pipeline_create_info *extra,
151 const VkAllocationCallbacks* pAllocator,
152 VkPipeline* pPipeline)
153 {
154 ANV_FROM_HANDLE(anv_device, device, _device);
155 ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
156 struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
157 struct anv_pipeline *pipeline;
158 VkResult result;
159 uint32_t offset, length;
160
161 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
162
163 pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
164 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
165 if (pipeline == NULL)
166 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
167
168 result = anv_pipeline_init(pipeline, device, cache,
169 pCreateInfo, extra, pAllocator);
170 if (result != VK_SUCCESS) {
171 anv_free2(&device->alloc, pAllocator, pipeline);
172 return result;
173 }
174
175 assert(pCreateInfo->pVertexInputState);
176 emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra);
177 assert(pCreateInfo->pInputAssemblyState);
178 emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra);
179 assert(pCreateInfo->pRasterizationState);
180 emit_rs_state(pipeline, pCreateInfo->pRasterizationState,
181 pCreateInfo->pMultisampleState, extra);
182 emit_ms_state(pipeline, pCreateInfo->pMultisampleState);
183 emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, subpass);
184 emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
185 pCreateInfo->pMultisampleState);
186
187 emit_urb_setup(pipeline);
188
189 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
190 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
191 clip.ClipEnable = !(extra && extra->use_rectlist);
192 clip.EarlyCullEnable = true;
193 clip.APIMode = 1; /* D3D */
194 clip.ViewportXYClipTestEnable = true;
195
196 clip.ClipMode =
197 pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
198 CLIPMODE_REJECT_ALL : CLIPMODE_NORMAL;
199
200 clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
201 (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0;
202
203 clip.TriangleStripListProvokingVertexSelect = 0;
204 clip.LineStripListProvokingVertexSelect = 0;
205 clip.TriangleFanProvokingVertexSelect = 1;
206
207 clip.MinimumPointWidth = 0.125;
208 clip.MaximumPointWidth = 255.875;
209 clip.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1;
210 }
211
212 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
213 wm.StatisticsEnable = true;
214 wm.LineEndCapAntialiasingRegionWidth = _05pixels;
215 wm.LineAntialiasingRegionWidth = _10pixels;
216 wm.ForceThreadDispatchEnable = NORMAL;
217 wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
218
219 if (wm_prog_data && wm_prog_data->early_fragment_tests) {
220 wm.EarlyDepthStencilControl = PREPS;
221 } else if (wm_prog_data && wm_prog_data->has_side_effects) {
222 wm.EarlyDepthStencilControl = PSEXEC;
223 } else {
224 wm.EarlyDepthStencilControl = NORMAL;
225 }
226
227 wm.BarycentricInterpolationMode = pipeline->ps_ksp0 == NO_KERNEL ?
228 0 : wm_prog_data->barycentric_interp_modes;
229 }
230
231 if (pipeline->gs_kernel == NO_KERNEL) {
232 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs);
233 } else {
234 const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
235 offset = 1;
236 length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
237
238 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) {
239 gs.SingleProgramFlow = false;
240 gs.KernelStartPointer = pipeline->gs_kernel;
241 gs.VectorMaskEnable = false;
242 gs.SamplerCount = 0;
243 gs.BindingTableEntryCount = 0;
244 gs.ExpectedVertexCount = gs_prog_data->vertices_in;
245
246 gs.ScratchSpaceBasePointer = (struct anv_address) {
247 .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
248 MESA_SHADER_GEOMETRY,
249 gs_prog_data->base.base.total_scratch),
250 .offset = 0,
251 };
252 gs.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base);
253 gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
254 gs.OutputTopology = gs_prog_data->output_topology;
255 gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
256 gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles;
257
258 gs.DispatchGRFStartRegisterForURBData =
259 gs_prog_data->base.base.dispatch_grf_start_reg;
260
261 gs.MaximumNumberofThreads = device->info.max_gs_threads / 2 - 1;
262 gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords;
263 gs.DispatchMode = gs_prog_data->base.dispatch_mode;
264 gs.StatisticsEnable = true;
265 gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
266 gs.ReorderMode = TRAILING;
267 gs.Enable = true;
268
269 gs.ControlDataFormat = gs_prog_data->control_data_format;
270
271 gs.StaticOutput = gs_prog_data->static_vertex_count >= 0;
272 gs.StaticOutputVertexCount =
273 gs_prog_data->static_vertex_count >= 0 ?
274 gs_prog_data->static_vertex_count : 0;
275
276 /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled:
277 * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v)
278 * UserClipDistanceCullTestEnableBitmask(v)
279 */
280
281 gs.VertexURBEntryOutputReadOffset = offset;
282 gs.VertexURBEntryOutputLength = length;
283 }
284 }
285
286 const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
287 /* Skip the VUE header and position slots */
288 offset = 1;
289 length = (vs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
290
291 uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 :
292 pipeline->vs_vec4;
293
294 if (vs_start == NO_KERNEL || (extra && extra->disable_vs)) {
295 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs) {
296 vs.FunctionEnable = false;
297 /* Even if VS is disabled, SBE still gets the amount of
298 * vertex data to read from this field. */
299 vs.VertexURBEntryOutputReadOffset = offset;
300 vs.VertexURBEntryOutputLength = length;
301 }
302 } else {
303 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs) {
304 vs.KernelStartPointer = vs_start;
305 vs.SingleVertexDispatch = false;
306 vs.VectorMaskEnable = false;
307 vs.SamplerCount = 0;
308
309 vs.BindingTableEntryCount =
310 vs_prog_data->base.base.binding_table.size_bytes / 4,
311
312 vs.ThreadDispatchPriority = false;
313 vs.FloatingPointMode = IEEE754;
314 vs.IllegalOpcodeExceptionEnable = false;
315 vs.AccessesUAV = false;
316 vs.SoftwareExceptionEnable = false;
317
318 vs.ScratchSpaceBasePointer = (struct anv_address) {
319 .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
320 MESA_SHADER_VERTEX,
321 vs_prog_data->base.base.total_scratch),
322 .offset = 0,
323 };
324 vs.PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base);
325
326 vs.DispatchGRFStartRegisterForURBData =
327 vs_prog_data->base.base.dispatch_grf_start_reg;
328
329 vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length;
330 vs.VertexURBEntryReadOffset = 0;
331
332 vs.MaximumNumberofThreads = device->info.max_vs_threads - 1;
333 vs.StatisticsEnable = false;
334 vs.SIMD8DispatchEnable = pipeline->vs_simd8 != NO_KERNEL;
335 vs.VertexCacheDisable = false;
336 vs.FunctionEnable = true;
337
338 vs.VertexURBEntryOutputReadOffset = offset;
339 vs.VertexURBEntryOutputLength = length;
340
341 /* TODO */
342 vs.UserClipDistanceClipTestEnableBitmask = 0;
343 vs.UserClipDistanceCullTestEnableBitmask = 0;
344 }
345 }
346
347 const int num_thread_bias = GEN_GEN == 8 ? 2 : 1;
348 if (pipeline->ps_ksp0 == NO_KERNEL) {
349 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps);
350 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), extra) {
351 extra.PixelShaderValid = false;
352 }
353 } else {
354 emit_3dstate_sbe(pipeline);
355
356 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
357 ps.KernelStartPointer0 = pipeline->ps_ksp0;
358 ps.KernelStartPointer1 = 0;
359 ps.KernelStartPointer2 = pipeline->ps_ksp0 + wm_prog_data->prog_offset_2;
360 ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
361 ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
362 ps._32PixelDispatchEnable = false;
363 ps.SingleProgramFlow = false;
364 ps.VectorMaskEnable = true;
365 ps.SamplerCount = 1;
366 ps.PushConstantEnable = wm_prog_data->base.nr_params > 0;
367 ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
368 POSOFFSET_SAMPLE: POSOFFSET_NONE;
369
370 ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias;
371
372 ps.ScratchSpaceBasePointer = (struct anv_address) {
373 .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
374 MESA_SHADER_FRAGMENT,
375 wm_prog_data->base.total_scratch),
376 .offset = 0,
377 };
378 ps.PerThreadScratchSpace = scratch_space(&wm_prog_data->base);
379
380 ps.DispatchGRFStartRegisterForConstantSetupData0 =
381 wm_prog_data->base.dispatch_grf_start_reg;
382 ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
383 ps.DispatchGRFStartRegisterForConstantSetupData2 =
384 wm_prog_data->dispatch_grf_start_reg_2;
385 }
386
387 bool per_sample_ps = pCreateInfo->pMultisampleState &&
388 pCreateInfo->pMultisampleState->sampleShadingEnable;
389
390 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps) {
391 ps.PixelShaderValid = true;
392 ps.PixelShaderKillsPixel = wm_prog_data->uses_kill;
393 ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
394 ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
395 ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
396 ps.PixelShaderIsPerSample = per_sample_ps;
397 ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
398 ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
399 #if GEN_GEN >= 9
400 ps.PixelShaderPullsBary = wm_prog_data->pulls_bary;
401 ps.InputCoverageMaskState = wm_prog_data->uses_sample_mask ?
402 ICMS_INNER_CONSERVATIVE : ICMS_NONE;
403 #else
404 ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
405 #endif
406 }
407 }
408
409 *pPipeline = anv_pipeline_to_handle(pipeline);
410
411 return VK_SUCCESS;
412 }