turnip: parse VkPipelineRasterizationStateCreateInfo
[mesa.git] / src / freedreno / vulkan / tu_pipeline.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27
28 #include "tu_private.h"
29
30 #include "main/menums.h"
31 #include "nir/nir.h"
32 #include "nir/nir_builder.h"
33 #include "spirv/nir_spirv.h"
34 #include "util/debug.h"
35 #include "util/mesa-sha1.h"
36 #include "util/u_atomic.h"
37 #include "vk_format.h"
38 #include "vk_util.h"
39
40 #include "tu_cs.h"
41
42 struct tu_pipeline_builder
43 {
44 struct tu_device *device;
45 struct tu_pipeline_cache *cache;
46 const VkAllocationCallbacks *alloc;
47 const VkGraphicsPipelineCreateInfo *create_info;
48
49 bool rasterizer_discard;
50 /* these states are affectd by rasterizer_discard */
51 VkSampleCountFlagBits samples;
52 };
53
54 static enum tu_dynamic_state_bits
55 tu_dynamic_state_bit(VkDynamicState state)
56 {
57 switch (state) {
58 case VK_DYNAMIC_STATE_VIEWPORT:
59 return TU_DYNAMIC_VIEWPORT;
60 case VK_DYNAMIC_STATE_SCISSOR:
61 return TU_DYNAMIC_SCISSOR;
62 case VK_DYNAMIC_STATE_LINE_WIDTH:
63 return TU_DYNAMIC_LINE_WIDTH;
64 case VK_DYNAMIC_STATE_DEPTH_BIAS:
65 return TU_DYNAMIC_DEPTH_BIAS;
66 case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
67 return TU_DYNAMIC_BLEND_CONSTANTS;
68 case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
69 return TU_DYNAMIC_DEPTH_BOUNDS;
70 case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
71 return TU_DYNAMIC_STENCIL_COMPARE_MASK;
72 case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
73 return TU_DYNAMIC_STENCIL_WRITE_MASK;
74 case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
75 return TU_DYNAMIC_STENCIL_REFERENCE;
76 default:
77 unreachable("invalid dynamic state");
78 return 0;
79 }
80 }
81
82 static enum pc_di_primtype
83 tu6_primtype(VkPrimitiveTopology topology)
84 {
85 switch (topology) {
86 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
87 return DI_PT_POINTLIST;
88 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
89 return DI_PT_LINELIST;
90 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
91 return DI_PT_LINESTRIP;
92 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
93 return DI_PT_TRILIST;
94 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
95 return DI_PT_TRILIST;
96 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
97 return DI_PT_TRIFAN;
98 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
99 return DI_PT_LINE_ADJ;
100 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
101 return DI_PT_LINESTRIP_ADJ;
102 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
103 return DI_PT_TRI_ADJ;
104 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
105 return DI_PT_TRISTRIP_ADJ;
106 case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
107 default:
108 unreachable("invalid primitive topology");
109 return DI_PT_NONE;
110 }
111 }
112
113 static uint32_t
114 tu6_guardband_adj(uint32_t v)
115 {
116 if (v > 256)
117 return (uint32_t)(511.0 - 65.0 * (log2(v) - 8.0));
118 else
119 return 511;
120 }
121
122 void
123 tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport)
124 {
125 float offsets[3];
126 float scales[3];
127 scales[0] = viewport->width / 2.0f;
128 scales[1] = viewport->height / 2.0f;
129 scales[2] = viewport->maxDepth - viewport->minDepth;
130 offsets[0] = viewport->x + scales[0];
131 offsets[1] = viewport->y + scales[1];
132 offsets[2] = viewport->minDepth;
133
134 VkOffset2D min;
135 VkOffset2D max;
136 min.x = (int32_t) viewport->x;
137 max.x = (int32_t) ceilf(viewport->x + viewport->width);
138 if (viewport->height >= 0.0f) {
139 min.y = (int32_t) viewport->y;
140 max.y = (int32_t) ceilf(viewport->y + viewport->height);
141 } else {
142 min.y = (int32_t)(viewport->y + viewport->height);
143 max.y = (int32_t) ceilf(viewport->y);
144 }
145 /* the spec allows viewport->height to be 0.0f */
146 if (min.y == max.y)
147 max.y++;
148 assert(min.x >= 0 && min.x < max.x);
149 assert(min.y >= 0 && min.y < max.y);
150
151 VkExtent2D guardband_adj;
152 guardband_adj.width = tu6_guardband_adj(max.x - min.x);
153 guardband_adj.height = tu6_guardband_adj(max.y - min.y);
154
155 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET_0, 6);
156 tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XOFFSET_0(offsets[0]));
157 tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XSCALE_0(scales[0]));
158 tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YOFFSET_0(offsets[1]));
159 tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YSCALE_0(scales[1]));
160 tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZOFFSET_0(offsets[2]));
161 tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZSCALE_0(scales[2]));
162
163 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2);
164 tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(min.x) |
165 A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(min.y));
166 tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(max.x - 1) |
167 A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(max.y - 1));
168
169 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1);
170 tu_cs_emit(cs,
171 A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband_adj.width) |
172 A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband_adj.height));
173 }
174
175 void
176 tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor)
177 {
178 const VkOffset2D min = scissor->offset;
179 const VkOffset2D max = {
180 scissor->offset.x + scissor->extent.width,
181 scissor->offset.y + scissor->extent.height,
182 };
183
184 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2);
185 tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(min.x) |
186 A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(min.y));
187 tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(max.x - 1) |
188 A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(max.y - 1));
189 }
190
191 static void
192 tu6_emit_gras_unknowns(struct tu_cs *cs)
193 {
194 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8000, 1);
195 tu_cs_emit(cs, 0x80);
196 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8001, 1);
197 tu_cs_emit(cs, 0x0);
198 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8004, 1);
199 tu_cs_emit(cs, 0x0);
200 }
201
202 static void
203 tu6_emit_point_size(struct tu_cs *cs)
204 {
205 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POINT_MINMAX, 2);
206 tu_cs_emit(cs, A6XX_GRAS_SU_POINT_MINMAX_MIN(1.0f / 16.0f) |
207 A6XX_GRAS_SU_POINT_MINMAX_MAX(4092.0f));
208 tu_cs_emit(cs, A6XX_GRAS_SU_POINT_SIZE(1.0f));
209 }
210
211 static uint32_t
212 tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info,
213 VkSampleCountFlagBits samples)
214 {
215 uint32_t gras_su_cntl = 0;
216
217 if (rast_info->cullMode & VK_CULL_MODE_FRONT_BIT)
218 gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT;
219 if (rast_info->cullMode & VK_CULL_MODE_BACK_BIT)
220 gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK;
221
222 if (rast_info->frontFace == VK_FRONT_FACE_CLOCKWISE)
223 gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW;
224
225 /* don't set A6XX_GRAS_SU_CNTL_LINEHALFWIDTH */
226
227 if (rast_info->depthBiasEnable)
228 gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET;
229
230 if (samples > VK_SAMPLE_COUNT_1_BIT)
231 gras_su_cntl |= A6XX_GRAS_SU_CNTL_MSAA_ENABLE;
232
233 return gras_su_cntl;
234 }
235
236 void
237 tu6_emit_gras_su_cntl(struct tu_cs *cs,
238 uint32_t gras_su_cntl,
239 float line_width)
240 {
241 assert((gras_su_cntl & A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK) == 0);
242 gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(line_width / 2.0f);
243
244 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_CNTL, 1);
245 tu_cs_emit(cs, gras_su_cntl);
246 }
247
248 void
249 tu6_emit_depth_bias(struct tu_cs *cs,
250 float constant_factor,
251 float clamp,
252 float slope_factor)
253 {
254 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
255 tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor));
256 tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor));
257 tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp));
258 }
259
260 static VkResult
261 tu_pipeline_builder_create_pipeline(struct tu_pipeline_builder *builder,
262 struct tu_pipeline **out_pipeline)
263 {
264 struct tu_device *dev = builder->device;
265
266 struct tu_pipeline *pipeline =
267 vk_zalloc2(&dev->alloc, builder->alloc, sizeof(*pipeline), 8,
268 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
269 if (!pipeline)
270 return VK_ERROR_OUT_OF_HOST_MEMORY;
271
272 tu_cs_init(&pipeline->cs, TU_CS_MODE_SUB_STREAM, 2048);
273
274 /* reserve the space now such that tu_cs_begin_sub_stream never fails */
275 VkResult result = tu_cs_reserve_space(dev, &pipeline->cs, 2048);
276 if (result != VK_SUCCESS) {
277 vk_free2(&dev->alloc, builder->alloc, pipeline);
278 return result;
279 }
280
281 *out_pipeline = pipeline;
282
283 return VK_SUCCESS;
284 }
285
286 static void
287 tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder,
288 struct tu_pipeline *pipeline)
289 {
290 const VkPipelineDynamicStateCreateInfo *dynamic_info =
291 builder->create_info->pDynamicState;
292
293 if (!dynamic_info)
294 return;
295
296 for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) {
297 pipeline->dynamic_state.mask |=
298 tu_dynamic_state_bit(dynamic_info->pDynamicStates[i]);
299 }
300 }
301
302 static void
303 tu_pipeline_builder_parse_input_assembly(struct tu_pipeline_builder *builder,
304 struct tu_pipeline *pipeline)
305 {
306 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
307 builder->create_info->pInputAssemblyState;
308
309 pipeline->ia.primtype = tu6_primtype(ia_info->topology);
310 pipeline->ia.primitive_restart = ia_info->primitiveRestartEnable;
311 }
312
313 static void
314 tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder,
315 struct tu_pipeline *pipeline)
316 {
317 /* The spec says:
318 *
319 * pViewportState is a pointer to an instance of the
320 * VkPipelineViewportStateCreateInfo structure, and is ignored if the
321 * pipeline has rasterization disabled."
322 *
323 * We leave the relevant registers stale in that case.
324 */
325 if (builder->rasterizer_discard)
326 return;
327
328 const VkPipelineViewportStateCreateInfo *vp_info =
329 builder->create_info->pViewportState;
330
331 struct tu_cs vp_cs;
332 tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 15, &vp_cs);
333
334 if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_VIEWPORT)) {
335 assert(vp_info->viewportCount == 1);
336 tu6_emit_viewport(&vp_cs, vp_info->pViewports);
337 }
338
339 if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SCISSOR)) {
340 assert(vp_info->scissorCount == 1);
341 tu6_emit_scissor(&vp_cs, vp_info->pScissors);
342 }
343
344 pipeline->vp.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vp_cs);
345 }
346
347 static void
348 tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
349 struct tu_pipeline *pipeline)
350 {
351 const VkPipelineRasterizationStateCreateInfo *rast_info =
352 builder->create_info->pRasterizationState;
353
354 assert(!rast_info->depthClampEnable);
355 assert(rast_info->polygonMode == VK_POLYGON_MODE_FILL);
356
357 struct tu_cs rast_cs;
358 tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 20, &rast_cs);
359
360 /* move to hw ctx init? */
361 tu6_emit_gras_unknowns(&rast_cs);
362 tu6_emit_point_size(&rast_cs);
363
364 const uint32_t gras_su_cntl =
365 tu6_gras_su_cntl(rast_info, builder->samples);
366
367 if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH))
368 tu6_emit_gras_su_cntl(&rast_cs, gras_su_cntl, rast_info->lineWidth);
369
370 if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_DEPTH_BIAS)) {
371 tu6_emit_depth_bias(&rast_cs, rast_info->depthBiasConstantFactor,
372 rast_info->depthBiasClamp,
373 rast_info->depthBiasSlopeFactor);
374 }
375
376 pipeline->rast.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &rast_cs);
377
378 pipeline->rast.gras_su_cntl = gras_su_cntl;
379 }
380
381 static void
382 tu_pipeline_finish(struct tu_pipeline *pipeline,
383 struct tu_device *dev,
384 const VkAllocationCallbacks *alloc)
385 {
386 tu_cs_finish(dev, &pipeline->cs);
387 }
388
389 static VkResult
390 tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
391 struct tu_pipeline **pipeline)
392 {
393 VkResult result = tu_pipeline_builder_create_pipeline(builder, pipeline);
394 if (result != VK_SUCCESS)
395 return result;
396
397 tu_pipeline_builder_parse_dynamic(builder, *pipeline);
398 tu_pipeline_builder_parse_input_assembly(builder, *pipeline);
399 tu_pipeline_builder_parse_viewport(builder, *pipeline);
400 tu_pipeline_builder_parse_rasterization(builder, *pipeline);
401
402 /* we should have reserved enough space upfront such that the CS never
403 * grows
404 */
405 assert((*pipeline)->cs.bo_count == 1);
406
407 return VK_SUCCESS;
408 }
409
410 static void
411 tu_pipeline_builder_init_graphics(
412 struct tu_pipeline_builder *builder,
413 struct tu_device *dev,
414 struct tu_pipeline_cache *cache,
415 const VkGraphicsPipelineCreateInfo *create_info,
416 const VkAllocationCallbacks *alloc)
417 {
418 *builder = (struct tu_pipeline_builder) {
419 .device = dev,
420 .cache = cache,
421 .create_info = create_info,
422 .alloc = alloc,
423 };
424
425 builder->rasterizer_discard =
426 create_info->pRasterizationState->rasterizerDiscardEnable;
427
428 if (builder->rasterizer_discard)
429 builder->samples = VK_SAMPLE_COUNT_1_BIT;
430 else
431 builder->samples = create_info->pMultisampleState->rasterizationSamples;
432 }
433
434 VkResult
435 tu_CreateGraphicsPipelines(VkDevice device,
436 VkPipelineCache pipelineCache,
437 uint32_t count,
438 const VkGraphicsPipelineCreateInfo *pCreateInfos,
439 const VkAllocationCallbacks *pAllocator,
440 VkPipeline *pPipelines)
441 {
442 TU_FROM_HANDLE(tu_device, dev, device);
443 TU_FROM_HANDLE(tu_pipeline_cache, cache, pipelineCache);
444
445 for (uint32_t i = 0; i < count; i++) {
446 struct tu_pipeline_builder builder;
447 tu_pipeline_builder_init_graphics(&builder, dev, cache,
448 &pCreateInfos[i], pAllocator);
449
450 struct tu_pipeline *pipeline;
451 VkResult result = tu_pipeline_builder_build(&builder, &pipeline);
452
453 if (result != VK_SUCCESS) {
454 for (uint32_t j = 0; j < i; j++) {
455 tu_DestroyPipeline(device, pPipelines[j], pAllocator);
456 pPipelines[j] = VK_NULL_HANDLE;
457 }
458
459 return result;
460 }
461
462 pPipelines[i] = tu_pipeline_to_handle(pipeline);
463 }
464
465 return VK_SUCCESS;
466 }
467
468 static VkResult
469 tu_compute_pipeline_create(VkDevice _device,
470 VkPipelineCache _cache,
471 const VkComputePipelineCreateInfo *pCreateInfo,
472 const VkAllocationCallbacks *pAllocator,
473 VkPipeline *pPipeline)
474 {
475 return VK_SUCCESS;
476 }
477
478 VkResult
479 tu_CreateComputePipelines(VkDevice _device,
480 VkPipelineCache pipelineCache,
481 uint32_t count,
482 const VkComputePipelineCreateInfo *pCreateInfos,
483 const VkAllocationCallbacks *pAllocator,
484 VkPipeline *pPipelines)
485 {
486 VkResult result = VK_SUCCESS;
487
488 unsigned i = 0;
489 for (; i < count; i++) {
490 VkResult r;
491 r = tu_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i],
492 pAllocator, &pPipelines[i]);
493 if (r != VK_SUCCESS) {
494 result = r;
495 pPipelines[i] = VK_NULL_HANDLE;
496 }
497 }
498
499 return result;
500 }
501
502 void
503 tu_DestroyPipeline(VkDevice _device,
504 VkPipeline _pipeline,
505 const VkAllocationCallbacks *pAllocator)
506 {
507 TU_FROM_HANDLE(tu_device, dev, _device);
508 TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline);
509
510 if (!_pipeline)
511 return;
512
513 tu_pipeline_finish(pipeline, dev, pAllocator);
514 vk_free2(&dev->alloc, pAllocator, pipeline);
515 }