turnip: parse VkPipeline{Multisample,ColorBlend}StateCreateInfo
[mesa.git] / src / freedreno / vulkan / tu_pipeline.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27
28 #include "tu_private.h"
29
30 #include "main/menums.h"
31 #include "nir/nir.h"
32 #include "nir/nir_builder.h"
33 #include "spirv/nir_spirv.h"
34 #include "util/debug.h"
35 #include "util/mesa-sha1.h"
36 #include "util/u_atomic.h"
37 #include "vk_format.h"
38 #include "vk_util.h"
39
40 #include "tu_cs.h"
41
/* Transient state shared by the tu_pipeline_builder_parse_* helpers while
 * translating one VkGraphicsPipelineCreateInfo into a tu_pipeline.
 */
struct tu_pipeline_builder
{
   struct tu_device *device;
   struct tu_pipeline_cache *cache;
   const VkAllocationCallbacks *alloc;
   const VkGraphicsPipelineCreateInfo *create_info;

   /* copied from pRasterizationState->rasterizerDiscardEnable */
   bool rasterizer_discard;
   /* these states are affected by rasterizer_discard */
   VkSampleCountFlagBits samples;
   bool use_depth_stencil_attachment;
   bool use_color_attachments;
   VkFormat color_attachment_formats[MAX_RTS];
};
56
57 static enum tu_dynamic_state_bits
58 tu_dynamic_state_bit(VkDynamicState state)
59 {
60 switch (state) {
61 case VK_DYNAMIC_STATE_VIEWPORT:
62 return TU_DYNAMIC_VIEWPORT;
63 case VK_DYNAMIC_STATE_SCISSOR:
64 return TU_DYNAMIC_SCISSOR;
65 case VK_DYNAMIC_STATE_LINE_WIDTH:
66 return TU_DYNAMIC_LINE_WIDTH;
67 case VK_DYNAMIC_STATE_DEPTH_BIAS:
68 return TU_DYNAMIC_DEPTH_BIAS;
69 case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
70 return TU_DYNAMIC_BLEND_CONSTANTS;
71 case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
72 return TU_DYNAMIC_DEPTH_BOUNDS;
73 case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
74 return TU_DYNAMIC_STENCIL_COMPARE_MASK;
75 case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
76 return TU_DYNAMIC_STENCIL_WRITE_MASK;
77 case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
78 return TU_DYNAMIC_STENCIL_REFERENCE;
79 default:
80 unreachable("invalid dynamic state");
81 return 0;
82 }
83 }
84
85 static bool
86 tu_logic_op_reads_dst(VkLogicOp op)
87 {
88 switch (op) {
89 case VK_LOGIC_OP_CLEAR:
90 case VK_LOGIC_OP_COPY:
91 case VK_LOGIC_OP_COPY_INVERTED:
92 case VK_LOGIC_OP_SET:
93 return false;
94 default:
95 return true;
96 }
97 }
98
99 static VkBlendFactor
100 tu_blend_factor_no_dst_alpha(VkBlendFactor factor)
101 {
102 /* treat dst alpha as 1.0 and avoid reading it */
103 switch (factor) {
104 case VK_BLEND_FACTOR_DST_ALPHA:
105 return VK_BLEND_FACTOR_ONE;
106 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
107 return VK_BLEND_FACTOR_ZERO;
108 default:
109 return factor;
110 }
111 }
112
113 static enum pc_di_primtype
114 tu6_primtype(VkPrimitiveTopology topology)
115 {
116 switch (topology) {
117 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
118 return DI_PT_POINTLIST;
119 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
120 return DI_PT_LINELIST;
121 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
122 return DI_PT_LINESTRIP;
123 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
124 return DI_PT_TRILIST;
125 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
126 return DI_PT_TRILIST;
127 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
128 return DI_PT_TRIFAN;
129 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
130 return DI_PT_LINE_ADJ;
131 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
132 return DI_PT_LINESTRIP_ADJ;
133 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
134 return DI_PT_TRI_ADJ;
135 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
136 return DI_PT_TRISTRIP_ADJ;
137 case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
138 default:
139 unreachable("invalid primitive topology");
140 return DI_PT_NONE;
141 }
142 }
143
144 static enum adreno_compare_func
145 tu6_compare_func(VkCompareOp op)
146 {
147 switch (op) {
148 case VK_COMPARE_OP_NEVER:
149 return FUNC_NEVER;
150 case VK_COMPARE_OP_LESS:
151 return FUNC_LESS;
152 case VK_COMPARE_OP_EQUAL:
153 return FUNC_EQUAL;
154 case VK_COMPARE_OP_LESS_OR_EQUAL:
155 return FUNC_LEQUAL;
156 case VK_COMPARE_OP_GREATER:
157 return FUNC_GREATER;
158 case VK_COMPARE_OP_NOT_EQUAL:
159 return FUNC_NOTEQUAL;
160 case VK_COMPARE_OP_GREATER_OR_EQUAL:
161 return FUNC_GEQUAL;
162 case VK_COMPARE_OP_ALWAYS:
163 return FUNC_ALWAYS;
164 default:
165 unreachable("invalid VkCompareOp");
166 return FUNC_NEVER;
167 }
168 }
169
170 static enum adreno_stencil_op
171 tu6_stencil_op(VkStencilOp op)
172 {
173 switch (op) {
174 case VK_STENCIL_OP_KEEP:
175 return STENCIL_KEEP;
176 case VK_STENCIL_OP_ZERO:
177 return STENCIL_ZERO;
178 case VK_STENCIL_OP_REPLACE:
179 return STENCIL_REPLACE;
180 case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
181 return STENCIL_INCR_CLAMP;
182 case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
183 return STENCIL_DECR_CLAMP;
184 case VK_STENCIL_OP_INVERT:
185 return STENCIL_INVERT;
186 case VK_STENCIL_OP_INCREMENT_AND_WRAP:
187 return STENCIL_INCR_WRAP;
188 case VK_STENCIL_OP_DECREMENT_AND_WRAP:
189 return STENCIL_DECR_WRAP;
190 default:
191 unreachable("invalid VkStencilOp");
192 return STENCIL_KEEP;
193 }
194 }
195
196 static enum a3xx_rop_code
197 tu6_rop(VkLogicOp op)
198 {
199 switch (op) {
200 case VK_LOGIC_OP_CLEAR:
201 return ROP_CLEAR;
202 case VK_LOGIC_OP_AND:
203 return ROP_AND;
204 case VK_LOGIC_OP_AND_REVERSE:
205 return ROP_AND_REVERSE;
206 case VK_LOGIC_OP_COPY:
207 return ROP_COPY;
208 case VK_LOGIC_OP_AND_INVERTED:
209 return ROP_AND_INVERTED;
210 case VK_LOGIC_OP_NO_OP:
211 return ROP_NOOP;
212 case VK_LOGIC_OP_XOR:
213 return ROP_XOR;
214 case VK_LOGIC_OP_OR:
215 return ROP_OR;
216 case VK_LOGIC_OP_NOR:
217 return ROP_NOR;
218 case VK_LOGIC_OP_EQUIVALENT:
219 return ROP_EQUIV;
220 case VK_LOGIC_OP_INVERT:
221 return ROP_INVERT;
222 case VK_LOGIC_OP_OR_REVERSE:
223 return ROP_OR_REVERSE;
224 case VK_LOGIC_OP_COPY_INVERTED:
225 return ROP_COPY_INVERTED;
226 case VK_LOGIC_OP_OR_INVERTED:
227 return ROP_OR_INVERTED;
228 case VK_LOGIC_OP_NAND:
229 return ROP_NAND;
230 case VK_LOGIC_OP_SET:
231 return ROP_SET;
232 default:
233 unreachable("invalid VkLogicOp");
234 return ROP_NOOP;
235 }
236 }
237
238 static enum adreno_rb_blend_factor
239 tu6_blend_factor(VkBlendFactor factor)
240 {
241 switch (factor) {
242 case VK_BLEND_FACTOR_ZERO:
243 return FACTOR_ZERO;
244 case VK_BLEND_FACTOR_ONE:
245 return FACTOR_ONE;
246 case VK_BLEND_FACTOR_SRC_COLOR:
247 return FACTOR_SRC_COLOR;
248 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
249 return FACTOR_ONE_MINUS_SRC_COLOR;
250 case VK_BLEND_FACTOR_DST_COLOR:
251 return FACTOR_DST_COLOR;
252 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
253 return FACTOR_ONE_MINUS_DST_COLOR;
254 case VK_BLEND_FACTOR_SRC_ALPHA:
255 return FACTOR_SRC_ALPHA;
256 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
257 return FACTOR_ONE_MINUS_SRC_ALPHA;
258 case VK_BLEND_FACTOR_DST_ALPHA:
259 return FACTOR_DST_ALPHA;
260 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
261 return FACTOR_ONE_MINUS_DST_ALPHA;
262 case VK_BLEND_FACTOR_CONSTANT_COLOR:
263 return FACTOR_CONSTANT_COLOR;
264 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
265 return FACTOR_ONE_MINUS_CONSTANT_COLOR;
266 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
267 return FACTOR_CONSTANT_ALPHA;
268 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
269 return FACTOR_ONE_MINUS_CONSTANT_ALPHA;
270 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
271 return FACTOR_SRC_ALPHA_SATURATE;
272 case VK_BLEND_FACTOR_SRC1_COLOR:
273 return FACTOR_SRC1_COLOR;
274 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
275 return FACTOR_ONE_MINUS_SRC1_COLOR;
276 case VK_BLEND_FACTOR_SRC1_ALPHA:
277 return FACTOR_SRC1_ALPHA;
278 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
279 return FACTOR_ONE_MINUS_SRC1_ALPHA;
280 default:
281 unreachable("invalid VkBlendFactor");
282 return FACTOR_ZERO;
283 }
284 }
285
286 static enum a3xx_rb_blend_opcode
287 tu6_blend_op(VkBlendOp op)
288 {
289 switch (op) {
290 case VK_BLEND_OP_ADD:
291 return BLEND_DST_PLUS_SRC;
292 case VK_BLEND_OP_SUBTRACT:
293 return BLEND_SRC_MINUS_DST;
294 case VK_BLEND_OP_REVERSE_SUBTRACT:
295 return BLEND_DST_MINUS_SRC;
296 case VK_BLEND_OP_MIN:
297 return BLEND_MIN_DST_SRC;
298 case VK_BLEND_OP_MAX:
299 return BLEND_MAX_DST_SRC;
300 default:
301 unreachable("invalid VkBlendOp");
302 return BLEND_DST_PLUS_SRC;
303 }
304 }
305
306 static uint32_t
307 tu6_guardband_adj(uint32_t v)
308 {
309 if (v > 256)
310 return (uint32_t)(511.0 - 65.0 * (log2(v) - 8.0));
311 else
312 return 511;
313 }
314
/* Emit the viewport transform, viewport scissor and guardband registers
 * for a single viewport.
 */
void
tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport)
{
   /* clip-space -> window-space transform: offset = center, scale = half
    * extent (z uses the [minDepth, maxDepth] range directly)
    */
   float offsets[3];
   float scales[3];
   scales[0] = viewport->width / 2.0f;
   scales[1] = viewport->height / 2.0f;
   scales[2] = viewport->maxDepth - viewport->minDepth;
   offsets[0] = viewport->x + scales[0];
   offsets[1] = viewport->y + scales[1];
   offsets[2] = viewport->minDepth;

   /* integer screen-space bounding box of the viewport; a negative height
    * flips y, so pick min/max accordingly
    */
   VkOffset2D min;
   VkOffset2D max;
   min.x = (int32_t) viewport->x;
   max.x = (int32_t) ceilf(viewport->x + viewport->width);
   if (viewport->height >= 0.0f) {
      min.y = (int32_t) viewport->y;
      max.y = (int32_t) ceilf(viewport->y + viewport->height);
   } else {
      min.y = (int32_t)(viewport->y + viewport->height);
      max.y = (int32_t) ceilf(viewport->y);
   }
   /* the spec allows viewport->height to be 0.0f */
   if (min.y == max.y)
      max.y++;
   assert(min.x >= 0 && min.x < max.x);
   assert(min.y >= 0 && min.y < max.y);

   VkExtent2D guardband_adj;
   guardband_adj.width = tu6_guardband_adj(max.x - min.x);
   guardband_adj.height = tu6_guardband_adj(max.y - min.y);

   /* six consecutive registers: x/y/z offset and scale pairs */
   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET_0, 6);
   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XOFFSET_0(offsets[0]));
   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XSCALE_0(scales[0]));
   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YOFFSET_0(offsets[1]));
   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YSCALE_0(scales[1]));
   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZOFFSET_0(offsets[2]));
   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZSCALE_0(scales[2]));

   /* TL/BR are inclusive, hence the -1 on the BR corner */
   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2);
   tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(min.x) |
                     A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(min.y));
   tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(max.x - 1) |
                     A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(max.y - 1));

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1);
   tu_cs_emit(cs,
              A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband_adj.width) |
                 A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband_adj.height));
}
367
368 void
369 tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor)
370 {
371 const VkOffset2D min = scissor->offset;
372 const VkOffset2D max = {
373 scissor->offset.x + scissor->extent.width,
374 scissor->offset.y + scissor->extent.height,
375 };
376
377 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2);
378 tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(min.x) |
379 A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(min.y));
380 tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(max.x - 1) |
381 A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(max.y - 1));
382 }
383
384 static void
385 tu6_emit_gras_unknowns(struct tu_cs *cs)
386 {
387 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8000, 1);
388 tu_cs_emit(cs, 0x80);
389 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8001, 1);
390 tu_cs_emit(cs, 0x0);
391 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8004, 1);
392 tu_cs_emit(cs, 0x0);
393 }
394
395 static void
396 tu6_emit_point_size(struct tu_cs *cs)
397 {
398 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POINT_MINMAX, 2);
399 tu_cs_emit(cs, A6XX_GRAS_SU_POINT_MINMAX_MIN(1.0f / 16.0f) |
400 A6XX_GRAS_SU_POINT_MINMAX_MAX(4092.0f));
401 tu_cs_emit(cs, A6XX_GRAS_SU_POINT_SIZE(1.0f));
402 }
403
404 static uint32_t
405 tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info,
406 VkSampleCountFlagBits samples)
407 {
408 uint32_t gras_su_cntl = 0;
409
410 if (rast_info->cullMode & VK_CULL_MODE_FRONT_BIT)
411 gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT;
412 if (rast_info->cullMode & VK_CULL_MODE_BACK_BIT)
413 gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK;
414
415 if (rast_info->frontFace == VK_FRONT_FACE_CLOCKWISE)
416 gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW;
417
418 /* don't set A6XX_GRAS_SU_CNTL_LINEHALFWIDTH */
419
420 if (rast_info->depthBiasEnable)
421 gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET;
422
423 if (samples > VK_SAMPLE_COUNT_1_BIT)
424 gras_su_cntl |= A6XX_GRAS_SU_CNTL_MSAA_ENABLE;
425
426 return gras_su_cntl;
427 }
428
429 void
430 tu6_emit_gras_su_cntl(struct tu_cs *cs,
431 uint32_t gras_su_cntl,
432 float line_width)
433 {
434 assert((gras_su_cntl & A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK) == 0);
435 gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(line_width / 2.0f);
436
437 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_CNTL, 1);
438 tu_cs_emit(cs, gras_su_cntl);
439 }
440
/* Emit the polygon (depth bias) offset state: slope scale, constant
 * offset, and clamp live in three consecutive registers.
 */
void
tu6_emit_depth_bias(struct tu_cs *cs,
                    float constant_factor,
                    float clamp,
                    float slope_factor)
{
   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor));
   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor));
   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp));
}
452
/* Disable the (GLES-style) alpha test; Vulkan has no fixed-function
 * alpha test.
 */
static void
tu6_emit_alpha_control_disable(struct tu_cs *cs)
{
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_ALPHA_CONTROL, 1);
   tu_cs_emit(cs, 0);
}
459
460 static void
461 tu6_emit_depth_control(struct tu_cs *cs,
462 const VkPipelineDepthStencilStateCreateInfo *ds_info)
463 {
464 assert(!ds_info->depthBoundsTestEnable);
465
466 uint32_t rb_depth_cntl = 0;
467 if (ds_info->depthTestEnable) {
468 rb_depth_cntl |=
469 A6XX_RB_DEPTH_CNTL_Z_ENABLE |
470 A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) |
471 A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
472
473 if (ds_info->depthWriteEnable)
474 rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
475 }
476
477 tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_CNTL, 1);
478 tu_cs_emit(cs, rb_depth_cntl);
479 }
480
481 static void
482 tu6_emit_stencil_control(struct tu_cs *cs,
483 const VkPipelineDepthStencilStateCreateInfo *ds_info)
484 {
485 uint32_t rb_stencil_control = 0;
486 if (ds_info->stencilTestEnable) {
487 const VkStencilOpState *front = &ds_info->front;
488 const VkStencilOpState *back = &ds_info->back;
489 rb_stencil_control |=
490 A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
491 A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
492 A6XX_RB_STENCIL_CONTROL_STENCIL_READ |
493 A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front->compareOp)) |
494 A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front->failOp)) |
495 A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front->passOp)) |
496 A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front->depthFailOp)) |
497 A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back->compareOp)) |
498 A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back->failOp)) |
499 A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back->passOp)) |
500 A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back->depthFailOp));
501 }
502
503 tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_CONTROL, 1);
504 tu_cs_emit(cs, rb_stencil_control);
505 }
506
507 void
508 tu6_emit_stencil_compare_mask(struct tu_cs *cs, uint32_t front, uint32_t back)
509 {
510 tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILMASK, 1);
511 tu_cs_emit(
512 cs, A6XX_RB_STENCILMASK_MASK(front) | A6XX_RB_STENCILMASK_BFMASK(back));
513 }
514
515 void
516 tu6_emit_stencil_write_mask(struct tu_cs *cs, uint32_t front, uint32_t back)
517 {
518 tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILWRMASK, 1);
519 tu_cs_emit(cs, A6XX_RB_STENCILWRMASK_WRMASK(front) |
520 A6XX_RB_STENCILWRMASK_BFWRMASK(back));
521 }
522
523 void
524 tu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t back)
525 {
526 tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILREF, 1);
527 tu_cs_emit(cs,
528 A6XX_RB_STENCILREF_REF(front) | A6XX_RB_STENCILREF_BFREF(back));
529 }
530
531 static uint32_t
532 tu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState *att,
533 bool has_alpha)
534 {
535 const enum a3xx_rb_blend_opcode color_op = tu6_blend_op(att->colorBlendOp);
536 const enum adreno_rb_blend_factor src_color_factor = tu6_blend_factor(
537 has_alpha ? att->srcColorBlendFactor
538 : tu_blend_factor_no_dst_alpha(att->srcColorBlendFactor));
539 const enum adreno_rb_blend_factor dst_color_factor = tu6_blend_factor(
540 has_alpha ? att->dstColorBlendFactor
541 : tu_blend_factor_no_dst_alpha(att->dstColorBlendFactor));
542 const enum a3xx_rb_blend_opcode alpha_op = tu6_blend_op(att->alphaBlendOp);
543 const enum adreno_rb_blend_factor src_alpha_factor =
544 tu6_blend_factor(att->srcAlphaBlendFactor);
545 const enum adreno_rb_blend_factor dst_alpha_factor =
546 tu6_blend_factor(att->dstAlphaBlendFactor);
547
548 return A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(src_color_factor) |
549 A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(color_op) |
550 A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(dst_color_factor) |
551 A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(src_alpha_factor) |
552 A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(alpha_op) |
553 A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(dst_alpha_factor);
554 }
555
556 static uint32_t
557 tu6_rb_mrt_control(const VkPipelineColorBlendAttachmentState *att,
558 uint32_t rb_mrt_control_rop,
559 bool is_int,
560 bool has_alpha)
561 {
562 uint32_t rb_mrt_control =
563 A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(att->colorWriteMask);
564
565 /* ignore blending and logic op for integer attachments */
566 if (is_int) {
567 rb_mrt_control |= A6XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
568 return rb_mrt_control;
569 }
570
571 rb_mrt_control |= rb_mrt_control_rop;
572
573 if (att->blendEnable) {
574 rb_mrt_control |= A6XX_RB_MRT_CONTROL_BLEND;
575
576 if (has_alpha)
577 rb_mrt_control |= A6XX_RB_MRT_CONTROL_BLEND2;
578 }
579
580 return rb_mrt_control;
581 }
582
/* Emit RB_MRT_CONTROL / RB_MRT_BLEND_CONTROL pairs for all MAX_RTS render
 * target slots, and return in *blend_enable_mask a bit per attachment that
 * requires destination reads (blending enabled or a dst-reading logic op).
 */
static void
tu6_emit_rb_mrt_controls(struct tu_cs *cs,
                         const VkPipelineColorBlendStateCreateInfo *blend_info,
                         const VkFormat attachment_formats[MAX_RTS],
                         uint32_t *blend_enable_mask)
{
   *blend_enable_mask = 0;

   /* the logic-op bits are identical for every attachment, so encode them
    * once up front
    */
   bool rop_reads_dst = false;
   uint32_t rb_mrt_control_rop = 0;
   if (blend_info->logicOpEnable) {
      rop_reads_dst = tu_logic_op_reads_dst(blend_info->logicOp);
      rb_mrt_control_rop =
         A6XX_RB_MRT_CONTROL_ROP_ENABLE |
         A6XX_RB_MRT_CONTROL_ROP_CODE(tu6_rop(blend_info->logicOp));
   }

   for (uint32_t i = 0; i < blend_info->attachmentCount; i++) {
      const VkPipelineColorBlendAttachmentState *att =
         &blend_info->pAttachments[i];
      const VkFormat format = attachment_formats[i];

      /* VK_FORMAT_UNDEFINED marks an unused slot: leave its registers zero */
      uint32_t rb_mrt_control = 0;
      uint32_t rb_mrt_blend_control = 0;
      if (format != VK_FORMAT_UNDEFINED) {
         const bool is_int = vk_format_is_int(format);
         const bool has_alpha = vk_format_has_alpha(format);

         rb_mrt_control =
            tu6_rb_mrt_control(att, rb_mrt_control_rop, is_int, has_alpha);
         rb_mrt_blend_control = tu6_rb_mrt_blend_control(att, has_alpha);

         if (att->blendEnable || rop_reads_dst)
            *blend_enable_mask |= 1 << i;
      }

      tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_CONTROL(i), 2);
      tu_cs_emit(cs, rb_mrt_control);
      tu_cs_emit(cs, rb_mrt_blend_control);
   }

   /* zero out the remaining, unused MRT slots */
   for (uint32_t i = blend_info->attachmentCount; i < MAX_RTS; i++) {
      tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_CONTROL(i), 2);
      tu_cs_emit(cs, 0);
      tu_cs_emit(cs, 0);
   }
}
630
631 static void
632 tu6_emit_blend_control(struct tu_cs *cs,
633 uint32_t blend_enable_mask,
634 const VkPipelineMultisampleStateCreateInfo *msaa_info)
635 {
636 assert(!msaa_info->sampleShadingEnable);
637 assert(!msaa_info->alphaToOneEnable);
638
639 uint32_t sp_blend_cntl = A6XX_SP_BLEND_CNTL_UNK8;
640 if (blend_enable_mask)
641 sp_blend_cntl |= A6XX_SP_BLEND_CNTL_ENABLED;
642 if (msaa_info->alphaToCoverageEnable)
643 sp_blend_cntl |= A6XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE;
644
645 const uint32_t sample_mask =
646 msaa_info->pSampleMask ? *msaa_info->pSampleMask
647 : ((1 << msaa_info->rasterizationSamples) - 1);
648
649 /* set A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND only when enabled? */
650 uint32_t rb_blend_cntl =
651 A6XX_RB_BLEND_CNTL_ENABLE_BLEND(blend_enable_mask) |
652 A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND |
653 A6XX_RB_BLEND_CNTL_SAMPLE_MASK(sample_mask);
654 if (msaa_info->alphaToCoverageEnable)
655 rb_blend_cntl |= A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE;
656
657 tu_cs_emit_pkt4(cs, REG_A6XX_SP_BLEND_CNTL, 1);
658 tu_cs_emit(cs, sp_blend_cntl);
659
660 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLEND_CNTL, 1);
661 tu_cs_emit(cs, rb_blend_cntl);
662 }
663
/* Emit the four float blend constants (RGBA) into the RB_BLEND_*_F32
 * registers.  NOTE(review): the float-to-uint32_t pointer cast reinterprets
 * the float bits and technically violates strict aliasing — presumably fine
 * under the project's build flags, but worth confirming.
 */
void
tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4])
{
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLEND_RED_F32, 4);
   tu_cs_emit_array(cs, (const uint32_t *) constants, 4);
}
670
/* Allocate a zero-initialized tu_pipeline and pre-reserve its command
 * stream space.  Returns VK_ERROR_OUT_OF_HOST_MEMORY on allocation failure
 * or propagates the CS reservation error; on success *out_pipeline is set.
 */
static VkResult
tu_pipeline_builder_create_pipeline(struct tu_pipeline_builder *builder,
                                    struct tu_pipeline **out_pipeline)
{
   struct tu_device *dev = builder->device;

   struct tu_pipeline *pipeline =
      vk_zalloc2(&dev->alloc, builder->alloc, sizeof(*pipeline), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pipeline)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   tu_cs_init(&pipeline->cs, TU_CS_MODE_SUB_STREAM, 2048);

   /* reserve the space now such that tu_cs_begin_sub_stream never fails */
   VkResult result = tu_cs_reserve_space(dev, &pipeline->cs, 2048);
   if (result != VK_SUCCESS) {
      vk_free2(&dev->alloc, builder->alloc, pipeline);
      return result;
   }

   *out_pipeline = pipeline;

   return VK_SUCCESS;
}
696
697 static void
698 tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder,
699 struct tu_pipeline *pipeline)
700 {
701 const VkPipelineDynamicStateCreateInfo *dynamic_info =
702 builder->create_info->pDynamicState;
703
704 if (!dynamic_info)
705 return;
706
707 for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) {
708 pipeline->dynamic_state.mask |=
709 tu_dynamic_state_bit(dynamic_info->pDynamicStates[i]);
710 }
711 }
712
713 static void
714 tu_pipeline_builder_parse_input_assembly(struct tu_pipeline_builder *builder,
715 struct tu_pipeline *pipeline)
716 {
717 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
718 builder->create_info->pInputAssemblyState;
719
720 pipeline->ia.primtype = tu6_primtype(ia_info->topology);
721 pipeline->ia.primitive_restart = ia_info->primitiveRestartEnable;
722 }
723
/* Bake non-dynamic viewport/scissor state into the pipeline's command
 * stream; dynamic state is emitted at draw time instead.
 */
static void
tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder,
                                   struct tu_pipeline *pipeline)
{
   /* The spec says:
    *
    *    pViewportState is a pointer to an instance of the
    *    VkPipelineViewportStateCreateInfo structure, and is ignored if the
    *    pipeline has rasterization disabled."
    *
    * We leave the relevant registers stale in that case.
    */
   if (builder->rasterizer_discard)
      return;

   const VkPipelineViewportStateCreateInfo *vp_info =
      builder->create_info->pViewportState;

   /* 15 dwords covers the worst case: viewport (11) + scissor (3) + pad */
   struct tu_cs vp_cs;
   tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 15, &vp_cs);

   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_VIEWPORT)) {
      /* multiple viewports are not supported */
      assert(vp_info->viewportCount == 1);
      tu6_emit_viewport(&vp_cs, vp_info->pViewports);
   }

   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SCISSOR)) {
      assert(vp_info->scissorCount == 1);
      tu6_emit_scissor(&vp_cs, vp_info->pScissors);
   }

   pipeline->vp.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vp_cs);
}
757
/* Bake the rasterization state into the pipeline's command stream.  Depth
 * clamping and non-FILL polygon modes are not implemented.
 */
static void
tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
                                        struct tu_pipeline *pipeline)
{
   const VkPipelineRasterizationStateCreateInfo *rast_info =
      builder->create_info->pRasterizationState;

   assert(!rast_info->depthClampEnable);
   assert(rast_info->polygonMode == VK_POLYGON_MODE_FILL);

   struct tu_cs rast_cs;
   tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 20, &rast_cs);

   /* move to hw ctx init? */
   tu6_emit_gras_unknowns(&rast_cs);
   tu6_emit_point_size(&rast_cs);

   const uint32_t gras_su_cntl =
      tu6_gras_su_cntl(rast_info, builder->samples);

   /* when line width / depth bias are dynamic, they are emitted at draw
    * time; gras_su_cntl is saved below so the draw path can combine it
    * with the dynamic line width
    */
   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH))
      tu6_emit_gras_su_cntl(&rast_cs, gras_su_cntl, rast_info->lineWidth);

   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_DEPTH_BIAS)) {
      tu6_emit_depth_bias(&rast_cs, rast_info->depthBiasConstantFactor,
                          rast_info->depthBiasClamp,
                          rast_info->depthBiasSlopeFactor);
   }

   pipeline->rast.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &rast_cs);

   pipeline->rast.gras_su_cntl = gras_su_cntl;
}
791
/* Bake the depth/stencil state into the pipeline's command stream. */
static void
tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
                                        struct tu_pipeline *pipeline)
{
   /* The spec says:
    *
    *    pDepthStencilState is a pointer to an instance of the
    *    VkPipelineDepthStencilStateCreateInfo structure, and is ignored if
    *    the pipeline has rasterization disabled or if the subpass of the
    *    render pass the pipeline is created against does not use a
    *    depth/stencil attachment.
    *
    * We disable both depth and stencil tests in those cases, by
    * substituting an all-zero (i.e. everything-disabled) dummy state.
    */
   static const VkPipelineDepthStencilStateCreateInfo dummy_ds_info;
   const VkPipelineDepthStencilStateCreateInfo *ds_info =
      builder->use_depth_stencil_attachment
         ? builder->create_info->pDepthStencilState
         : &dummy_ds_info;

   struct tu_cs ds_cs;
   tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 12, &ds_cs);

   /* move to hw ctx init? */
   tu6_emit_alpha_control_disable(&ds_cs);

   tu6_emit_depth_control(&ds_cs, ds_info);
   tu6_emit_stencil_control(&ds_cs, ds_info);

   /* masks/references marked dynamic are emitted at draw time instead */
   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) {
      tu6_emit_stencil_compare_mask(&ds_cs, ds_info->front.compareMask,
                                    ds_info->back.compareMask);
   }
   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) {
      tu6_emit_stencil_write_mask(&ds_cs, ds_info->front.writeMask,
                                  ds_info->back.writeMask);
   }
   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) {
      tu6_emit_stencil_reference(&ds_cs, ds_info->front.reference,
                                 ds_info->back.reference);
   }

   pipeline->ds.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &ds_cs);
}
836
/* Bake the multisample and color-blend state into the pipeline's command
 * stream.
 */
static void
tu_pipeline_builder_parse_multisample_and_color_blend(
   struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline)
{
   /* The spec says:
    *
    *    pMultisampleState is a pointer to an instance of the
    *    VkPipelineMultisampleStateCreateInfo, and is ignored if the pipeline
    *    has rasterization disabled.
    *
    * Also,
    *
    *    pColorBlendState is a pointer to an instance of the
    *    VkPipelineColorBlendStateCreateInfo structure, and is ignored if the
    *    pipeline has rasterization disabled or if the subpass of the render
    *    pass the pipeline is created against does not use any color
    *    attachments.
    *
    * We leave the relevant registers stale when rasterization is disabled.
    */
   if (builder->rasterizer_discard)
      return;

   /* an all-zero dummy substitutes for the (ignored) blend state when the
    * subpass has no color attachments
    */
   static const VkPipelineColorBlendStateCreateInfo dummy_blend_info;
   const VkPipelineMultisampleStateCreateInfo *msaa_info =
      builder->create_info->pMultisampleState;
   const VkPipelineColorBlendStateCreateInfo *blend_info =
      builder->use_color_attachments ? builder->create_info->pColorBlendState
                                     : &dummy_blend_info;

   /* worst case: 3 dwords per MRT slot plus blend constants and control */
   struct tu_cs blend_cs;
   tu_cs_begin_sub_stream(builder->device, &pipeline->cs, MAX_RTS * 3 + 9,
                          &blend_cs);

   uint32_t blend_enable_mask;
   tu6_emit_rb_mrt_controls(&blend_cs, blend_info,
                            builder->color_attachment_formats,
                            &blend_enable_mask);

   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_BLEND_CONSTANTS))
      tu6_emit_blend_constants(&blend_cs, blend_info->blendConstants);

   tu6_emit_blend_control(&blend_cs, blend_enable_mask, msaa_info);

   pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &blend_cs);
}
883
/* Release resources owned by the pipeline (its command stream); the
 * pipeline object itself is freed by the caller.
 */
static void
tu_pipeline_finish(struct tu_pipeline *pipeline,
                   struct tu_device *dev,
                   const VkAllocationCallbacks *alloc)
{
   tu_cs_finish(dev, &pipeline->cs);
}
891
/* Create a pipeline object and run every parse stage over the create
 * info.  On failure nothing is allocated; on success *pipeline is set.
 */
static VkResult
tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
                          struct tu_pipeline **pipeline)
{
   VkResult result = tu_pipeline_builder_create_pipeline(builder, pipeline);
   if (result != VK_SUCCESS)
      return result;

   /* dynamic state must be parsed first: the later stages skip emitting
    * any state marked dynamic
    */
   tu_pipeline_builder_parse_dynamic(builder, *pipeline);
   tu_pipeline_builder_parse_input_assembly(builder, *pipeline);
   tu_pipeline_builder_parse_viewport(builder, *pipeline);
   tu_pipeline_builder_parse_rasterization(builder, *pipeline);
   tu_pipeline_builder_parse_depth_stencil(builder, *pipeline);
   tu_pipeline_builder_parse_multisample_and_color_blend(builder, *pipeline);

   /* we should have reserved enough space upfront such that the CS never
    * grows
    */
   assert((*pipeline)->cs.bo_count == 1);

   return VK_SUCCESS;
}
914
/* Initialize the builder from the create info, pre-resolving the state
 * the parse stages need: rasterizer discard, sample count, and which
 * attachments the subpass uses.
 */
static void
tu_pipeline_builder_init_graphics(
   struct tu_pipeline_builder *builder,
   struct tu_device *dev,
   struct tu_pipeline_cache *cache,
   const VkGraphicsPipelineCreateInfo *create_info,
   const VkAllocationCallbacks *alloc)
{
   *builder = (struct tu_pipeline_builder) {
      .device = dev,
      .cache = cache,
      .create_info = create_info,
      .alloc = alloc,
   };

   builder->rasterizer_discard =
      create_info->pRasterizationState->rasterizerDiscardEnable;

   if (builder->rasterizer_discard) {
      /* with discard, multisample/attachment state is ignored by the spec */
      builder->samples = VK_SAMPLE_COUNT_1_BIT;
   } else {
      builder->samples = create_info->pMultisampleState->rasterizationSamples;

      const struct tu_render_pass *pass =
         tu_render_pass_from_handle(create_info->renderPass);
      const struct tu_subpass *subpass =
         &pass->subpasses[create_info->subpass];

      builder->use_depth_stencil_attachment =
         subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED;

      /* collect per-slot color formats; unused slots keep VK_FORMAT_UNDEFINED
       * from the zero-initialization above
       */
      for (uint32_t i = 0; i < subpass->color_count; i++) {
         const uint32_t a = subpass->color_attachments[i].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         builder->color_attachment_formats[i] = pass->attachments[a].format;
         builder->use_color_attachments = true;
      }
   }
}
956
/* vkCreateGraphicsPipelines: build each pipeline in order; on the first
 * failure, destroy every pipeline already created and return the error.
 */
VkResult
tu_CreateGraphicsPipelines(VkDevice device,
                           VkPipelineCache pipelineCache,
                           uint32_t count,
                           const VkGraphicsPipelineCreateInfo *pCreateInfos,
                           const VkAllocationCallbacks *pAllocator,
                           VkPipeline *pPipelines)
{
   TU_FROM_HANDLE(tu_device, dev, device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, pipelineCache);

   for (uint32_t i = 0; i < count; i++) {
      struct tu_pipeline_builder builder;
      tu_pipeline_builder_init_graphics(&builder, dev, cache,
                                        &pCreateInfos[i], pAllocator);

      struct tu_pipeline *pipeline;
      VkResult result = tu_pipeline_builder_build(&builder, &pipeline);

      if (result != VK_SUCCESS) {
         /* unwind the pipelines created so far */
         for (uint32_t j = 0; j < i; j++) {
            tu_DestroyPipeline(device, pPipelines[j], pAllocator);
            pPipelines[j] = VK_NULL_HANDLE;
         }

         return result;
      }

      pPipelines[i] = tu_pipeline_to_handle(pipeline);
   }

   return VK_SUCCESS;
}
990
/* Stub: compute pipelines are not implemented yet.  Returns VK_SUCCESS
 * without writing *pPipeline — NOTE(review): callers receive an
 * uninitialized handle until this is implemented.
 */
static VkResult
tu_compute_pipeline_create(VkDevice _device,
                           VkPipelineCache _cache,
                           const VkComputePipelineCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *pAllocator,
                           VkPipeline *pPipeline)
{
   return VK_SUCCESS;
}
1000
1001 VkResult
1002 tu_CreateComputePipelines(VkDevice _device,
1003 VkPipelineCache pipelineCache,
1004 uint32_t count,
1005 const VkComputePipelineCreateInfo *pCreateInfos,
1006 const VkAllocationCallbacks *pAllocator,
1007 VkPipeline *pPipelines)
1008 {
1009 VkResult result = VK_SUCCESS;
1010
1011 unsigned i = 0;
1012 for (; i < count; i++) {
1013 VkResult r;
1014 r = tu_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i],
1015 pAllocator, &pPipelines[i]);
1016 if (r != VK_SUCCESS) {
1017 result = r;
1018 pPipelines[i] = VK_NULL_HANDLE;
1019 }
1020 }
1021
1022 return result;
1023 }
1024
/* vkDestroyPipeline: release the pipeline's resources and free the
 * object.  A VK_NULL_HANDLE pipeline is a no-op, as the spec requires.
 */
void
tu_DestroyPipeline(VkDevice _device,
                   VkPipeline _pipeline,
                   const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, dev, _device);
   TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline);

   if (!_pipeline)
      return;

   tu_pipeline_finish(pipeline, dev, pAllocator);
   vk_free2(&dev->alloc, pAllocator, pipeline);
}
1038 }