2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
28 #include "tu_private.h"
30 #include "main/menums.h"
32 #include "nir/nir_builder.h"
33 #include "spirv/nir_spirv.h"
34 #include "util/debug.h"
35 #include "util/mesa-sha1.h"
36 #include "util/u_atomic.h"
37 #include "vk_format.h"
42 struct tu_pipeline_builder
44 struct tu_device
*device
;
45 struct tu_pipeline_cache
*cache
;
46 const VkAllocationCallbacks
*alloc
;
47 const VkGraphicsPipelineCreateInfo
*create_info
;
49 bool rasterizer_discard
;
50 /* these states are affectd by rasterizer_discard */
51 VkSampleCountFlagBits samples
;
52 bool use_depth_stencil_attachment
;
53 bool use_color_attachments
;
54 VkFormat color_attachment_formats
[MAX_RTS
];
57 static enum tu_dynamic_state_bits
58 tu_dynamic_state_bit(VkDynamicState state
)
61 case VK_DYNAMIC_STATE_VIEWPORT
:
62 return TU_DYNAMIC_VIEWPORT
;
63 case VK_DYNAMIC_STATE_SCISSOR
:
64 return TU_DYNAMIC_SCISSOR
;
65 case VK_DYNAMIC_STATE_LINE_WIDTH
:
66 return TU_DYNAMIC_LINE_WIDTH
;
67 case VK_DYNAMIC_STATE_DEPTH_BIAS
:
68 return TU_DYNAMIC_DEPTH_BIAS
;
69 case VK_DYNAMIC_STATE_BLEND_CONSTANTS
:
70 return TU_DYNAMIC_BLEND_CONSTANTS
;
71 case VK_DYNAMIC_STATE_DEPTH_BOUNDS
:
72 return TU_DYNAMIC_DEPTH_BOUNDS
;
73 case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK
:
74 return TU_DYNAMIC_STENCIL_COMPARE_MASK
;
75 case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK
:
76 return TU_DYNAMIC_STENCIL_WRITE_MASK
;
77 case VK_DYNAMIC_STATE_STENCIL_REFERENCE
:
78 return TU_DYNAMIC_STENCIL_REFERENCE
;
80 unreachable("invalid dynamic state");
86 tu_logic_op_reads_dst(VkLogicOp op
)
89 case VK_LOGIC_OP_CLEAR
:
90 case VK_LOGIC_OP_COPY
:
91 case VK_LOGIC_OP_COPY_INVERTED
:
100 tu_blend_factor_no_dst_alpha(VkBlendFactor factor
)
102 /* treat dst alpha as 1.0 and avoid reading it */
104 case VK_BLEND_FACTOR_DST_ALPHA
:
105 return VK_BLEND_FACTOR_ONE
;
106 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA
:
107 return VK_BLEND_FACTOR_ZERO
;
113 static enum pc_di_primtype
114 tu6_primtype(VkPrimitiveTopology topology
)
117 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST
:
118 return DI_PT_POINTLIST
;
119 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST
:
120 return DI_PT_LINELIST
;
121 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP
:
122 return DI_PT_LINESTRIP
;
123 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST
:
124 return DI_PT_TRILIST
;
125 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP
:
126 return DI_PT_TRILIST
;
127 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN
:
129 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY
:
130 return DI_PT_LINE_ADJ
;
131 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY
:
132 return DI_PT_LINESTRIP_ADJ
;
133 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY
:
134 return DI_PT_TRI_ADJ
;
135 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY
:
136 return DI_PT_TRISTRIP_ADJ
;
137 case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST
:
139 unreachable("invalid primitive topology");
144 static enum adreno_compare_func
145 tu6_compare_func(VkCompareOp op
)
148 case VK_COMPARE_OP_NEVER
:
150 case VK_COMPARE_OP_LESS
:
152 case VK_COMPARE_OP_EQUAL
:
154 case VK_COMPARE_OP_LESS_OR_EQUAL
:
156 case VK_COMPARE_OP_GREATER
:
158 case VK_COMPARE_OP_NOT_EQUAL
:
159 return FUNC_NOTEQUAL
;
160 case VK_COMPARE_OP_GREATER_OR_EQUAL
:
162 case VK_COMPARE_OP_ALWAYS
:
165 unreachable("invalid VkCompareOp");
170 static enum adreno_stencil_op
171 tu6_stencil_op(VkStencilOp op
)
174 case VK_STENCIL_OP_KEEP
:
176 case VK_STENCIL_OP_ZERO
:
178 case VK_STENCIL_OP_REPLACE
:
179 return STENCIL_REPLACE
;
180 case VK_STENCIL_OP_INCREMENT_AND_CLAMP
:
181 return STENCIL_INCR_CLAMP
;
182 case VK_STENCIL_OP_DECREMENT_AND_CLAMP
:
183 return STENCIL_DECR_CLAMP
;
184 case VK_STENCIL_OP_INVERT
:
185 return STENCIL_INVERT
;
186 case VK_STENCIL_OP_INCREMENT_AND_WRAP
:
187 return STENCIL_INCR_WRAP
;
188 case VK_STENCIL_OP_DECREMENT_AND_WRAP
:
189 return STENCIL_DECR_WRAP
;
191 unreachable("invalid VkStencilOp");
196 static enum a3xx_rop_code
197 tu6_rop(VkLogicOp op
)
200 case VK_LOGIC_OP_CLEAR
:
202 case VK_LOGIC_OP_AND
:
204 case VK_LOGIC_OP_AND_REVERSE
:
205 return ROP_AND_REVERSE
;
206 case VK_LOGIC_OP_COPY
:
208 case VK_LOGIC_OP_AND_INVERTED
:
209 return ROP_AND_INVERTED
;
210 case VK_LOGIC_OP_NO_OP
:
212 case VK_LOGIC_OP_XOR
:
216 case VK_LOGIC_OP_NOR
:
218 case VK_LOGIC_OP_EQUIVALENT
:
220 case VK_LOGIC_OP_INVERT
:
222 case VK_LOGIC_OP_OR_REVERSE
:
223 return ROP_OR_REVERSE
;
224 case VK_LOGIC_OP_COPY_INVERTED
:
225 return ROP_COPY_INVERTED
;
226 case VK_LOGIC_OP_OR_INVERTED
:
227 return ROP_OR_INVERTED
;
228 case VK_LOGIC_OP_NAND
:
230 case VK_LOGIC_OP_SET
:
233 unreachable("invalid VkLogicOp");
238 static enum adreno_rb_blend_factor
239 tu6_blend_factor(VkBlendFactor factor
)
242 case VK_BLEND_FACTOR_ZERO
:
244 case VK_BLEND_FACTOR_ONE
:
246 case VK_BLEND_FACTOR_SRC_COLOR
:
247 return FACTOR_SRC_COLOR
;
248 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR
:
249 return FACTOR_ONE_MINUS_SRC_COLOR
;
250 case VK_BLEND_FACTOR_DST_COLOR
:
251 return FACTOR_DST_COLOR
;
252 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR
:
253 return FACTOR_ONE_MINUS_DST_COLOR
;
254 case VK_BLEND_FACTOR_SRC_ALPHA
:
255 return FACTOR_SRC_ALPHA
;
256 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA
:
257 return FACTOR_ONE_MINUS_SRC_ALPHA
;
258 case VK_BLEND_FACTOR_DST_ALPHA
:
259 return FACTOR_DST_ALPHA
;
260 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA
:
261 return FACTOR_ONE_MINUS_DST_ALPHA
;
262 case VK_BLEND_FACTOR_CONSTANT_COLOR
:
263 return FACTOR_CONSTANT_COLOR
;
264 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR
:
265 return FACTOR_ONE_MINUS_CONSTANT_COLOR
;
266 case VK_BLEND_FACTOR_CONSTANT_ALPHA
:
267 return FACTOR_CONSTANT_ALPHA
;
268 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA
:
269 return FACTOR_ONE_MINUS_CONSTANT_ALPHA
;
270 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE
:
271 return FACTOR_SRC_ALPHA_SATURATE
;
272 case VK_BLEND_FACTOR_SRC1_COLOR
:
273 return FACTOR_SRC1_COLOR
;
274 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR
:
275 return FACTOR_ONE_MINUS_SRC1_COLOR
;
276 case VK_BLEND_FACTOR_SRC1_ALPHA
:
277 return FACTOR_SRC1_ALPHA
;
278 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA
:
279 return FACTOR_ONE_MINUS_SRC1_ALPHA
;
281 unreachable("invalid VkBlendFactor");
286 static enum a3xx_rb_blend_opcode
287 tu6_blend_op(VkBlendOp op
)
290 case VK_BLEND_OP_ADD
:
291 return BLEND_DST_PLUS_SRC
;
292 case VK_BLEND_OP_SUBTRACT
:
293 return BLEND_SRC_MINUS_DST
;
294 case VK_BLEND_OP_REVERSE_SUBTRACT
:
295 return BLEND_DST_MINUS_SRC
;
296 case VK_BLEND_OP_MIN
:
297 return BLEND_MIN_DST_SRC
;
298 case VK_BLEND_OP_MAX
:
299 return BLEND_MAX_DST_SRC
;
301 unreachable("invalid VkBlendOp");
302 return BLEND_DST_PLUS_SRC
;
307 tu6_guardband_adj(uint32_t v
)
310 return (uint32_t)(511.0 - 65.0 * (log2(v
) - 8.0));
316 tu6_emit_viewport(struct tu_cs
*cs
, const VkViewport
*viewport
)
320 scales
[0] = viewport
->width
/ 2.0f
;
321 scales
[1] = viewport
->height
/ 2.0f
;
322 scales
[2] = viewport
->maxDepth
- viewport
->minDepth
;
323 offsets
[0] = viewport
->x
+ scales
[0];
324 offsets
[1] = viewport
->y
+ scales
[1];
325 offsets
[2] = viewport
->minDepth
;
329 min
.x
= (int32_t) viewport
->x
;
330 max
.x
= (int32_t) ceilf(viewport
->x
+ viewport
->width
);
331 if (viewport
->height
>= 0.0f
) {
332 min
.y
= (int32_t) viewport
->y
;
333 max
.y
= (int32_t) ceilf(viewport
->y
+ viewport
->height
);
335 min
.y
= (int32_t)(viewport
->y
+ viewport
->height
);
336 max
.y
= (int32_t) ceilf(viewport
->y
);
338 /* the spec allows viewport->height to be 0.0f */
341 assert(min
.x
>= 0 && min
.x
< max
.x
);
342 assert(min
.y
>= 0 && min
.y
< max
.y
);
344 VkExtent2D guardband_adj
;
345 guardband_adj
.width
= tu6_guardband_adj(max
.x
- min
.x
);
346 guardband_adj
.height
= tu6_guardband_adj(max
.y
- min
.y
);
348 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_CL_VPORT_XOFFSET_0
, 6);
349 tu_cs_emit(cs
, A6XX_GRAS_CL_VPORT_XOFFSET_0(offsets
[0]));
350 tu_cs_emit(cs
, A6XX_GRAS_CL_VPORT_XSCALE_0(scales
[0]));
351 tu_cs_emit(cs
, A6XX_GRAS_CL_VPORT_YOFFSET_0(offsets
[1]));
352 tu_cs_emit(cs
, A6XX_GRAS_CL_VPORT_YSCALE_0(scales
[1]));
353 tu_cs_emit(cs
, A6XX_GRAS_CL_VPORT_ZOFFSET_0(offsets
[2]));
354 tu_cs_emit(cs
, A6XX_GRAS_CL_VPORT_ZSCALE_0(scales
[2]));
356 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0
, 2);
357 tu_cs_emit(cs
, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(min
.x
) |
358 A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(min
.y
));
359 tu_cs_emit(cs
, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(max
.x
- 1) |
360 A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(max
.y
- 1));
362 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ
, 1);
364 A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband_adj
.width
) |
365 A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband_adj
.height
));
369 tu6_emit_scissor(struct tu_cs
*cs
, const VkRect2D
*scissor
)
371 const VkOffset2D min
= scissor
->offset
;
372 const VkOffset2D max
= {
373 scissor
->offset
.x
+ scissor
->extent
.width
,
374 scissor
->offset
.y
+ scissor
->extent
.height
,
377 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0
, 2);
378 tu_cs_emit(cs
, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(min
.x
) |
379 A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(min
.y
));
380 tu_cs_emit(cs
, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(max
.x
- 1) |
381 A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(max
.y
- 1));
385 tu6_emit_gras_unknowns(struct tu_cs
*cs
)
387 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_UNKNOWN_8000
, 1);
388 tu_cs_emit(cs
, 0x80);
389 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_UNKNOWN_8001
, 1);
391 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_UNKNOWN_8004
, 1);
396 tu6_emit_point_size(struct tu_cs
*cs
)
398 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_SU_POINT_MINMAX
, 2);
399 tu_cs_emit(cs
, A6XX_GRAS_SU_POINT_MINMAX_MIN(1.0f
/ 16.0f
) |
400 A6XX_GRAS_SU_POINT_MINMAX_MAX(4092.0f
));
401 tu_cs_emit(cs
, A6XX_GRAS_SU_POINT_SIZE(1.0f
));
405 tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo
*rast_info
,
406 VkSampleCountFlagBits samples
)
408 uint32_t gras_su_cntl
= 0;
410 if (rast_info
->cullMode
& VK_CULL_MODE_FRONT_BIT
)
411 gras_su_cntl
|= A6XX_GRAS_SU_CNTL_CULL_FRONT
;
412 if (rast_info
->cullMode
& VK_CULL_MODE_BACK_BIT
)
413 gras_su_cntl
|= A6XX_GRAS_SU_CNTL_CULL_BACK
;
415 if (rast_info
->frontFace
== VK_FRONT_FACE_CLOCKWISE
)
416 gras_su_cntl
|= A6XX_GRAS_SU_CNTL_FRONT_CW
;
418 /* don't set A6XX_GRAS_SU_CNTL_LINEHALFWIDTH */
420 if (rast_info
->depthBiasEnable
)
421 gras_su_cntl
|= A6XX_GRAS_SU_CNTL_POLY_OFFSET
;
423 if (samples
> VK_SAMPLE_COUNT_1_BIT
)
424 gras_su_cntl
|= A6XX_GRAS_SU_CNTL_MSAA_ENABLE
;
430 tu6_emit_gras_su_cntl(struct tu_cs
*cs
,
431 uint32_t gras_su_cntl
,
434 assert((gras_su_cntl
& A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK
) == 0);
435 gras_su_cntl
|= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(line_width
/ 2.0f
);
437 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_SU_CNTL
, 1);
438 tu_cs_emit(cs
, gras_su_cntl
);
442 tu6_emit_depth_bias(struct tu_cs
*cs
,
443 float constant_factor
,
447 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE
, 3);
448 tu_cs_emit(cs
, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor
));
449 tu_cs_emit(cs
, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor
));
450 tu_cs_emit(cs
, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp
));
454 tu6_emit_alpha_control_disable(struct tu_cs
*cs
)
456 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_ALPHA_CONTROL
, 1);
461 tu6_emit_depth_control(struct tu_cs
*cs
,
462 const VkPipelineDepthStencilStateCreateInfo
*ds_info
)
464 assert(!ds_info
->depthBoundsTestEnable
);
466 uint32_t rb_depth_cntl
= 0;
467 if (ds_info
->depthTestEnable
) {
469 A6XX_RB_DEPTH_CNTL_Z_ENABLE
|
470 A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info
->depthCompareOp
)) |
471 A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE
;
473 if (ds_info
->depthWriteEnable
)
474 rb_depth_cntl
|= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE
;
477 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_DEPTH_CNTL
, 1);
478 tu_cs_emit(cs
, rb_depth_cntl
);
482 tu6_emit_stencil_control(struct tu_cs
*cs
,
483 const VkPipelineDepthStencilStateCreateInfo
*ds_info
)
485 uint32_t rb_stencil_control
= 0;
486 if (ds_info
->stencilTestEnable
) {
487 const VkStencilOpState
*front
= &ds_info
->front
;
488 const VkStencilOpState
*back
= &ds_info
->back
;
489 rb_stencil_control
|=
490 A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE
|
491 A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF
|
492 A6XX_RB_STENCIL_CONTROL_STENCIL_READ
|
493 A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front
->compareOp
)) |
494 A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front
->failOp
)) |
495 A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front
->passOp
)) |
496 A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front
->depthFailOp
)) |
497 A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back
->compareOp
)) |
498 A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back
->failOp
)) |
499 A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back
->passOp
)) |
500 A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back
->depthFailOp
));
503 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_STENCIL_CONTROL
, 1);
504 tu_cs_emit(cs
, rb_stencil_control
);
508 tu6_emit_stencil_compare_mask(struct tu_cs
*cs
, uint32_t front
, uint32_t back
)
510 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_STENCILMASK
, 1);
512 cs
, A6XX_RB_STENCILMASK_MASK(front
) | A6XX_RB_STENCILMASK_BFMASK(back
));
516 tu6_emit_stencil_write_mask(struct tu_cs
*cs
, uint32_t front
, uint32_t back
)
518 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_STENCILWRMASK
, 1);
519 tu_cs_emit(cs
, A6XX_RB_STENCILWRMASK_WRMASK(front
) |
520 A6XX_RB_STENCILWRMASK_BFWRMASK(back
));
524 tu6_emit_stencil_reference(struct tu_cs
*cs
, uint32_t front
, uint32_t back
)
526 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_STENCILREF
, 1);
528 A6XX_RB_STENCILREF_REF(front
) | A6XX_RB_STENCILREF_BFREF(back
));
532 tu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState
*att
,
535 const enum a3xx_rb_blend_opcode color_op
= tu6_blend_op(att
->colorBlendOp
);
536 const enum adreno_rb_blend_factor src_color_factor
= tu6_blend_factor(
537 has_alpha
? att
->srcColorBlendFactor
538 : tu_blend_factor_no_dst_alpha(att
->srcColorBlendFactor
));
539 const enum adreno_rb_blend_factor dst_color_factor
= tu6_blend_factor(
540 has_alpha
? att
->dstColorBlendFactor
541 : tu_blend_factor_no_dst_alpha(att
->dstColorBlendFactor
));
542 const enum a3xx_rb_blend_opcode alpha_op
= tu6_blend_op(att
->alphaBlendOp
);
543 const enum adreno_rb_blend_factor src_alpha_factor
=
544 tu6_blend_factor(att
->srcAlphaBlendFactor
);
545 const enum adreno_rb_blend_factor dst_alpha_factor
=
546 tu6_blend_factor(att
->dstAlphaBlendFactor
);
548 return A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(src_color_factor
) |
549 A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(color_op
) |
550 A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(dst_color_factor
) |
551 A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(src_alpha_factor
) |
552 A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(alpha_op
) |
553 A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(dst_alpha_factor
);
557 tu6_rb_mrt_control(const VkPipelineColorBlendAttachmentState
*att
,
558 uint32_t rb_mrt_control_rop
,
562 uint32_t rb_mrt_control
=
563 A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(att
->colorWriteMask
);
565 /* ignore blending and logic op for integer attachments */
567 rb_mrt_control
|= A6XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY
);
568 return rb_mrt_control
;
571 rb_mrt_control
|= rb_mrt_control_rop
;
573 if (att
->blendEnable
) {
574 rb_mrt_control
|= A6XX_RB_MRT_CONTROL_BLEND
;
577 rb_mrt_control
|= A6XX_RB_MRT_CONTROL_BLEND2
;
580 return rb_mrt_control
;
584 tu6_emit_rb_mrt_controls(struct tu_cs
*cs
,
585 const VkPipelineColorBlendStateCreateInfo
*blend_info
,
586 const VkFormat attachment_formats
[MAX_RTS
],
587 uint32_t *blend_enable_mask
)
589 *blend_enable_mask
= 0;
591 bool rop_reads_dst
= false;
592 uint32_t rb_mrt_control_rop
= 0;
593 if (blend_info
->logicOpEnable
) {
594 rop_reads_dst
= tu_logic_op_reads_dst(blend_info
->logicOp
);
596 A6XX_RB_MRT_CONTROL_ROP_ENABLE
|
597 A6XX_RB_MRT_CONTROL_ROP_CODE(tu6_rop(blend_info
->logicOp
));
600 for (uint32_t i
= 0; i
< blend_info
->attachmentCount
; i
++) {
601 const VkPipelineColorBlendAttachmentState
*att
=
602 &blend_info
->pAttachments
[i
];
603 const VkFormat format
= attachment_formats
[i
];
605 uint32_t rb_mrt_control
= 0;
606 uint32_t rb_mrt_blend_control
= 0;
607 if (format
!= VK_FORMAT_UNDEFINED
) {
608 const bool is_int
= vk_format_is_int(format
);
609 const bool has_alpha
= vk_format_has_alpha(format
);
612 tu6_rb_mrt_control(att
, rb_mrt_control_rop
, is_int
, has_alpha
);
613 rb_mrt_blend_control
= tu6_rb_mrt_blend_control(att
, has_alpha
);
615 if (att
->blendEnable
|| rop_reads_dst
)
616 *blend_enable_mask
|= 1 << i
;
619 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_MRT_CONTROL(i
), 2);
620 tu_cs_emit(cs
, rb_mrt_control
);
621 tu_cs_emit(cs
, rb_mrt_blend_control
);
624 for (uint32_t i
= blend_info
->attachmentCount
; i
< MAX_RTS
; i
++) {
625 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_MRT_CONTROL(i
), 2);
632 tu6_emit_blend_control(struct tu_cs
*cs
,
633 uint32_t blend_enable_mask
,
634 const VkPipelineMultisampleStateCreateInfo
*msaa_info
)
636 assert(!msaa_info
->sampleShadingEnable
);
637 assert(!msaa_info
->alphaToOneEnable
);
639 uint32_t sp_blend_cntl
= A6XX_SP_BLEND_CNTL_UNK8
;
640 if (blend_enable_mask
)
641 sp_blend_cntl
|= A6XX_SP_BLEND_CNTL_ENABLED
;
642 if (msaa_info
->alphaToCoverageEnable
)
643 sp_blend_cntl
|= A6XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE
;
645 const uint32_t sample_mask
=
646 msaa_info
->pSampleMask
? *msaa_info
->pSampleMask
647 : ((1 << msaa_info
->rasterizationSamples
) - 1);
649 /* set A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND only when enabled? */
650 uint32_t rb_blend_cntl
=
651 A6XX_RB_BLEND_CNTL_ENABLE_BLEND(blend_enable_mask
) |
652 A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND
|
653 A6XX_RB_BLEND_CNTL_SAMPLE_MASK(sample_mask
);
654 if (msaa_info
->alphaToCoverageEnable
)
655 rb_blend_cntl
|= A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE
;
657 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_BLEND_CNTL
, 1);
658 tu_cs_emit(cs
, sp_blend_cntl
);
660 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLEND_CNTL
, 1);
661 tu_cs_emit(cs
, rb_blend_cntl
);
665 tu6_emit_blend_constants(struct tu_cs
*cs
, const float constants
[4])
667 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLEND_RED_F32
, 4);
668 tu_cs_emit_array(cs
, (const uint32_t *) constants
, 4);
672 tu_pipeline_builder_create_pipeline(struct tu_pipeline_builder
*builder
,
673 struct tu_pipeline
**out_pipeline
)
675 struct tu_device
*dev
= builder
->device
;
677 struct tu_pipeline
*pipeline
=
678 vk_zalloc2(&dev
->alloc
, builder
->alloc
, sizeof(*pipeline
), 8,
679 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
681 return VK_ERROR_OUT_OF_HOST_MEMORY
;
683 tu_cs_init(&pipeline
->cs
, TU_CS_MODE_SUB_STREAM
, 2048);
685 /* reserve the space now such that tu_cs_begin_sub_stream never fails */
686 VkResult result
= tu_cs_reserve_space(dev
, &pipeline
->cs
, 2048);
687 if (result
!= VK_SUCCESS
) {
688 vk_free2(&dev
->alloc
, builder
->alloc
, pipeline
);
692 *out_pipeline
= pipeline
;
698 tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder
*builder
,
699 struct tu_pipeline
*pipeline
)
701 const VkPipelineDynamicStateCreateInfo
*dynamic_info
=
702 builder
->create_info
->pDynamicState
;
707 for (uint32_t i
= 0; i
< dynamic_info
->dynamicStateCount
; i
++) {
708 pipeline
->dynamic_state
.mask
|=
709 tu_dynamic_state_bit(dynamic_info
->pDynamicStates
[i
]);
714 tu_pipeline_builder_parse_input_assembly(struct tu_pipeline_builder
*builder
,
715 struct tu_pipeline
*pipeline
)
717 const VkPipelineInputAssemblyStateCreateInfo
*ia_info
=
718 builder
->create_info
->pInputAssemblyState
;
720 pipeline
->ia
.primtype
= tu6_primtype(ia_info
->topology
);
721 pipeline
->ia
.primitive_restart
= ia_info
->primitiveRestartEnable
;
725 tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder
*builder
,
726 struct tu_pipeline
*pipeline
)
730 * pViewportState is a pointer to an instance of the
731 * VkPipelineViewportStateCreateInfo structure, and is ignored if the
732 * pipeline has rasterization disabled."
734 * We leave the relevant registers stale in that case.
736 if (builder
->rasterizer_discard
)
739 const VkPipelineViewportStateCreateInfo
*vp_info
=
740 builder
->create_info
->pViewportState
;
743 tu_cs_begin_sub_stream(builder
->device
, &pipeline
->cs
, 15, &vp_cs
);
745 if (!(pipeline
->dynamic_state
.mask
& TU_DYNAMIC_VIEWPORT
)) {
746 assert(vp_info
->viewportCount
== 1);
747 tu6_emit_viewport(&vp_cs
, vp_info
->pViewports
);
750 if (!(pipeline
->dynamic_state
.mask
& TU_DYNAMIC_SCISSOR
)) {
751 assert(vp_info
->scissorCount
== 1);
752 tu6_emit_scissor(&vp_cs
, vp_info
->pScissors
);
755 pipeline
->vp
.state_ib
= tu_cs_end_sub_stream(&pipeline
->cs
, &vp_cs
);
759 tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder
*builder
,
760 struct tu_pipeline
*pipeline
)
762 const VkPipelineRasterizationStateCreateInfo
*rast_info
=
763 builder
->create_info
->pRasterizationState
;
765 assert(!rast_info
->depthClampEnable
);
766 assert(rast_info
->polygonMode
== VK_POLYGON_MODE_FILL
);
768 struct tu_cs rast_cs
;
769 tu_cs_begin_sub_stream(builder
->device
, &pipeline
->cs
, 20, &rast_cs
);
771 /* move to hw ctx init? */
772 tu6_emit_gras_unknowns(&rast_cs
);
773 tu6_emit_point_size(&rast_cs
);
775 const uint32_t gras_su_cntl
=
776 tu6_gras_su_cntl(rast_info
, builder
->samples
);
778 if (!(pipeline
->dynamic_state
.mask
& TU_DYNAMIC_LINE_WIDTH
))
779 tu6_emit_gras_su_cntl(&rast_cs
, gras_su_cntl
, rast_info
->lineWidth
);
781 if (!(pipeline
->dynamic_state
.mask
& TU_DYNAMIC_DEPTH_BIAS
)) {
782 tu6_emit_depth_bias(&rast_cs
, rast_info
->depthBiasConstantFactor
,
783 rast_info
->depthBiasClamp
,
784 rast_info
->depthBiasSlopeFactor
);
787 pipeline
->rast
.state_ib
= tu_cs_end_sub_stream(&pipeline
->cs
, &rast_cs
);
789 pipeline
->rast
.gras_su_cntl
= gras_su_cntl
;
793 tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder
*builder
,
794 struct tu_pipeline
*pipeline
)
798 * pDepthStencilState is a pointer to an instance of the
799 * VkPipelineDepthStencilStateCreateInfo structure, and is ignored if
800 * the pipeline has rasterization disabled or if the subpass of the
801 * render pass the pipeline is created against does not use a
802 * depth/stencil attachment.
804 * We disable both depth and stenil tests in those cases.
806 static const VkPipelineDepthStencilStateCreateInfo dummy_ds_info
;
807 const VkPipelineDepthStencilStateCreateInfo
*ds_info
=
808 builder
->use_depth_stencil_attachment
809 ? builder
->create_info
->pDepthStencilState
813 tu_cs_begin_sub_stream(builder
->device
, &pipeline
->cs
, 12, &ds_cs
);
815 /* move to hw ctx init? */
816 tu6_emit_alpha_control_disable(&ds_cs
);
818 tu6_emit_depth_control(&ds_cs
, ds_info
);
819 tu6_emit_stencil_control(&ds_cs
, ds_info
);
821 if (!(pipeline
->dynamic_state
.mask
& TU_DYNAMIC_STENCIL_COMPARE_MASK
)) {
822 tu6_emit_stencil_compare_mask(&ds_cs
, ds_info
->front
.compareMask
,
823 ds_info
->back
.compareMask
);
825 if (!(pipeline
->dynamic_state
.mask
& TU_DYNAMIC_STENCIL_WRITE_MASK
)) {
826 tu6_emit_stencil_write_mask(&ds_cs
, ds_info
->front
.writeMask
,
827 ds_info
->back
.writeMask
);
829 if (!(pipeline
->dynamic_state
.mask
& TU_DYNAMIC_STENCIL_REFERENCE
)) {
830 tu6_emit_stencil_reference(&ds_cs
, ds_info
->front
.reference
,
831 ds_info
->back
.reference
);
834 pipeline
->ds
.state_ib
= tu_cs_end_sub_stream(&pipeline
->cs
, &ds_cs
);
838 tu_pipeline_builder_parse_multisample_and_color_blend(
839 struct tu_pipeline_builder
*builder
, struct tu_pipeline
*pipeline
)
843 * pMultisampleState is a pointer to an instance of the
844 * VkPipelineMultisampleStateCreateInfo, and is ignored if the pipeline
845 * has rasterization disabled.
849 * pColorBlendState is a pointer to an instance of the
850 * VkPipelineColorBlendStateCreateInfo structure, and is ignored if the
851 * pipeline has rasterization disabled or if the subpass of the render
852 * pass the pipeline is created against does not use any color
855 * We leave the relevant registers stale when rasterization is disabled.
857 if (builder
->rasterizer_discard
)
860 static const VkPipelineColorBlendStateCreateInfo dummy_blend_info
;
861 const VkPipelineMultisampleStateCreateInfo
*msaa_info
=
862 builder
->create_info
->pMultisampleState
;
863 const VkPipelineColorBlendStateCreateInfo
*blend_info
=
864 builder
->use_color_attachments
? builder
->create_info
->pColorBlendState
867 struct tu_cs blend_cs
;
868 tu_cs_begin_sub_stream(builder
->device
, &pipeline
->cs
, MAX_RTS
* 3 + 9,
871 uint32_t blend_enable_mask
;
872 tu6_emit_rb_mrt_controls(&blend_cs
, blend_info
,
873 builder
->color_attachment_formats
,
876 if (!(pipeline
->dynamic_state
.mask
& TU_DYNAMIC_BLEND_CONSTANTS
))
877 tu6_emit_blend_constants(&blend_cs
, blend_info
->blendConstants
);
879 tu6_emit_blend_control(&blend_cs
, blend_enable_mask
, msaa_info
);
881 pipeline
->blend
.state_ib
= tu_cs_end_sub_stream(&pipeline
->cs
, &blend_cs
);
885 tu_pipeline_finish(struct tu_pipeline
*pipeline
,
886 struct tu_device
*dev
,
887 const VkAllocationCallbacks
*alloc
)
889 tu_cs_finish(dev
, &pipeline
->cs
);
893 tu_pipeline_builder_build(struct tu_pipeline_builder
*builder
,
894 struct tu_pipeline
**pipeline
)
896 VkResult result
= tu_pipeline_builder_create_pipeline(builder
, pipeline
);
897 if (result
!= VK_SUCCESS
)
900 tu_pipeline_builder_parse_dynamic(builder
, *pipeline
);
901 tu_pipeline_builder_parse_input_assembly(builder
, *pipeline
);
902 tu_pipeline_builder_parse_viewport(builder
, *pipeline
);
903 tu_pipeline_builder_parse_rasterization(builder
, *pipeline
);
904 tu_pipeline_builder_parse_depth_stencil(builder
, *pipeline
);
905 tu_pipeline_builder_parse_multisample_and_color_blend(builder
, *pipeline
);
907 /* we should have reserved enough space upfront such that the CS never
910 assert((*pipeline
)->cs
.bo_count
== 1);
916 tu_pipeline_builder_init_graphics(
917 struct tu_pipeline_builder
*builder
,
918 struct tu_device
*dev
,
919 struct tu_pipeline_cache
*cache
,
920 const VkGraphicsPipelineCreateInfo
*create_info
,
921 const VkAllocationCallbacks
*alloc
)
923 *builder
= (struct tu_pipeline_builder
) {
926 .create_info
= create_info
,
930 builder
->rasterizer_discard
=
931 create_info
->pRasterizationState
->rasterizerDiscardEnable
;
933 if (builder
->rasterizer_discard
) {
934 builder
->samples
= VK_SAMPLE_COUNT_1_BIT
;
936 builder
->samples
= create_info
->pMultisampleState
->rasterizationSamples
;
938 const struct tu_render_pass
*pass
=
939 tu_render_pass_from_handle(create_info
->renderPass
);
940 const struct tu_subpass
*subpass
=
941 &pass
->subpasses
[create_info
->subpass
];
943 builder
->use_depth_stencil_attachment
=
944 subpass
->depth_stencil_attachment
.attachment
!= VK_ATTACHMENT_UNUSED
;
946 for (uint32_t i
= 0; i
< subpass
->color_count
; i
++) {
947 const uint32_t a
= subpass
->color_attachments
[i
].attachment
;
948 if (a
== VK_ATTACHMENT_UNUSED
)
951 builder
->color_attachment_formats
[i
] = pass
->attachments
[a
].format
;
952 builder
->use_color_attachments
= true;
958 tu_CreateGraphicsPipelines(VkDevice device
,
959 VkPipelineCache pipelineCache
,
961 const VkGraphicsPipelineCreateInfo
*pCreateInfos
,
962 const VkAllocationCallbacks
*pAllocator
,
963 VkPipeline
*pPipelines
)
965 TU_FROM_HANDLE(tu_device
, dev
, device
);
966 TU_FROM_HANDLE(tu_pipeline_cache
, cache
, pipelineCache
);
968 for (uint32_t i
= 0; i
< count
; i
++) {
969 struct tu_pipeline_builder builder
;
970 tu_pipeline_builder_init_graphics(&builder
, dev
, cache
,
971 &pCreateInfos
[i
], pAllocator
);
973 struct tu_pipeline
*pipeline
;
974 VkResult result
= tu_pipeline_builder_build(&builder
, &pipeline
);
976 if (result
!= VK_SUCCESS
) {
977 for (uint32_t j
= 0; j
< i
; j
++) {
978 tu_DestroyPipeline(device
, pPipelines
[j
], pAllocator
);
979 pPipelines
[j
] = VK_NULL_HANDLE
;
985 pPipelines
[i
] = tu_pipeline_to_handle(pipeline
);
992 tu_compute_pipeline_create(VkDevice _device
,
993 VkPipelineCache _cache
,
994 const VkComputePipelineCreateInfo
*pCreateInfo
,
995 const VkAllocationCallbacks
*pAllocator
,
996 VkPipeline
*pPipeline
)
1002 tu_CreateComputePipelines(VkDevice _device
,
1003 VkPipelineCache pipelineCache
,
1005 const VkComputePipelineCreateInfo
*pCreateInfos
,
1006 const VkAllocationCallbacks
*pAllocator
,
1007 VkPipeline
*pPipelines
)
1009 VkResult result
= VK_SUCCESS
;
1012 for (; i
< count
; i
++) {
1014 r
= tu_compute_pipeline_create(_device
, pipelineCache
, &pCreateInfos
[i
],
1015 pAllocator
, &pPipelines
[i
]);
1016 if (r
!= VK_SUCCESS
) {
1018 pPipelines
[i
] = VK_NULL_HANDLE
;
1026 tu_DestroyPipeline(VkDevice _device
,
1027 VkPipeline _pipeline
,
1028 const VkAllocationCallbacks
*pAllocator
)
1030 TU_FROM_HANDLE(tu_device
, dev
, _device
);
1031 TU_FROM_HANDLE(tu_pipeline
, pipeline
, _pipeline
);
1036 tu_pipeline_finish(pipeline
, dev
, pAllocator
);
1037 vk_free2(&dev
->alloc
, pAllocator
, pipeline
);