/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "vk_format_info.h"
27 vertex_element_comp_control(enum isl_format format
, unsigned comp
)
31 case 0: bits
= isl_format_layouts
[format
].channels
.r
.bits
; break;
32 case 1: bits
= isl_format_layouts
[format
].channels
.g
.bits
; break;
33 case 2: bits
= isl_format_layouts
[format
].channels
.b
.bits
; break;
34 case 3: bits
= isl_format_layouts
[format
].channels
.a
.bits
; break;
35 default: unreachable("Invalid component");
39 return VFCOMP_STORE_SRC
;
40 } else if (comp
< 3) {
41 return VFCOMP_STORE_0
;
42 } else if (isl_format_layouts
[format
].channels
.r
.type
== ISL_UINT
||
43 isl_format_layouts
[format
].channels
.r
.type
== ISL_SINT
) {
45 return VFCOMP_STORE_1_INT
;
48 return VFCOMP_STORE_1_FP
;
53 emit_vertex_input(struct anv_pipeline
*pipeline
,
54 const VkPipelineVertexInputStateCreateInfo
*info
,
55 const struct anv_graphics_pipeline_create_info
*extra
)
57 const struct brw_vs_prog_data
*vs_prog_data
= get_vs_prog_data(pipeline
);
60 if (extra
&& extra
->disable_vs
) {
61 /* If the VS is disabled, just assume the user knows what they're
62 * doing and apply the layout blindly. This can only come from
63 * meta, so this *should* be safe.
66 for (uint32_t i
= 0; i
< info
->vertexAttributeDescriptionCount
; i
++)
67 elements
|= (1 << info
->pVertexAttributeDescriptions
[i
].location
);
69 /* Pull inputs_read out of the VS prog data */
70 uint64_t inputs_read
= vs_prog_data
->inputs_read
;
71 assert((inputs_read
& ((1 << VERT_ATTRIB_GENERIC0
) - 1)) == 0);
72 elements
= inputs_read
>> VERT_ATTRIB_GENERIC0
;
76 /* On BDW+, we only need to allocate space for base ids. Setting up
77 * the actual vertex and instance id is a separate packet.
79 const bool needs_svgs_elem
= vs_prog_data
->uses_basevertex
||
80 vs_prog_data
->uses_baseinstance
;
82 /* On Haswell and prior, vertex and instance id are created by using the
83 * ComponentControl fields, so we need an element for any of them.
85 const bool needs_svgs_elem
= vs_prog_data
->uses_vertexid
||
86 vs_prog_data
->uses_instanceid
||
87 vs_prog_data
->uses_basevertex
||
88 vs_prog_data
->uses_baseinstance
;
91 uint32_t elem_count
= __builtin_popcount(elements
) + needs_svgs_elem
;
97 const uint32_t num_dwords
= 1 + elem_count
* 2;
98 p
= anv_batch_emitn(&pipeline
->batch
, num_dwords
,
99 GENX(3DSTATE_VERTEX_ELEMENTS
));
100 memset(p
+ 1, 0, (num_dwords
- 1) * 4);
102 for (uint32_t i
= 0; i
< info
->vertexAttributeDescriptionCount
; i
++) {
103 const VkVertexInputAttributeDescription
*desc
=
104 &info
->pVertexAttributeDescriptions
[i
];
105 enum isl_format format
= anv_get_isl_format(&pipeline
->device
->info
,
107 VK_IMAGE_ASPECT_COLOR_BIT
,
108 VK_IMAGE_TILING_LINEAR
);
110 assert(desc
->binding
< 32);
112 if ((elements
& (1 << desc
->location
)) == 0)
113 continue; /* Binding unused */
115 uint32_t slot
= __builtin_popcount(elements
& ((1 << desc
->location
) - 1));
117 struct GENX(VERTEX_ELEMENT_STATE
) element
= {
118 .VertexBufferIndex
= desc
->binding
,
120 .SourceElementFormat
= format
,
121 .EdgeFlagEnable
= false,
122 .SourceElementOffset
= desc
->offset
,
123 .Component0Control
= vertex_element_comp_control(format
, 0),
124 .Component1Control
= vertex_element_comp_control(format
, 1),
125 .Component2Control
= vertex_element_comp_control(format
, 2),
126 .Component3Control
= vertex_element_comp_control(format
, 3),
128 GENX(VERTEX_ELEMENT_STATE_pack
)(NULL
, &p
[1 + slot
* 2], &element
);
131 /* On Broadwell and later, we have a separate VF_INSTANCING packet
132 * that controls instancing. On Haswell and prior, that's part of
133 * VERTEX_BUFFER_STATE which we emit later.
135 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_INSTANCING
), vfi
) {
136 vfi
.InstancingEnable
= pipeline
->instancing_enable
[desc
->binding
],
137 vfi
.VertexElementIndex
= slot
,
138 /* Vulkan so far doesn't have an instance divisor, so
139 * this is always 1 (ignored if not instancing). */
140 vfi
.InstanceDataStepRate
= 1;
145 const uint32_t id_slot
= __builtin_popcount(elements
);
146 if (needs_svgs_elem
) {
147 /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
148 * "Within a VERTEX_ELEMENT_STATE structure, if a Component
149 * Control field is set to something other than VFCOMP_STORE_SRC,
150 * no higher-numbered Component Control fields may be set to
153 * This means, that if we have BaseInstance, we need BaseVertex as
154 * well. Just do all or nothing.
156 uint32_t base_ctrl
= (vs_prog_data
->uses_basevertex
||
157 vs_prog_data
->uses_baseinstance
) ?
158 VFCOMP_STORE_SRC
: VFCOMP_STORE_0
;
160 struct GENX(VERTEX_ELEMENT_STATE
) element
= {
161 .VertexBufferIndex
= 32, /* Reserved for this */
163 .SourceElementFormat
= ISL_FORMAT_R32G32_UINT
,
164 .Component0Control
= base_ctrl
,
165 .Component1Control
= base_ctrl
,
167 .Component2Control
= VFCOMP_STORE_0
,
168 .Component3Control
= VFCOMP_STORE_0
,
170 .Component2Control
= VFCOMP_STORE_VID
,
171 .Component3Control
= VFCOMP_STORE_IID
,
174 GENX(VERTEX_ELEMENT_STATE_pack
)(NULL
, &p
[1 + id_slot
* 2], &element
);
178 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_SGVS
), sgvs
) {
179 sgvs
.VertexIDEnable
= vs_prog_data
->uses_vertexid
;
180 sgvs
.VertexIDComponentNumber
= 2;
181 sgvs
.VertexIDElementOffset
= id_slot
;
182 sgvs
.InstanceIDEnable
= vs_prog_data
->uses_instanceid
;
183 sgvs
.InstanceIDComponentNumber
= 3;
184 sgvs
.InstanceIDElementOffset
= id_slot
;
190 emit_urb_setup(struct anv_pipeline
*pipeline
)
192 #if GEN_GEN == 7 && !GEN_IS_HASWELL
193 struct anv_device
*device
= pipeline
->device
;
195 /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
197 * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
198 * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
199 * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
200 * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
201 * needs to be sent before any combination of VS associated 3DSTATE."
203 anv_batch_emit(&pipeline
->batch
, GEN7_PIPE_CONTROL
, pc
) {
204 pc
.DepthStallEnable
= true;
205 pc
.PostSyncOperation
= WriteImmediateData
;
206 pc
.Address
= (struct anv_address
) { &device
->workaround_bo
, 0 };
210 for (int i
= MESA_SHADER_VERTEX
; i
<= MESA_SHADER_GEOMETRY
; i
++) {
211 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_URB_VS
), urb
) {
212 urb
._3DCommandSubOpcode
= 48 + i
;
213 urb
.VSURBStartingAddress
= pipeline
->urb
.start
[i
];
214 urb
.VSURBEntryAllocationSize
= pipeline
->urb
.size
[i
] - 1;
215 urb
.VSNumberofURBEntries
= pipeline
->urb
.entries
[i
];
221 emit_3dstate_sbe(struct anv_pipeline
*pipeline
)
223 const struct brw_vs_prog_data
*vs_prog_data
= get_vs_prog_data(pipeline
);
224 const struct brw_gs_prog_data
*gs_prog_data
= get_gs_prog_data(pipeline
);
225 const struct brw_wm_prog_data
*wm_prog_data
= get_wm_prog_data(pipeline
);
226 const struct brw_vue_map
*fs_input_map
;
228 if (pipeline
->gs_kernel
== NO_KERNEL
)
229 fs_input_map
= &vs_prog_data
->base
.vue_map
;
231 fs_input_map
= &gs_prog_data
->base
.vue_map
;
233 struct GENX(3DSTATE_SBE
) sbe
= {
234 GENX(3DSTATE_SBE_header
),
235 .AttributeSwizzleEnable
= true,
236 .PointSpriteTextureCoordinateOrigin
= UPPERLEFT
,
237 .NumberofSFOutputAttributes
= wm_prog_data
->num_varying_inputs
,
238 .ConstantInterpolationEnable
= wm_prog_data
->flat_inputs
,
241 .Attribute0ActiveComponentFormat
= ACF_XYZW
,
242 .Attribute1ActiveComponentFormat
= ACF_XYZW
,
243 .Attribute2ActiveComponentFormat
= ACF_XYZW
,
244 .Attribute3ActiveComponentFormat
= ACF_XYZW
,
245 .Attribute4ActiveComponentFormat
= ACF_XYZW
,
246 .Attribute5ActiveComponentFormat
= ACF_XYZW
,
247 .Attribute6ActiveComponentFormat
= ACF_XYZW
,
248 .Attribute7ActiveComponentFormat
= ACF_XYZW
,
249 .Attribute8ActiveComponentFormat
= ACF_XYZW
,
250 .Attribute9ActiveComponentFormat
= ACF_XYZW
,
251 .Attribute10ActiveComponentFormat
= ACF_XYZW
,
252 .Attribute11ActiveComponentFormat
= ACF_XYZW
,
253 .Attribute12ActiveComponentFormat
= ACF_XYZW
,
254 .Attribute13ActiveComponentFormat
= ACF_XYZW
,
255 .Attribute14ActiveComponentFormat
= ACF_XYZW
,
256 .Attribute15ActiveComponentFormat
= ACF_XYZW
,
257 /* wow, much field, very attribute */
258 .Attribute16ActiveComponentFormat
= ACF_XYZW
,
259 .Attribute17ActiveComponentFormat
= ACF_XYZW
,
260 .Attribute18ActiveComponentFormat
= ACF_XYZW
,
261 .Attribute19ActiveComponentFormat
= ACF_XYZW
,
262 .Attribute20ActiveComponentFormat
= ACF_XYZW
,
263 .Attribute21ActiveComponentFormat
= ACF_XYZW
,
264 .Attribute22ActiveComponentFormat
= ACF_XYZW
,
265 .Attribute23ActiveComponentFormat
= ACF_XYZW
,
266 .Attribute24ActiveComponentFormat
= ACF_XYZW
,
267 .Attribute25ActiveComponentFormat
= ACF_XYZW
,
268 .Attribute26ActiveComponentFormat
= ACF_XYZW
,
269 .Attribute27ActiveComponentFormat
= ACF_XYZW
,
270 .Attribute28ActiveComponentFormat
= ACF_XYZW
,
271 .Attribute29ActiveComponentFormat
= ACF_XYZW
,
272 .Attribute28ActiveComponentFormat
= ACF_XYZW
,
273 .Attribute29ActiveComponentFormat
= ACF_XYZW
,
274 .Attribute30ActiveComponentFormat
= ACF_XYZW
,
279 /* On Broadwell, they broke 3DSTATE_SBE into two packets */
280 struct GENX(3DSTATE_SBE_SWIZ
) swiz
= {
281 GENX(3DSTATE_SBE_SWIZ_header
),
287 int max_source_attr
= 0;
288 for (int attr
= 0; attr
< VARYING_SLOT_MAX
; attr
++) {
289 int input_index
= wm_prog_data
->urb_setup
[attr
];
294 if (attr
== VARYING_SLOT_PNTC
) {
295 sbe
.PointSpriteTextureCoordinateEnable
= 1 << input_index
;
299 const int slot
= fs_input_map
->varying_to_slot
[attr
];
301 if (input_index
>= 16)
305 /* This attribute does not exist in the VUE--that means that the
306 * vertex shader did not write to it. It could be that it's a
307 * regular varying read by the fragment shader but not written by
308 * the vertex shader or it's gl_PrimitiveID. In the first case the
309 * value is undefined, in the second it needs to be
312 swiz
.Attribute
[input_index
].ConstantSource
= PRIM_ID
;
313 swiz
.Attribute
[input_index
].ComponentOverrideX
= true;
314 swiz
.Attribute
[input_index
].ComponentOverrideY
= true;
315 swiz
.Attribute
[input_index
].ComponentOverrideZ
= true;
316 swiz
.Attribute
[input_index
].ComponentOverrideW
= true;
319 const int source_attr
= slot
- 2;
320 max_source_attr
= MAX2(max_source_attr
, source_attr
);
321 /* We have to subtract two slots to accout for the URB entry output
322 * read offset in the VS and GS stages.
324 swiz
.Attribute
[input_index
].SourceAttribute
= source_attr
;
328 sbe
.VertexURBEntryReadOffset
= 1; /* Skip the VUE header and position slots */
329 sbe
.VertexURBEntryReadLength
= DIV_ROUND_UP(max_source_attr
+ 1, 2);
331 uint32_t *dw
= anv_batch_emit_dwords(&pipeline
->batch
,
332 GENX(3DSTATE_SBE_length
));
333 GENX(3DSTATE_SBE_pack
)(&pipeline
->batch
, dw
, &sbe
);
336 dw
= anv_batch_emit_dwords(&pipeline
->batch
, GENX(3DSTATE_SBE_SWIZ_length
));
337 GENX(3DSTATE_SBE_SWIZ_pack
)(&pipeline
->batch
, dw
, &swiz
);
341 static inline uint32_t
342 scratch_space(const struct brw_stage_prog_data
*prog_data
)
344 return ffs(prog_data
->total_scratch
/ 2048);
347 static const uint32_t vk_to_gen_cullmode
[] = {
348 [VK_CULL_MODE_NONE
] = CULLMODE_NONE
,
349 [VK_CULL_MODE_FRONT_BIT
] = CULLMODE_FRONT
,
350 [VK_CULL_MODE_BACK_BIT
] = CULLMODE_BACK
,
351 [VK_CULL_MODE_FRONT_AND_BACK
] = CULLMODE_BOTH
354 static const uint32_t vk_to_gen_fillmode
[] = {
355 [VK_POLYGON_MODE_FILL
] = FILL_MODE_SOLID
,
356 [VK_POLYGON_MODE_LINE
] = FILL_MODE_WIREFRAME
,
357 [VK_POLYGON_MODE_POINT
] = FILL_MODE_POINT
,
360 static const uint32_t vk_to_gen_front_face
[] = {
361 [VK_FRONT_FACE_COUNTER_CLOCKWISE
] = 1,
362 [VK_FRONT_FACE_CLOCKWISE
] = 0
366 emit_rs_state(struct anv_pipeline
*pipeline
,
367 const VkPipelineRasterizationStateCreateInfo
*rs_info
,
368 const struct anv_render_pass
*pass
,
369 const struct anv_subpass
*subpass
,
370 const struct anv_graphics_pipeline_create_info
*extra
)
372 struct GENX(3DSTATE_SF
) sf
= {
373 GENX(3DSTATE_SF_header
),
376 sf
.ViewportTransformEnable
= !(extra
&& extra
->use_rectlist
);
377 sf
.StatisticsEnable
= true;
378 sf
.TriangleStripListProvokingVertexSelect
= 0;
379 sf
.LineStripListProvokingVertexSelect
= 0;
380 sf
.TriangleFanProvokingVertexSelect
= 1;
381 sf
.PointWidthSource
= Vertex
;
385 struct GENX(3DSTATE_RASTER
) raster
= {
386 GENX(3DSTATE_RASTER_header
),
392 /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
393 * "Multisample Modes State".
396 raster
.DXMultisampleRasterizationEnable
= true;
397 raster
.ForcedSampleCount
= FSC_NUMRASTSAMPLES_0
;
398 raster
.ForceMultisampling
= false;
401 raster
.FrontWinding
= vk_to_gen_front_face
[rs_info
->frontFace
];
402 raster
.CullMode
= vk_to_gen_cullmode
[rs_info
->cullMode
];
403 raster
.FrontFaceFillMode
= vk_to_gen_fillmode
[rs_info
->polygonMode
];
404 raster
.BackFaceFillMode
= vk_to_gen_fillmode
[rs_info
->polygonMode
];
405 raster
.ScissorRectangleEnable
= !(extra
&& extra
->use_rectlist
);
408 /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
409 raster
.ViewportZFarClipTestEnable
= !pipeline
->depth_clamp_enable
;
410 raster
.ViewportZNearClipTestEnable
= !pipeline
->depth_clamp_enable
;
412 raster
.ViewportZClipTestEnable
= !pipeline
->depth_clamp_enable
;
415 raster
.GlobalDepthOffsetEnableSolid
= rs_info
->depthBiasEnable
;
416 raster
.GlobalDepthOffsetEnableWireframe
= rs_info
->depthBiasEnable
;
417 raster
.GlobalDepthOffsetEnablePoint
= rs_info
->depthBiasEnable
;
420 /* Gen7 requires that we provide the depth format in 3DSTATE_SF so that it
421 * can get the depth offsets correct.
423 if (subpass
->depth_stencil_attachment
< pass
->attachment_count
) {
425 pass
->attachments
[subpass
->depth_stencil_attachment
].format
;
426 assert(vk_format_is_depth_or_stencil(vk_format
));
427 if (vk_format_aspects(vk_format
) & VK_IMAGE_ASPECT_DEPTH_BIT
) {
428 enum isl_format isl_format
=
429 anv_get_isl_format(&pipeline
->device
->info
, vk_format
,
430 VK_IMAGE_ASPECT_DEPTH_BIT
,
431 VK_IMAGE_TILING_OPTIMAL
);
432 sf
.DepthBufferSurfaceFormat
=
433 isl_format_get_depth_format(isl_format
, false);
439 GENX(3DSTATE_SF_pack
)(NULL
, pipeline
->gen8
.sf
, &sf
);
440 GENX(3DSTATE_RASTER_pack
)(NULL
, pipeline
->gen8
.raster
, &raster
);
443 GENX(3DSTATE_SF_pack
)(NULL
, &pipeline
->gen7
.sf
, &sf
);
447 static const uint32_t vk_to_gen_logic_op
[] = {
448 [VK_LOGIC_OP_COPY
] = LOGICOP_COPY
,
449 [VK_LOGIC_OP_CLEAR
] = LOGICOP_CLEAR
,
450 [VK_LOGIC_OP_AND
] = LOGICOP_AND
,
451 [VK_LOGIC_OP_AND_REVERSE
] = LOGICOP_AND_REVERSE
,
452 [VK_LOGIC_OP_AND_INVERTED
] = LOGICOP_AND_INVERTED
,
453 [VK_LOGIC_OP_NO_OP
] = LOGICOP_NOOP
,
454 [VK_LOGIC_OP_XOR
] = LOGICOP_XOR
,
455 [VK_LOGIC_OP_OR
] = LOGICOP_OR
,
456 [VK_LOGIC_OP_NOR
] = LOGICOP_NOR
,
457 [VK_LOGIC_OP_EQUIVALENT
] = LOGICOP_EQUIV
,
458 [VK_LOGIC_OP_INVERT
] = LOGICOP_INVERT
,
459 [VK_LOGIC_OP_OR_REVERSE
] = LOGICOP_OR_REVERSE
,
460 [VK_LOGIC_OP_COPY_INVERTED
] = LOGICOP_COPY_INVERTED
,
461 [VK_LOGIC_OP_OR_INVERTED
] = LOGICOP_OR_INVERTED
,
462 [VK_LOGIC_OP_NAND
] = LOGICOP_NAND
,
463 [VK_LOGIC_OP_SET
] = LOGICOP_SET
,
466 static const uint32_t vk_to_gen_blend
[] = {
467 [VK_BLEND_FACTOR_ZERO
] = BLENDFACTOR_ZERO
,
468 [VK_BLEND_FACTOR_ONE
] = BLENDFACTOR_ONE
,
469 [VK_BLEND_FACTOR_SRC_COLOR
] = BLENDFACTOR_SRC_COLOR
,
470 [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR
] = BLENDFACTOR_INV_SRC_COLOR
,
471 [VK_BLEND_FACTOR_DST_COLOR
] = BLENDFACTOR_DST_COLOR
,
472 [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR
] = BLENDFACTOR_INV_DST_COLOR
,
473 [VK_BLEND_FACTOR_SRC_ALPHA
] = BLENDFACTOR_SRC_ALPHA
,
474 [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA
] = BLENDFACTOR_INV_SRC_ALPHA
,
475 [VK_BLEND_FACTOR_DST_ALPHA
] = BLENDFACTOR_DST_ALPHA
,
476 [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA
] = BLENDFACTOR_INV_DST_ALPHA
,
477 [VK_BLEND_FACTOR_CONSTANT_COLOR
] = BLENDFACTOR_CONST_COLOR
,
478 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR
]= BLENDFACTOR_INV_CONST_COLOR
,
479 [VK_BLEND_FACTOR_CONSTANT_ALPHA
] = BLENDFACTOR_CONST_ALPHA
,
480 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA
]= BLENDFACTOR_INV_CONST_ALPHA
,
481 [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE
] = BLENDFACTOR_SRC_ALPHA_SATURATE
,
482 [VK_BLEND_FACTOR_SRC1_COLOR
] = BLENDFACTOR_SRC1_COLOR
,
483 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR
] = BLENDFACTOR_INV_SRC1_COLOR
,
484 [VK_BLEND_FACTOR_SRC1_ALPHA
] = BLENDFACTOR_SRC1_ALPHA
,
485 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA
] = BLENDFACTOR_INV_SRC1_ALPHA
,
488 static const uint32_t vk_to_gen_blend_op
[] = {
489 [VK_BLEND_OP_ADD
] = BLENDFUNCTION_ADD
,
490 [VK_BLEND_OP_SUBTRACT
] = BLENDFUNCTION_SUBTRACT
,
491 [VK_BLEND_OP_REVERSE_SUBTRACT
] = BLENDFUNCTION_REVERSE_SUBTRACT
,
492 [VK_BLEND_OP_MIN
] = BLENDFUNCTION_MIN
,
493 [VK_BLEND_OP_MAX
] = BLENDFUNCTION_MAX
,
496 static const uint32_t vk_to_gen_compare_op
[] = {
497 [VK_COMPARE_OP_NEVER
] = PREFILTEROPNEVER
,
498 [VK_COMPARE_OP_LESS
] = PREFILTEROPLESS
,
499 [VK_COMPARE_OP_EQUAL
] = PREFILTEROPEQUAL
,
500 [VK_COMPARE_OP_LESS_OR_EQUAL
] = PREFILTEROPLEQUAL
,
501 [VK_COMPARE_OP_GREATER
] = PREFILTEROPGREATER
,
502 [VK_COMPARE_OP_NOT_EQUAL
] = PREFILTEROPNOTEQUAL
,
503 [VK_COMPARE_OP_GREATER_OR_EQUAL
] = PREFILTEROPGEQUAL
,
504 [VK_COMPARE_OP_ALWAYS
] = PREFILTEROPALWAYS
,
507 static const uint32_t vk_to_gen_stencil_op
[] = {
508 [VK_STENCIL_OP_KEEP
] = STENCILOP_KEEP
,
509 [VK_STENCIL_OP_ZERO
] = STENCILOP_ZERO
,
510 [VK_STENCIL_OP_REPLACE
] = STENCILOP_REPLACE
,
511 [VK_STENCIL_OP_INCREMENT_AND_CLAMP
] = STENCILOP_INCRSAT
,
512 [VK_STENCIL_OP_DECREMENT_AND_CLAMP
] = STENCILOP_DECRSAT
,
513 [VK_STENCIL_OP_INVERT
] = STENCILOP_INVERT
,
514 [VK_STENCIL_OP_INCREMENT_AND_WRAP
] = STENCILOP_INCR
,
515 [VK_STENCIL_OP_DECREMENT_AND_WRAP
] = STENCILOP_DECR
,
519 emit_ds_state(struct anv_pipeline
*pipeline
,
520 const VkPipelineDepthStencilStateCreateInfo
*info
,
521 const struct anv_render_pass
*pass
,
522 const struct anv_subpass
*subpass
)
525 # define depth_stencil_dw pipeline->gen7.depth_stencil_state
527 # define depth_stencil_dw pipeline->gen8.wm_depth_stencil
529 # define depth_stencil_dw pipeline->gen9.wm_depth_stencil
533 /* We're going to OR this together with the dynamic state. We need
534 * to make sure it's initialized to something useful.
536 memset(depth_stencil_dw
, 0, sizeof(depth_stencil_dw
));
540 /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */
543 struct GENX(DEPTH_STENCIL_STATE
) depth_stencil
= {
545 struct GENX(3DSTATE_WM_DEPTH_STENCIL
) depth_stencil
= {
547 .DepthTestEnable
= info
->depthTestEnable
,
548 .DepthBufferWriteEnable
= info
->depthWriteEnable
,
549 .DepthTestFunction
= vk_to_gen_compare_op
[info
->depthCompareOp
],
550 .DoubleSidedStencilEnable
= true,
552 .StencilTestEnable
= info
->stencilTestEnable
,
553 .StencilBufferWriteEnable
= info
->stencilTestEnable
,
554 .StencilFailOp
= vk_to_gen_stencil_op
[info
->front
.failOp
],
555 .StencilPassDepthPassOp
= vk_to_gen_stencil_op
[info
->front
.passOp
],
556 .StencilPassDepthFailOp
= vk_to_gen_stencil_op
[info
->front
.depthFailOp
],
557 .StencilTestFunction
= vk_to_gen_compare_op
[info
->front
.compareOp
],
558 .BackfaceStencilFailOp
= vk_to_gen_stencil_op
[info
->back
.failOp
],
559 .BackfaceStencilPassDepthPassOp
= vk_to_gen_stencil_op
[info
->back
.passOp
],
560 .BackfaceStencilPassDepthFailOp
=vk_to_gen_stencil_op
[info
->back
.depthFailOp
],
561 .BackfaceStencilTestFunction
= vk_to_gen_compare_op
[info
->back
.compareOp
],
564 VkImageAspectFlags aspects
= 0;
565 if (pass
->attachments
== NULL
) {
566 /* This comes from meta. Assume we have verything. */
567 aspects
= VK_IMAGE_ASPECT_DEPTH_BIT
| VK_IMAGE_ASPECT_STENCIL_BIT
;
568 } else if (subpass
->depth_stencil_attachment
!= VK_ATTACHMENT_UNUSED
) {
569 VkFormat depth_stencil_format
=
570 pass
->attachments
[subpass
->depth_stencil_attachment
].format
;
571 aspects
= vk_format_aspects(depth_stencil_format
);
574 /* The Vulkan spec requires that if either depth or stencil is not present,
575 * the pipeline is to act as if the test silently passes.
577 if (!(aspects
& VK_IMAGE_ASPECT_DEPTH_BIT
)) {
578 depth_stencil
.DepthBufferWriteEnable
= false;
579 depth_stencil
.DepthTestFunction
= PREFILTEROPALWAYS
;
582 if (!(aspects
& VK_IMAGE_ASPECT_STENCIL_BIT
)) {
583 depth_stencil
.StencilBufferWriteEnable
= false;
584 depth_stencil
.StencilTestFunction
= PREFILTEROPALWAYS
;
585 depth_stencil
.BackfaceStencilTestFunction
= PREFILTEROPALWAYS
;
588 /* From the Broadwell PRM:
590 * "If Depth_Test_Enable = 1 AND Depth_Test_func = EQUAL, the
591 * Depth_Write_Enable must be set to 0."
593 if (info
->depthTestEnable
&& info
->depthCompareOp
== VK_COMPARE_OP_EQUAL
)
594 depth_stencil
.DepthBufferWriteEnable
= false;
597 GENX(DEPTH_STENCIL_STATE_pack
)(NULL
, depth_stencil_dw
, &depth_stencil
);
599 GENX(3DSTATE_WM_DEPTH_STENCIL_pack
)(NULL
, depth_stencil_dw
, &depth_stencil
);
604 emit_cb_state(struct anv_pipeline
*pipeline
,
605 const VkPipelineColorBlendStateCreateInfo
*info
,
606 const VkPipelineMultisampleStateCreateInfo
*ms_info
)
608 struct anv_device
*device
= pipeline
->device
;
610 const uint32_t num_dwords
= GENX(BLEND_STATE_length
);
611 pipeline
->blend_state
=
612 anv_state_pool_alloc(&device
->dynamic_state_pool
, num_dwords
* 4, 64);
614 struct GENX(BLEND_STATE
) blend_state
= {
616 .AlphaToCoverageEnable
= ms_info
&& ms_info
->alphaToCoverageEnable
,
617 .AlphaToOneEnable
= ms_info
&& ms_info
->alphaToOneEnable
,
619 /* Make sure it gets zeroed */
620 .Entry
= { { 0, }, },
624 /* Default everything to disabled */
625 for (uint32_t i
= 0; i
< 8; i
++) {
626 blend_state
.Entry
[i
].WriteDisableAlpha
= true;
627 blend_state
.Entry
[i
].WriteDisableRed
= true;
628 blend_state
.Entry
[i
].WriteDisableGreen
= true;
629 blend_state
.Entry
[i
].WriteDisableBlue
= true;
632 struct anv_pipeline_bind_map
*map
=
633 &pipeline
->bindings
[MESA_SHADER_FRAGMENT
];
635 bool has_writeable_rt
= false;
636 for (unsigned i
= 0; i
< map
->surface_count
; i
++) {
637 struct anv_pipeline_binding
*binding
= &map
->surface_to_descriptor
[i
];
639 /* All color attachments are at the beginning of the binding table */
640 if (binding
->set
!= ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS
)
643 /* We can have at most 8 attachments */
646 if (binding
->index
>= info
->attachmentCount
)
649 assert(binding
->binding
== 0);
650 const VkPipelineColorBlendAttachmentState
*a
=
651 &info
->pAttachments
[binding
->index
];
653 blend_state
.Entry
[i
] = (struct GENX(BLEND_STATE_ENTRY
)) {
655 .AlphaToCoverageEnable
= ms_info
&& ms_info
->alphaToCoverageEnable
,
656 .AlphaToOneEnable
= ms_info
&& ms_info
->alphaToOneEnable
,
658 .LogicOpEnable
= info
->logicOpEnable
,
659 .LogicOpFunction
= vk_to_gen_logic_op
[info
->logicOp
],
660 .ColorBufferBlendEnable
= a
->blendEnable
,
661 .ColorClampRange
= COLORCLAMP_RTFORMAT
,
662 .PreBlendColorClampEnable
= true,
663 .PostBlendColorClampEnable
= true,
664 .SourceBlendFactor
= vk_to_gen_blend
[a
->srcColorBlendFactor
],
665 .DestinationBlendFactor
= vk_to_gen_blend
[a
->dstColorBlendFactor
],
666 .ColorBlendFunction
= vk_to_gen_blend_op
[a
->colorBlendOp
],
667 .SourceAlphaBlendFactor
= vk_to_gen_blend
[a
->srcAlphaBlendFactor
],
668 .DestinationAlphaBlendFactor
= vk_to_gen_blend
[a
->dstAlphaBlendFactor
],
669 .AlphaBlendFunction
= vk_to_gen_blend_op
[a
->alphaBlendOp
],
670 .WriteDisableAlpha
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_A_BIT
),
671 .WriteDisableRed
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_R_BIT
),
672 .WriteDisableGreen
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_G_BIT
),
673 .WriteDisableBlue
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_B_BIT
),
676 if (a
->srcColorBlendFactor
!= a
->srcAlphaBlendFactor
||
677 a
->dstColorBlendFactor
!= a
->dstAlphaBlendFactor
||
678 a
->colorBlendOp
!= a
->alphaBlendOp
) {
680 blend_state
.IndependentAlphaBlendEnable
= true;
682 blend_state
.Entry
[i
].IndependentAlphaBlendEnable
= true;
686 if (a
->colorWriteMask
!= 0)
687 has_writeable_rt
= true;
689 /* Our hardware applies the blend factor prior to the blend function
690 * regardless of what function is used. Technically, this means the
691 * hardware can do MORE than GL or Vulkan specify. However, it also
692 * means that, for MIN and MAX, we have to stomp the blend factor to
693 * ONE to make it a no-op.
695 if (a
->colorBlendOp
== VK_BLEND_OP_MIN
||
696 a
->colorBlendOp
== VK_BLEND_OP_MAX
) {
697 blend_state
.Entry
[i
].SourceBlendFactor
= BLENDFACTOR_ONE
;
698 blend_state
.Entry
[i
].DestinationBlendFactor
= BLENDFACTOR_ONE
;
700 if (a
->alphaBlendOp
== VK_BLEND_OP_MIN
||
701 a
->alphaBlendOp
== VK_BLEND_OP_MAX
) {
702 blend_state
.Entry
[i
].SourceAlphaBlendFactor
= BLENDFACTOR_ONE
;
703 blend_state
.Entry
[i
].DestinationAlphaBlendFactor
= BLENDFACTOR_ONE
;
708 struct GENX(BLEND_STATE_ENTRY
) *bs0
= &blend_state
.Entry
[0];
709 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS_BLEND
), blend
) {
710 blend
.AlphaToCoverageEnable
= blend_state
.AlphaToCoverageEnable
;
711 blend
.HasWriteableRT
= has_writeable_rt
;
712 blend
.ColorBufferBlendEnable
= bs0
->ColorBufferBlendEnable
;
713 blend
.SourceAlphaBlendFactor
= bs0
->SourceAlphaBlendFactor
;
714 blend
.DestinationAlphaBlendFactor
= bs0
->DestinationAlphaBlendFactor
;
715 blend
.SourceBlendFactor
= bs0
->SourceBlendFactor
;
716 blend
.DestinationBlendFactor
= bs0
->DestinationBlendFactor
;
717 blend
.AlphaTestEnable
= false;
718 blend
.IndependentAlphaBlendEnable
=
719 blend_state
.IndependentAlphaBlendEnable
;
722 (void)has_writeable_rt
;
725 GENX(BLEND_STATE_pack
)(NULL
, pipeline
->blend_state
.map
, &blend_state
);
726 if (!device
->info
.has_llc
)
727 anv_state_clflush(pipeline
->blend_state
);
729 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_BLEND_STATE_POINTERS
), bsp
) {
730 bsp
.BlendStatePointer
= pipeline
->blend_state
.offset
;
732 bsp
.BlendStatePointerValid
= true;
738 emit_3dstate_clip(struct anv_pipeline
*pipeline
,
739 const VkPipelineViewportStateCreateInfo
*vp_info
,
740 const VkPipelineRasterizationStateCreateInfo
*rs_info
,
741 const struct anv_graphics_pipeline_create_info
*extra
)
743 const struct brw_wm_prog_data
*wm_prog_data
= get_wm_prog_data(pipeline
);
745 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_CLIP
), clip
) {
746 clip
.ClipEnable
= !(extra
&& extra
->use_rectlist
);
747 clip
.EarlyCullEnable
= true;
748 clip
.APIMode
= APIMODE_D3D
,
749 clip
.ViewportXYClipTestEnable
= true;
751 clip
.ClipMode
= CLIPMODE_NORMAL
;
753 clip
.TriangleStripListProvokingVertexSelect
= 0;
754 clip
.LineStripListProvokingVertexSelect
= 0;
755 clip
.TriangleFanProvokingVertexSelect
= 1;
757 clip
.MinimumPointWidth
= 0.125;
758 clip
.MaximumPointWidth
= 255.875;
759 clip
.MaximumVPIndex
= vp_info
->viewportCount
- 1;
762 clip
.FrontWinding
= vk_to_gen_front_face
[rs_info
->frontFace
];
763 clip
.CullMode
= vk_to_gen_cullmode
[rs_info
->cullMode
];
764 clip
.ViewportZClipTestEnable
= !pipeline
->depth_clamp_enable
;
766 clip
.NonPerspectiveBarycentricEnable
= wm_prog_data
?
767 (wm_prog_data
->barycentric_interp_modes
& 0x38) != 0 : 0;
773 emit_3dstate_streamout(struct anv_pipeline
*pipeline
,
774 const VkPipelineRasterizationStateCreateInfo
*rs_info
)
776 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_STREAMOUT
), so
) {
777 so
.RenderingDisable
= rs_info
->rasterizerDiscardEnable
;