/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "vk_format_info.h"
25 #include "genX_multisample.h"
28 vertex_element_comp_control(enum isl_format format
, unsigned comp
)
32 case 0: bits
= isl_format_layouts
[format
].channels
.r
.bits
; break;
33 case 1: bits
= isl_format_layouts
[format
].channels
.g
.bits
; break;
34 case 2: bits
= isl_format_layouts
[format
].channels
.b
.bits
; break;
35 case 3: bits
= isl_format_layouts
[format
].channels
.a
.bits
; break;
36 default: unreachable("Invalid component");
40 return VFCOMP_STORE_SRC
;
41 } else if (comp
< 3) {
42 return VFCOMP_STORE_0
;
43 } else if (isl_format_layouts
[format
].channels
.r
.type
== ISL_UINT
||
44 isl_format_layouts
[format
].channels
.r
.type
== ISL_SINT
) {
46 return VFCOMP_STORE_1_INT
;
49 return VFCOMP_STORE_1_FP
;
54 emit_vertex_input(struct anv_pipeline
*pipeline
,
55 const VkPipelineVertexInputStateCreateInfo
*info
,
56 const struct anv_graphics_pipeline_create_info
*extra
)
58 const struct brw_vs_prog_data
*vs_prog_data
= get_vs_prog_data(pipeline
);
61 if (extra
&& extra
->disable_vs
) {
62 /* If the VS is disabled, just assume the user knows what they're
63 * doing and apply the layout blindly. This can only come from
64 * meta, so this *should* be safe.
67 for (uint32_t i
= 0; i
< info
->vertexAttributeDescriptionCount
; i
++)
68 elements
|= (1 << info
->pVertexAttributeDescriptions
[i
].location
);
70 /* Pull inputs_read out of the VS prog data */
71 uint64_t inputs_read
= vs_prog_data
->inputs_read
;
72 assert((inputs_read
& ((1 << VERT_ATTRIB_GENERIC0
) - 1)) == 0);
73 elements
= inputs_read
>> VERT_ATTRIB_GENERIC0
;
77 /* On BDW+, we only need to allocate space for base ids. Setting up
78 * the actual vertex and instance id is a separate packet.
80 const bool needs_svgs_elem
= vs_prog_data
->uses_basevertex
||
81 vs_prog_data
->uses_baseinstance
;
83 /* On Haswell and prior, vertex and instance id are created by using the
84 * ComponentControl fields, so we need an element for any of them.
86 const bool needs_svgs_elem
= vs_prog_data
->uses_vertexid
||
87 vs_prog_data
->uses_instanceid
||
88 vs_prog_data
->uses_basevertex
||
89 vs_prog_data
->uses_baseinstance
;
92 uint32_t elem_count
= __builtin_popcount(elements
) + needs_svgs_elem
;
98 const uint32_t num_dwords
= 1 + elem_count
* 2;
99 p
= anv_batch_emitn(&pipeline
->batch
, num_dwords
,
100 GENX(3DSTATE_VERTEX_ELEMENTS
));
101 memset(p
+ 1, 0, (num_dwords
- 1) * 4);
103 for (uint32_t i
= 0; i
< info
->vertexAttributeDescriptionCount
; i
++) {
104 const VkVertexInputAttributeDescription
*desc
=
105 &info
->pVertexAttributeDescriptions
[i
];
106 enum isl_format format
= anv_get_isl_format(&pipeline
->device
->info
,
108 VK_IMAGE_ASPECT_COLOR_BIT
,
109 VK_IMAGE_TILING_LINEAR
);
111 assert(desc
->binding
< 32);
113 if ((elements
& (1 << desc
->location
)) == 0)
114 continue; /* Binding unused */
116 uint32_t slot
= __builtin_popcount(elements
& ((1 << desc
->location
) - 1));
118 struct GENX(VERTEX_ELEMENT_STATE
) element
= {
119 .VertexBufferIndex
= desc
->binding
,
121 .SourceElementFormat
= format
,
122 .EdgeFlagEnable
= false,
123 .SourceElementOffset
= desc
->offset
,
124 .Component0Control
= vertex_element_comp_control(format
, 0),
125 .Component1Control
= vertex_element_comp_control(format
, 1),
126 .Component2Control
= vertex_element_comp_control(format
, 2),
127 .Component3Control
= vertex_element_comp_control(format
, 3),
129 GENX(VERTEX_ELEMENT_STATE_pack
)(NULL
, &p
[1 + slot
* 2], &element
);
132 /* On Broadwell and later, we have a separate VF_INSTANCING packet
133 * that controls instancing. On Haswell and prior, that's part of
134 * VERTEX_BUFFER_STATE which we emit later.
136 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_INSTANCING
), vfi
) {
137 vfi
.InstancingEnable
= pipeline
->instancing_enable
[desc
->binding
],
138 vfi
.VertexElementIndex
= slot
,
139 /* Vulkan so far doesn't have an instance divisor, so
140 * this is always 1 (ignored if not instancing). */
141 vfi
.InstanceDataStepRate
= 1;
146 const uint32_t id_slot
= __builtin_popcount(elements
);
147 if (needs_svgs_elem
) {
148 /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
149 * "Within a VERTEX_ELEMENT_STATE structure, if a Component
150 * Control field is set to something other than VFCOMP_STORE_SRC,
151 * no higher-numbered Component Control fields may be set to
154 * This means, that if we have BaseInstance, we need BaseVertex as
155 * well. Just do all or nothing.
157 uint32_t base_ctrl
= (vs_prog_data
->uses_basevertex
||
158 vs_prog_data
->uses_baseinstance
) ?
159 VFCOMP_STORE_SRC
: VFCOMP_STORE_0
;
161 struct GENX(VERTEX_ELEMENT_STATE
) element
= {
162 .VertexBufferIndex
= 32, /* Reserved for this */
164 .SourceElementFormat
= ISL_FORMAT_R32G32_UINT
,
165 .Component0Control
= base_ctrl
,
166 .Component1Control
= base_ctrl
,
168 .Component2Control
= VFCOMP_STORE_0
,
169 .Component3Control
= VFCOMP_STORE_0
,
171 .Component2Control
= VFCOMP_STORE_VID
,
172 .Component3Control
= VFCOMP_STORE_IID
,
175 GENX(VERTEX_ELEMENT_STATE_pack
)(NULL
, &p
[1 + id_slot
* 2], &element
);
179 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_SGVS
), sgvs
) {
180 sgvs
.VertexIDEnable
= vs_prog_data
->uses_vertexid
;
181 sgvs
.VertexIDComponentNumber
= 2;
182 sgvs
.VertexIDElementOffset
= id_slot
;
183 sgvs
.InstanceIDEnable
= vs_prog_data
->uses_instanceid
;
184 sgvs
.InstanceIDComponentNumber
= 3;
185 sgvs
.InstanceIDElementOffset
= id_slot
;
191 emit_urb_setup(struct anv_pipeline
*pipeline
)
193 #if GEN_GEN == 7 && !GEN_IS_HASWELL
194 struct anv_device
*device
= pipeline
->device
;
196 /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
198 * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
199 * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
200 * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
201 * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
202 * needs to be sent before any combination of VS associated 3DSTATE."
204 anv_batch_emit(&pipeline
->batch
, GEN7_PIPE_CONTROL
, pc
) {
205 pc
.DepthStallEnable
= true;
206 pc
.PostSyncOperation
= WriteImmediateData
;
207 pc
.Address
= (struct anv_address
) { &device
->workaround_bo
, 0 };
211 for (int i
= MESA_SHADER_VERTEX
; i
<= MESA_SHADER_GEOMETRY
; i
++) {
212 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_URB_VS
), urb
) {
213 urb
._3DCommandSubOpcode
= 48 + i
;
214 urb
.VSURBStartingAddress
= pipeline
->urb
.start
[i
];
215 urb
.VSURBEntryAllocationSize
= pipeline
->urb
.size
[i
] - 1;
216 urb
.VSNumberofURBEntries
= pipeline
->urb
.entries
[i
];
222 emit_3dstate_sbe(struct anv_pipeline
*pipeline
)
224 const struct brw_vs_prog_data
*vs_prog_data
= get_vs_prog_data(pipeline
);
225 const struct brw_gs_prog_data
*gs_prog_data
= get_gs_prog_data(pipeline
);
226 const struct brw_wm_prog_data
*wm_prog_data
= get_wm_prog_data(pipeline
);
227 const struct brw_vue_map
*fs_input_map
;
229 if (pipeline
->gs_kernel
== NO_KERNEL
)
230 fs_input_map
= &vs_prog_data
->base
.vue_map
;
232 fs_input_map
= &gs_prog_data
->base
.vue_map
;
234 struct GENX(3DSTATE_SBE
) sbe
= {
235 GENX(3DSTATE_SBE_header
),
236 .AttributeSwizzleEnable
= true,
237 .PointSpriteTextureCoordinateOrigin
= UPPERLEFT
,
238 .NumberofSFOutputAttributes
= wm_prog_data
->num_varying_inputs
,
239 .ConstantInterpolationEnable
= wm_prog_data
->flat_inputs
,
243 for (unsigned i
= 0; i
< 32; i
++)
244 sbe
.AttributeActiveComponentFormat
[i
] = ACF_XYZW
;
248 /* On Broadwell, they broke 3DSTATE_SBE into two packets */
249 struct GENX(3DSTATE_SBE_SWIZ
) swiz
= {
250 GENX(3DSTATE_SBE_SWIZ_header
),
256 int max_source_attr
= 0;
257 for (int attr
= 0; attr
< VARYING_SLOT_MAX
; attr
++) {
258 int input_index
= wm_prog_data
->urb_setup
[attr
];
263 if (attr
== VARYING_SLOT_PNTC
) {
264 sbe
.PointSpriteTextureCoordinateEnable
= 1 << input_index
;
268 const int slot
= fs_input_map
->varying_to_slot
[attr
];
270 if (input_index
>= 16)
274 /* This attribute does not exist in the VUE--that means that the
275 * vertex shader did not write to it. It could be that it's a
276 * regular varying read by the fragment shader but not written by
277 * the vertex shader or it's gl_PrimitiveID. In the first case the
278 * value is undefined, in the second it needs to be
281 swiz
.Attribute
[input_index
].ConstantSource
= PRIM_ID
;
282 swiz
.Attribute
[input_index
].ComponentOverrideX
= true;
283 swiz
.Attribute
[input_index
].ComponentOverrideY
= true;
284 swiz
.Attribute
[input_index
].ComponentOverrideZ
= true;
285 swiz
.Attribute
[input_index
].ComponentOverrideW
= true;
288 const int source_attr
= slot
- 2;
289 max_source_attr
= MAX2(max_source_attr
, source_attr
);
290 /* We have to subtract two slots to accout for the URB entry output
291 * read offset in the VS and GS stages.
293 swiz
.Attribute
[input_index
].SourceAttribute
= source_attr
;
297 sbe
.VertexURBEntryReadOffset
= 1; /* Skip the VUE header and position slots */
298 sbe
.VertexURBEntryReadLength
= DIV_ROUND_UP(max_source_attr
+ 1, 2);
300 uint32_t *dw
= anv_batch_emit_dwords(&pipeline
->batch
,
301 GENX(3DSTATE_SBE_length
));
302 GENX(3DSTATE_SBE_pack
)(&pipeline
->batch
, dw
, &sbe
);
305 dw
= anv_batch_emit_dwords(&pipeline
->batch
, GENX(3DSTATE_SBE_SWIZ_length
));
306 GENX(3DSTATE_SBE_SWIZ_pack
)(&pipeline
->batch
, dw
, &swiz
);
310 static inline uint32_t
311 scratch_space(const struct brw_stage_prog_data
*prog_data
)
313 return ffs(prog_data
->total_scratch
/ 2048);
316 static const uint32_t vk_to_gen_cullmode
[] = {
317 [VK_CULL_MODE_NONE
] = CULLMODE_NONE
,
318 [VK_CULL_MODE_FRONT_BIT
] = CULLMODE_FRONT
,
319 [VK_CULL_MODE_BACK_BIT
] = CULLMODE_BACK
,
320 [VK_CULL_MODE_FRONT_AND_BACK
] = CULLMODE_BOTH
323 static const uint32_t vk_to_gen_fillmode
[] = {
324 [VK_POLYGON_MODE_FILL
] = FILL_MODE_SOLID
,
325 [VK_POLYGON_MODE_LINE
] = FILL_MODE_WIREFRAME
,
326 [VK_POLYGON_MODE_POINT
] = FILL_MODE_POINT
,
329 static const uint32_t vk_to_gen_front_face
[] = {
330 [VK_FRONT_FACE_COUNTER_CLOCKWISE
] = 1,
331 [VK_FRONT_FACE_CLOCKWISE
] = 0
335 emit_rs_state(struct anv_pipeline
*pipeline
,
336 const VkPipelineRasterizationStateCreateInfo
*rs_info
,
337 const VkPipelineMultisampleStateCreateInfo
*ms_info
,
338 const struct anv_render_pass
*pass
,
339 const struct anv_subpass
*subpass
,
340 const struct anv_graphics_pipeline_create_info
*extra
)
342 struct GENX(3DSTATE_SF
) sf
= {
343 GENX(3DSTATE_SF_header
),
346 sf
.ViewportTransformEnable
= !(extra
&& extra
->use_rectlist
);
347 sf
.StatisticsEnable
= true;
348 sf
.TriangleStripListProvokingVertexSelect
= 0;
349 sf
.LineStripListProvokingVertexSelect
= 0;
350 sf
.TriangleFanProvokingVertexSelect
= 1;
351 sf
.PointWidthSource
= Vertex
;
355 struct GENX(3DSTATE_RASTER
) raster
= {
356 GENX(3DSTATE_RASTER_header
),
362 /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
363 * "Multisample Modes State".
366 raster
.DXMultisampleRasterizationEnable
= true;
367 raster
.ForcedSampleCount
= FSC_NUMRASTSAMPLES_0
;
368 raster
.ForceMultisampling
= false;
370 raster
.MultisampleRasterizationMode
=
371 (ms_info
&& ms_info
->rasterizationSamples
> 1) ?
372 MSRASTMODE_ON_PATTERN
: MSRASTMODE_OFF_PIXEL
;
375 raster
.FrontWinding
= vk_to_gen_front_face
[rs_info
->frontFace
];
376 raster
.CullMode
= vk_to_gen_cullmode
[rs_info
->cullMode
];
377 raster
.FrontFaceFillMode
= vk_to_gen_fillmode
[rs_info
->polygonMode
];
378 raster
.BackFaceFillMode
= vk_to_gen_fillmode
[rs_info
->polygonMode
];
379 raster
.ScissorRectangleEnable
= !(extra
&& extra
->use_rectlist
);
382 /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
383 raster
.ViewportZFarClipTestEnable
= !pipeline
->depth_clamp_enable
;
384 raster
.ViewportZNearClipTestEnable
= !pipeline
->depth_clamp_enable
;
386 raster
.ViewportZClipTestEnable
= !pipeline
->depth_clamp_enable
;
389 raster
.GlobalDepthOffsetEnableSolid
= rs_info
->depthBiasEnable
;
390 raster
.GlobalDepthOffsetEnableWireframe
= rs_info
->depthBiasEnable
;
391 raster
.GlobalDepthOffsetEnablePoint
= rs_info
->depthBiasEnable
;
394 /* Gen7 requires that we provide the depth format in 3DSTATE_SF so that it
395 * can get the depth offsets correct.
397 if (subpass
->depth_stencil_attachment
< pass
->attachment_count
) {
399 pass
->attachments
[subpass
->depth_stencil_attachment
].format
;
400 assert(vk_format_is_depth_or_stencil(vk_format
));
401 if (vk_format_aspects(vk_format
) & VK_IMAGE_ASPECT_DEPTH_BIT
) {
402 enum isl_format isl_format
=
403 anv_get_isl_format(&pipeline
->device
->info
, vk_format
,
404 VK_IMAGE_ASPECT_DEPTH_BIT
,
405 VK_IMAGE_TILING_OPTIMAL
);
406 sf
.DepthBufferSurfaceFormat
=
407 isl_format_get_depth_format(isl_format
, false);
413 GENX(3DSTATE_SF_pack
)(NULL
, pipeline
->gen8
.sf
, &sf
);
414 GENX(3DSTATE_RASTER_pack
)(NULL
, pipeline
->gen8
.raster
, &raster
);
417 GENX(3DSTATE_SF_pack
)(NULL
, &pipeline
->gen7
.sf
, &sf
);
422 emit_ms_state(struct anv_pipeline
*pipeline
,
423 const VkPipelineMultisampleStateCreateInfo
*info
)
425 uint32_t samples
= 1;
426 uint32_t log2_samples
= 0;
428 /* From the Vulkan 1.0 spec:
429 * If pSampleMask is NULL, it is treated as if the mask has all bits
430 * enabled, i.e. no coverage is removed from fragments.
432 * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
435 uint32_t sample_mask
= 0xffff;
437 uint32_t sample_mask
= 0xff;
441 samples
= info
->rasterizationSamples
;
442 log2_samples
= __builtin_ffs(samples
) - 1;
445 if (info
&& info
->pSampleMask
)
446 sample_mask
&= info
->pSampleMask
[0];
448 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_MULTISAMPLE
), ms
) {
449 ms
.NumberofMultisamples
= log2_samples
;
452 /* The PRM says that this bit is valid only for DX9:
454 * SW can choose to set this bit only for DX9 API. DX10/OGL API's
455 * should not have any effect by setting or not setting this bit.
457 ms
.PixelPositionOffsetEnable
= false;
458 ms
.PixelLocation
= CENTER
;
460 ms
.PixelLocation
= PIXLOC_CENTER
;
464 SAMPLE_POS_1X(ms
.Sample
);
467 SAMPLE_POS_2X(ms
.Sample
);
470 SAMPLE_POS_4X(ms
.Sample
);
473 SAMPLE_POS_8X(ms
.Sample
);
481 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_SAMPLE_MASK
), sm
) {
482 sm
.SampleMask
= sample_mask
;
486 static const uint32_t vk_to_gen_logic_op
[] = {
487 [VK_LOGIC_OP_COPY
] = LOGICOP_COPY
,
488 [VK_LOGIC_OP_CLEAR
] = LOGICOP_CLEAR
,
489 [VK_LOGIC_OP_AND
] = LOGICOP_AND
,
490 [VK_LOGIC_OP_AND_REVERSE
] = LOGICOP_AND_REVERSE
,
491 [VK_LOGIC_OP_AND_INVERTED
] = LOGICOP_AND_INVERTED
,
492 [VK_LOGIC_OP_NO_OP
] = LOGICOP_NOOP
,
493 [VK_LOGIC_OP_XOR
] = LOGICOP_XOR
,
494 [VK_LOGIC_OP_OR
] = LOGICOP_OR
,
495 [VK_LOGIC_OP_NOR
] = LOGICOP_NOR
,
496 [VK_LOGIC_OP_EQUIVALENT
] = LOGICOP_EQUIV
,
497 [VK_LOGIC_OP_INVERT
] = LOGICOP_INVERT
,
498 [VK_LOGIC_OP_OR_REVERSE
] = LOGICOP_OR_REVERSE
,
499 [VK_LOGIC_OP_COPY_INVERTED
] = LOGICOP_COPY_INVERTED
,
500 [VK_LOGIC_OP_OR_INVERTED
] = LOGICOP_OR_INVERTED
,
501 [VK_LOGIC_OP_NAND
] = LOGICOP_NAND
,
502 [VK_LOGIC_OP_SET
] = LOGICOP_SET
,
505 static const uint32_t vk_to_gen_blend
[] = {
506 [VK_BLEND_FACTOR_ZERO
] = BLENDFACTOR_ZERO
,
507 [VK_BLEND_FACTOR_ONE
] = BLENDFACTOR_ONE
,
508 [VK_BLEND_FACTOR_SRC_COLOR
] = BLENDFACTOR_SRC_COLOR
,
509 [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR
] = BLENDFACTOR_INV_SRC_COLOR
,
510 [VK_BLEND_FACTOR_DST_COLOR
] = BLENDFACTOR_DST_COLOR
,
511 [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR
] = BLENDFACTOR_INV_DST_COLOR
,
512 [VK_BLEND_FACTOR_SRC_ALPHA
] = BLENDFACTOR_SRC_ALPHA
,
513 [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA
] = BLENDFACTOR_INV_SRC_ALPHA
,
514 [VK_BLEND_FACTOR_DST_ALPHA
] = BLENDFACTOR_DST_ALPHA
,
515 [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA
] = BLENDFACTOR_INV_DST_ALPHA
,
516 [VK_BLEND_FACTOR_CONSTANT_COLOR
] = BLENDFACTOR_CONST_COLOR
,
517 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR
]= BLENDFACTOR_INV_CONST_COLOR
,
518 [VK_BLEND_FACTOR_CONSTANT_ALPHA
] = BLENDFACTOR_CONST_ALPHA
,
519 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA
]= BLENDFACTOR_INV_CONST_ALPHA
,
520 [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE
] = BLENDFACTOR_SRC_ALPHA_SATURATE
,
521 [VK_BLEND_FACTOR_SRC1_COLOR
] = BLENDFACTOR_SRC1_COLOR
,
522 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR
] = BLENDFACTOR_INV_SRC1_COLOR
,
523 [VK_BLEND_FACTOR_SRC1_ALPHA
] = BLENDFACTOR_SRC1_ALPHA
,
524 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA
] = BLENDFACTOR_INV_SRC1_ALPHA
,
527 static const uint32_t vk_to_gen_blend_op
[] = {
528 [VK_BLEND_OP_ADD
] = BLENDFUNCTION_ADD
,
529 [VK_BLEND_OP_SUBTRACT
] = BLENDFUNCTION_SUBTRACT
,
530 [VK_BLEND_OP_REVERSE_SUBTRACT
] = BLENDFUNCTION_REVERSE_SUBTRACT
,
531 [VK_BLEND_OP_MIN
] = BLENDFUNCTION_MIN
,
532 [VK_BLEND_OP_MAX
] = BLENDFUNCTION_MAX
,
535 static const uint32_t vk_to_gen_compare_op
[] = {
536 [VK_COMPARE_OP_NEVER
] = PREFILTEROPNEVER
,
537 [VK_COMPARE_OP_LESS
] = PREFILTEROPLESS
,
538 [VK_COMPARE_OP_EQUAL
] = PREFILTEROPEQUAL
,
539 [VK_COMPARE_OP_LESS_OR_EQUAL
] = PREFILTEROPLEQUAL
,
540 [VK_COMPARE_OP_GREATER
] = PREFILTEROPGREATER
,
541 [VK_COMPARE_OP_NOT_EQUAL
] = PREFILTEROPNOTEQUAL
,
542 [VK_COMPARE_OP_GREATER_OR_EQUAL
] = PREFILTEROPGEQUAL
,
543 [VK_COMPARE_OP_ALWAYS
] = PREFILTEROPALWAYS
,
546 static const uint32_t vk_to_gen_stencil_op
[] = {
547 [VK_STENCIL_OP_KEEP
] = STENCILOP_KEEP
,
548 [VK_STENCIL_OP_ZERO
] = STENCILOP_ZERO
,
549 [VK_STENCIL_OP_REPLACE
] = STENCILOP_REPLACE
,
550 [VK_STENCIL_OP_INCREMENT_AND_CLAMP
] = STENCILOP_INCRSAT
,
551 [VK_STENCIL_OP_DECREMENT_AND_CLAMP
] = STENCILOP_DECRSAT
,
552 [VK_STENCIL_OP_INVERT
] = STENCILOP_INVERT
,
553 [VK_STENCIL_OP_INCREMENT_AND_WRAP
] = STENCILOP_INCR
,
554 [VK_STENCIL_OP_DECREMENT_AND_WRAP
] = STENCILOP_DECR
,
558 emit_ds_state(struct anv_pipeline
*pipeline
,
559 const VkPipelineDepthStencilStateCreateInfo
*info
,
560 const struct anv_render_pass
*pass
,
561 const struct anv_subpass
*subpass
)
564 # define depth_stencil_dw pipeline->gen7.depth_stencil_state
566 # define depth_stencil_dw pipeline->gen8.wm_depth_stencil
568 # define depth_stencil_dw pipeline->gen9.wm_depth_stencil
572 /* We're going to OR this together with the dynamic state. We need
573 * to make sure it's initialized to something useful.
575 memset(depth_stencil_dw
, 0, sizeof(depth_stencil_dw
));
579 /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */
582 struct GENX(DEPTH_STENCIL_STATE
) depth_stencil
= {
584 struct GENX(3DSTATE_WM_DEPTH_STENCIL
) depth_stencil
= {
586 .DepthTestEnable
= info
->depthTestEnable
,
587 .DepthBufferWriteEnable
= info
->depthWriteEnable
,
588 .DepthTestFunction
= vk_to_gen_compare_op
[info
->depthCompareOp
],
589 .DoubleSidedStencilEnable
= true,
591 .StencilTestEnable
= info
->stencilTestEnable
,
592 .StencilBufferWriteEnable
= info
->stencilTestEnable
,
593 .StencilFailOp
= vk_to_gen_stencil_op
[info
->front
.failOp
],
594 .StencilPassDepthPassOp
= vk_to_gen_stencil_op
[info
->front
.passOp
],
595 .StencilPassDepthFailOp
= vk_to_gen_stencil_op
[info
->front
.depthFailOp
],
596 .StencilTestFunction
= vk_to_gen_compare_op
[info
->front
.compareOp
],
597 .BackfaceStencilFailOp
= vk_to_gen_stencil_op
[info
->back
.failOp
],
598 .BackfaceStencilPassDepthPassOp
= vk_to_gen_stencil_op
[info
->back
.passOp
],
599 .BackfaceStencilPassDepthFailOp
=vk_to_gen_stencil_op
[info
->back
.depthFailOp
],
600 .BackfaceStencilTestFunction
= vk_to_gen_compare_op
[info
->back
.compareOp
],
603 VkImageAspectFlags aspects
= 0;
604 if (pass
->attachments
== NULL
) {
605 /* This comes from meta. Assume we have verything. */
606 aspects
= VK_IMAGE_ASPECT_DEPTH_BIT
| VK_IMAGE_ASPECT_STENCIL_BIT
;
607 } else if (subpass
->depth_stencil_attachment
!= VK_ATTACHMENT_UNUSED
) {
608 VkFormat depth_stencil_format
=
609 pass
->attachments
[subpass
->depth_stencil_attachment
].format
;
610 aspects
= vk_format_aspects(depth_stencil_format
);
613 /* The Vulkan spec requires that if either depth or stencil is not present,
614 * the pipeline is to act as if the test silently passes.
616 if (!(aspects
& VK_IMAGE_ASPECT_DEPTH_BIT
)) {
617 depth_stencil
.DepthBufferWriteEnable
= false;
618 depth_stencil
.DepthTestFunction
= PREFILTEROPALWAYS
;
621 if (!(aspects
& VK_IMAGE_ASPECT_STENCIL_BIT
)) {
622 depth_stencil
.StencilBufferWriteEnable
= false;
623 depth_stencil
.StencilTestFunction
= PREFILTEROPALWAYS
;
624 depth_stencil
.BackfaceStencilTestFunction
= PREFILTEROPALWAYS
;
627 /* From the Broadwell PRM:
629 * "If Depth_Test_Enable = 1 AND Depth_Test_func = EQUAL, the
630 * Depth_Write_Enable must be set to 0."
632 if (info
->depthTestEnable
&& info
->depthCompareOp
== VK_COMPARE_OP_EQUAL
)
633 depth_stencil
.DepthBufferWriteEnable
= false;
636 GENX(DEPTH_STENCIL_STATE_pack
)(NULL
, depth_stencil_dw
, &depth_stencil
);
638 GENX(3DSTATE_WM_DEPTH_STENCIL_pack
)(NULL
, depth_stencil_dw
, &depth_stencil
);
643 emit_cb_state(struct anv_pipeline
*pipeline
,
644 const VkPipelineColorBlendStateCreateInfo
*info
,
645 const VkPipelineMultisampleStateCreateInfo
*ms_info
)
647 struct anv_device
*device
= pipeline
->device
;
649 const uint32_t num_dwords
= GENX(BLEND_STATE_length
);
650 pipeline
->blend_state
=
651 anv_state_pool_alloc(&device
->dynamic_state_pool
, num_dwords
* 4, 64);
653 struct GENX(BLEND_STATE
) blend_state
= {
655 .AlphaToCoverageEnable
= ms_info
&& ms_info
->alphaToCoverageEnable
,
656 .AlphaToOneEnable
= ms_info
&& ms_info
->alphaToOneEnable
,
658 /* Make sure it gets zeroed */
659 .Entry
= { { 0, }, },
663 /* Default everything to disabled */
664 for (uint32_t i
= 0; i
< 8; i
++) {
665 blend_state
.Entry
[i
].WriteDisableAlpha
= true;
666 blend_state
.Entry
[i
].WriteDisableRed
= true;
667 blend_state
.Entry
[i
].WriteDisableGreen
= true;
668 blend_state
.Entry
[i
].WriteDisableBlue
= true;
671 uint32_t surface_count
= 0;
672 struct anv_pipeline_bind_map
*map
;
673 if (anv_pipeline_has_stage(pipeline
, MESA_SHADER_FRAGMENT
)) {
674 map
= &pipeline
->bindings
[MESA_SHADER_FRAGMENT
];
675 surface_count
= map
->surface_count
;
678 bool has_writeable_rt
= false;
679 for (unsigned i
= 0; i
< surface_count
; i
++) {
680 struct anv_pipeline_binding
*binding
= &map
->surface_to_descriptor
[i
];
682 /* All color attachments are at the beginning of the binding table */
683 if (binding
->set
!= ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS
)
686 /* We can have at most 8 attachments */
689 if (binding
->index
>= info
->attachmentCount
)
692 assert(binding
->binding
== 0);
693 const VkPipelineColorBlendAttachmentState
*a
=
694 &info
->pAttachments
[binding
->index
];
696 blend_state
.Entry
[i
] = (struct GENX(BLEND_STATE_ENTRY
)) {
698 .AlphaToCoverageEnable
= ms_info
&& ms_info
->alphaToCoverageEnable
,
699 .AlphaToOneEnable
= ms_info
&& ms_info
->alphaToOneEnable
,
701 .LogicOpEnable
= info
->logicOpEnable
,
702 .LogicOpFunction
= vk_to_gen_logic_op
[info
->logicOp
],
703 .ColorBufferBlendEnable
= a
->blendEnable
,
704 .ColorClampRange
= COLORCLAMP_RTFORMAT
,
705 .PreBlendColorClampEnable
= true,
706 .PostBlendColorClampEnable
= true,
707 .SourceBlendFactor
= vk_to_gen_blend
[a
->srcColorBlendFactor
],
708 .DestinationBlendFactor
= vk_to_gen_blend
[a
->dstColorBlendFactor
],
709 .ColorBlendFunction
= vk_to_gen_blend_op
[a
->colorBlendOp
],
710 .SourceAlphaBlendFactor
= vk_to_gen_blend
[a
->srcAlphaBlendFactor
],
711 .DestinationAlphaBlendFactor
= vk_to_gen_blend
[a
->dstAlphaBlendFactor
],
712 .AlphaBlendFunction
= vk_to_gen_blend_op
[a
->alphaBlendOp
],
713 .WriteDisableAlpha
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_A_BIT
),
714 .WriteDisableRed
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_R_BIT
),
715 .WriteDisableGreen
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_G_BIT
),
716 .WriteDisableBlue
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_B_BIT
),
719 if (a
->srcColorBlendFactor
!= a
->srcAlphaBlendFactor
||
720 a
->dstColorBlendFactor
!= a
->dstAlphaBlendFactor
||
721 a
->colorBlendOp
!= a
->alphaBlendOp
) {
723 blend_state
.IndependentAlphaBlendEnable
= true;
725 blend_state
.Entry
[i
].IndependentAlphaBlendEnable
= true;
729 if (a
->colorWriteMask
!= 0)
730 has_writeable_rt
= true;
732 /* Our hardware applies the blend factor prior to the blend function
733 * regardless of what function is used. Technically, this means the
734 * hardware can do MORE than GL or Vulkan specify. However, it also
735 * means that, for MIN and MAX, we have to stomp the blend factor to
736 * ONE to make it a no-op.
738 if (a
->colorBlendOp
== VK_BLEND_OP_MIN
||
739 a
->colorBlendOp
== VK_BLEND_OP_MAX
) {
740 blend_state
.Entry
[i
].SourceBlendFactor
= BLENDFACTOR_ONE
;
741 blend_state
.Entry
[i
].DestinationBlendFactor
= BLENDFACTOR_ONE
;
743 if (a
->alphaBlendOp
== VK_BLEND_OP_MIN
||
744 a
->alphaBlendOp
== VK_BLEND_OP_MAX
) {
745 blend_state
.Entry
[i
].SourceAlphaBlendFactor
= BLENDFACTOR_ONE
;
746 blend_state
.Entry
[i
].DestinationAlphaBlendFactor
= BLENDFACTOR_ONE
;
751 struct GENX(BLEND_STATE_ENTRY
) *bs0
= &blend_state
.Entry
[0];
752 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS_BLEND
), blend
) {
753 blend
.AlphaToCoverageEnable
= blend_state
.AlphaToCoverageEnable
;
754 blend
.HasWriteableRT
= has_writeable_rt
;
755 blend
.ColorBufferBlendEnable
= bs0
->ColorBufferBlendEnable
;
756 blend
.SourceAlphaBlendFactor
= bs0
->SourceAlphaBlendFactor
;
757 blend
.DestinationAlphaBlendFactor
= bs0
->DestinationAlphaBlendFactor
;
758 blend
.SourceBlendFactor
= bs0
->SourceBlendFactor
;
759 blend
.DestinationBlendFactor
= bs0
->DestinationBlendFactor
;
760 blend
.AlphaTestEnable
= false;
761 blend
.IndependentAlphaBlendEnable
=
762 blend_state
.IndependentAlphaBlendEnable
;
765 (void)has_writeable_rt
;
768 GENX(BLEND_STATE_pack
)(NULL
, pipeline
->blend_state
.map
, &blend_state
);
769 if (!device
->info
.has_llc
)
770 anv_state_clflush(pipeline
->blend_state
);
772 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_BLEND_STATE_POINTERS
), bsp
) {
773 bsp
.BlendStatePointer
= pipeline
->blend_state
.offset
;
775 bsp
.BlendStatePointerValid
= true;
781 emit_3dstate_clip(struct anv_pipeline
*pipeline
,
782 const VkPipelineViewportStateCreateInfo
*vp_info
,
783 const VkPipelineRasterizationStateCreateInfo
*rs_info
,
784 const struct anv_graphics_pipeline_create_info
*extra
)
786 const struct brw_wm_prog_data
*wm_prog_data
= get_wm_prog_data(pipeline
);
788 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_CLIP
), clip
) {
789 clip
.ClipEnable
= !(extra
&& extra
->use_rectlist
);
790 clip
.EarlyCullEnable
= true;
791 clip
.APIMode
= APIMODE_D3D
,
792 clip
.ViewportXYClipTestEnable
= true;
794 clip
.ClipMode
= CLIPMODE_NORMAL
;
796 clip
.TriangleStripListProvokingVertexSelect
= 0;
797 clip
.LineStripListProvokingVertexSelect
= 0;
798 clip
.TriangleFanProvokingVertexSelect
= 1;
800 clip
.MinimumPointWidth
= 0.125;
801 clip
.MaximumPointWidth
= 255.875;
802 clip
.MaximumVPIndex
= vp_info
->viewportCount
- 1;
805 clip
.FrontWinding
= vk_to_gen_front_face
[rs_info
->frontFace
];
806 clip
.CullMode
= vk_to_gen_cullmode
[rs_info
->cullMode
];
807 clip
.ViewportZClipTestEnable
= !pipeline
->depth_clamp_enable
;
809 clip
.NonPerspectiveBarycentricEnable
= wm_prog_data
?
810 (wm_prog_data
->barycentric_interp_modes
& 0x38) != 0 : 0;
816 emit_3dstate_streamout(struct anv_pipeline
*pipeline
,
817 const VkPipelineRasterizationStateCreateInfo
*rs_info
)
819 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_STREAMOUT
), so
) {
820 so
.RenderingDisable
= rs_info
->rasterizerDiscardEnable
;