/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #ifndef GENX_PIPELINE_UTIL_H
25 #define GENX_PIPELINE_UTIL_H
27 #include "common/gen_l3_config.h"
28 #include "common/gen_sample_positions.h"
29 #include "vk_format_info.h"
32 vertex_element_comp_control(enum isl_format format
, unsigned comp
)
36 case 0: bits
= isl_format_layouts
[format
].channels
.r
.bits
; break;
37 case 1: bits
= isl_format_layouts
[format
].channels
.g
.bits
; break;
38 case 2: bits
= isl_format_layouts
[format
].channels
.b
.bits
; break;
39 case 3: bits
= isl_format_layouts
[format
].channels
.a
.bits
; break;
40 default: unreachable("Invalid component");
44 return VFCOMP_STORE_SRC
;
45 } else if (comp
< 3) {
46 return VFCOMP_STORE_0
;
47 } else if (isl_format_layouts
[format
].channels
.r
.type
== ISL_UINT
||
48 isl_format_layouts
[format
].channels
.r
.type
== ISL_SINT
) {
50 return VFCOMP_STORE_1_INT
;
53 return VFCOMP_STORE_1_FP
;
58 emit_vertex_input(struct anv_pipeline
*pipeline
,
59 const VkPipelineVertexInputStateCreateInfo
*info
)
61 const struct brw_vs_prog_data
*vs_prog_data
= get_vs_prog_data(pipeline
);
63 /* Pull inputs_read out of the VS prog data */
64 const uint64_t inputs_read
= vs_prog_data
->inputs_read
;
65 assert((inputs_read
& ((1 << VERT_ATTRIB_GENERIC0
) - 1)) == 0);
66 const uint32_t elements
= inputs_read
>> VERT_ATTRIB_GENERIC0
;
69 /* On BDW+, we only need to allocate space for base ids. Setting up
70 * the actual vertex and instance id is a separate packet.
72 const bool needs_svgs_elem
= vs_prog_data
->uses_basevertex
||
73 vs_prog_data
->uses_baseinstance
;
75 /* On Haswell and prior, vertex and instance id are created by using the
76 * ComponentControl fields, so we need an element for any of them.
78 const bool needs_svgs_elem
= vs_prog_data
->uses_vertexid
||
79 vs_prog_data
->uses_instanceid
||
80 vs_prog_data
->uses_basevertex
||
81 vs_prog_data
->uses_baseinstance
;
84 uint32_t elem_count
= __builtin_popcount(elements
) + needs_svgs_elem
;
90 const uint32_t num_dwords
= 1 + elem_count
* 2;
91 p
= anv_batch_emitn(&pipeline
->batch
, num_dwords
,
92 GENX(3DSTATE_VERTEX_ELEMENTS
));
93 memset(p
+ 1, 0, (num_dwords
- 1) * 4);
95 for (uint32_t i
= 0; i
< info
->vertexAttributeDescriptionCount
; i
++) {
96 const VkVertexInputAttributeDescription
*desc
=
97 &info
->pVertexAttributeDescriptions
[i
];
98 enum isl_format format
= anv_get_isl_format(&pipeline
->device
->info
,
100 VK_IMAGE_ASPECT_COLOR_BIT
,
101 VK_IMAGE_TILING_LINEAR
);
103 assert(desc
->binding
< 32);
105 if ((elements
& (1 << desc
->location
)) == 0)
106 continue; /* Binding unused */
108 uint32_t slot
= __builtin_popcount(elements
& ((1 << desc
->location
) - 1));
110 struct GENX(VERTEX_ELEMENT_STATE
) element
= {
111 .VertexBufferIndex
= desc
->binding
,
113 .SourceElementFormat
= format
,
114 .EdgeFlagEnable
= false,
115 .SourceElementOffset
= desc
->offset
,
116 .Component0Control
= vertex_element_comp_control(format
, 0),
117 .Component1Control
= vertex_element_comp_control(format
, 1),
118 .Component2Control
= vertex_element_comp_control(format
, 2),
119 .Component3Control
= vertex_element_comp_control(format
, 3),
121 GENX(VERTEX_ELEMENT_STATE_pack
)(NULL
, &p
[1 + slot
* 2], &element
);
124 /* On Broadwell and later, we have a separate VF_INSTANCING packet
125 * that controls instancing. On Haswell and prior, that's part of
126 * VERTEX_BUFFER_STATE which we emit later.
128 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_INSTANCING
), vfi
) {
129 vfi
.InstancingEnable
= pipeline
->instancing_enable
[desc
->binding
];
130 vfi
.VertexElementIndex
= slot
;
131 /* Vulkan so far doesn't have an instance divisor, so
132 * this is always 1 (ignored if not instancing). */
133 vfi
.InstanceDataStepRate
= 1;
138 const uint32_t id_slot
= __builtin_popcount(elements
);
139 if (needs_svgs_elem
) {
140 /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
141 * "Within a VERTEX_ELEMENT_STATE structure, if a Component
142 * Control field is set to something other than VFCOMP_STORE_SRC,
143 * no higher-numbered Component Control fields may be set to
146 * This means, that if we have BaseInstance, we need BaseVertex as
147 * well. Just do all or nothing.
149 uint32_t base_ctrl
= (vs_prog_data
->uses_basevertex
||
150 vs_prog_data
->uses_baseinstance
) ?
151 VFCOMP_STORE_SRC
: VFCOMP_STORE_0
;
153 struct GENX(VERTEX_ELEMENT_STATE
) element
= {
154 .VertexBufferIndex
= 32, /* Reserved for this */
156 .SourceElementFormat
= ISL_FORMAT_R32G32_UINT
,
157 .Component0Control
= base_ctrl
,
158 .Component1Control
= base_ctrl
,
160 .Component2Control
= VFCOMP_STORE_0
,
161 .Component3Control
= VFCOMP_STORE_0
,
163 .Component2Control
= VFCOMP_STORE_VID
,
164 .Component3Control
= VFCOMP_STORE_IID
,
167 GENX(VERTEX_ELEMENT_STATE_pack
)(NULL
, &p
[1 + id_slot
* 2], &element
);
171 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_SGVS
), sgvs
) {
172 sgvs
.VertexIDEnable
= vs_prog_data
->uses_vertexid
;
173 sgvs
.VertexIDComponentNumber
= 2;
174 sgvs
.VertexIDElementOffset
= id_slot
;
175 sgvs
.InstanceIDEnable
= vs_prog_data
->uses_instanceid
;
176 sgvs
.InstanceIDComponentNumber
= 3;
177 sgvs
.InstanceIDElementOffset
= id_slot
;
183 genX(emit_urb_setup
)(struct anv_device
*device
, struct anv_batch
*batch
,
184 VkShaderStageFlags active_stages
,
185 unsigned vs_size
, unsigned gs_size
,
186 const struct gen_l3_config
*l3_config
)
188 if (!(active_stages
& VK_SHADER_STAGE_VERTEX_BIT
))
191 if (!(active_stages
& VK_SHADER_STAGE_GEOMETRY_BIT
))
194 unsigned vs_entry_size_bytes
= vs_size
* 64;
195 unsigned gs_entry_size_bytes
= gs_size
* 64;
197 /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
199 * VS Number of URB Entries must be divisible by 8 if the VS URB Entry
200 * Allocation Size is less than 9 512-bit URB entries.
202 * Similar text exists for GS.
204 unsigned vs_granularity
= (vs_size
< 9) ? 8 : 1;
205 unsigned gs_granularity
= (gs_size
< 9) ? 8 : 1;
207 /* URB allocations must be done in 8k chunks. */
208 unsigned chunk_size_bytes
= 8192;
210 /* Determine the size of the URB in chunks. */
211 const unsigned total_urb_size
=
212 gen_get_l3_config_urb_size(&device
->info
, l3_config
);
213 const unsigned urb_chunks
= total_urb_size
* 1024 / chunk_size_bytes
;
215 /* Reserve space for push constants */
216 unsigned push_constant_kb
;
217 if (device
->info
.gen
>= 8)
218 push_constant_kb
= 32;
219 else if (device
->info
.is_haswell
)
220 push_constant_kb
= device
->info
.gt
== 3 ? 32 : 16;
222 push_constant_kb
= 16;
224 unsigned push_constant_bytes
= push_constant_kb
* 1024;
225 unsigned push_constant_chunks
=
226 push_constant_bytes
/ chunk_size_bytes
;
228 /* Initially, assign each stage the minimum amount of URB space it needs,
229 * and make a note of how much additional space it "wants" (the amount of
230 * additional space it could actually make use of).
233 /* VS has a lower limit on the number of URB entries */
235 ALIGN(device
->info
.urb
.min_vs_entries
* vs_entry_size_bytes
,
236 chunk_size_bytes
) / chunk_size_bytes
;
238 ALIGN(device
->info
.urb
.max_vs_entries
* vs_entry_size_bytes
,
239 chunk_size_bytes
) / chunk_size_bytes
- vs_chunks
;
241 unsigned gs_chunks
= 0;
242 unsigned gs_wants
= 0;
243 if (active_stages
& VK_SHADER_STAGE_GEOMETRY_BIT
) {
244 /* There are two constraints on the minimum amount of URB space we can
247 * (1) We need room for at least 2 URB entries, since we always operate
248 * the GS in DUAL_OBJECT mode.
250 * (2) We can't allocate less than nr_gs_entries_granularity.
252 gs_chunks
= ALIGN(MAX2(gs_granularity
, 2) * gs_entry_size_bytes
,
253 chunk_size_bytes
) / chunk_size_bytes
;
255 ALIGN(device
->info
.urb
.max_gs_entries
* gs_entry_size_bytes
,
256 chunk_size_bytes
) / chunk_size_bytes
- gs_chunks
;
259 /* There should always be enough URB space to satisfy the minimum
260 * requirements of each stage.
262 unsigned total_needs
= push_constant_chunks
+ vs_chunks
+ gs_chunks
;
263 assert(total_needs
<= urb_chunks
);
265 /* Mete out remaining space (if any) in proportion to "wants". */
266 unsigned total_wants
= vs_wants
+ gs_wants
;
267 unsigned remaining_space
= urb_chunks
- total_needs
;
268 if (remaining_space
> total_wants
)
269 remaining_space
= total_wants
;
270 if (remaining_space
> 0) {
271 unsigned vs_additional
= (unsigned)
272 round(vs_wants
* (((double) remaining_space
) / total_wants
));
273 vs_chunks
+= vs_additional
;
274 remaining_space
-= vs_additional
;
275 gs_chunks
+= remaining_space
;
278 /* Sanity check that we haven't over-allocated. */
279 assert(push_constant_chunks
+ vs_chunks
+ gs_chunks
<= urb_chunks
);
281 /* Finally, compute the number of entries that can fit in the space
282 * allocated to each stage.
284 unsigned nr_vs_entries
= vs_chunks
* chunk_size_bytes
/ vs_entry_size_bytes
;
285 unsigned nr_gs_entries
= gs_chunks
* chunk_size_bytes
/ gs_entry_size_bytes
;
287 /* Since we rounded up when computing *_wants, this may be slightly more
288 * than the maximum allowed amount, so correct for that.
290 nr_vs_entries
= MIN2(nr_vs_entries
, device
->info
.urb
.max_vs_entries
);
291 nr_gs_entries
= MIN2(nr_gs_entries
, device
->info
.urb
.max_gs_entries
);
293 /* Ensure that we program a multiple of the granularity. */
294 nr_vs_entries
= ROUND_DOWN_TO(nr_vs_entries
, vs_granularity
);
295 nr_gs_entries
= ROUND_DOWN_TO(nr_gs_entries
, gs_granularity
);
297 /* Finally, sanity check to make sure we have at least the minimum number
298 * of entries needed for each stage.
300 assert(nr_vs_entries
>= device
->info
.urb
.min_vs_entries
);
301 if (active_stages
& VK_SHADER_STAGE_GEOMETRY_BIT
)
302 assert(nr_gs_entries
>= 2);
304 #if GEN_GEN == 7 && !GEN_IS_HASWELL
305 /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
307 * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
308 * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
309 * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
310 * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
311 * needs to be sent before any combination of VS associated 3DSTATE."
313 anv_batch_emit(batch
, GEN7_PIPE_CONTROL
, pc
) {
314 pc
.DepthStallEnable
= true;
315 pc
.PostSyncOperation
= WriteImmediateData
;
316 pc
.Address
= (struct anv_address
) { &device
->workaround_bo
, 0 };
320 /* Lay out the URB in the following order:
325 anv_batch_emit(batch
, GENX(3DSTATE_URB_VS
), urb
) {
326 urb
.VSURBStartingAddress
= push_constant_chunks
;
327 urb
.VSURBEntryAllocationSize
= vs_size
- 1;
328 urb
.VSNumberofURBEntries
= nr_vs_entries
;
331 anv_batch_emit(batch
, GENX(3DSTATE_URB_HS
), urb
) {
332 urb
.HSURBStartingAddress
= push_constant_chunks
;
335 anv_batch_emit(batch
, GENX(3DSTATE_URB_DS
), urb
) {
336 urb
.DSURBStartingAddress
= push_constant_chunks
;
339 anv_batch_emit(batch
, GENX(3DSTATE_URB_GS
), urb
) {
340 urb
.GSURBStartingAddress
= push_constant_chunks
+ vs_chunks
;
341 urb
.GSURBEntryAllocationSize
= gs_size
- 1;
342 urb
.GSNumberofURBEntries
= nr_gs_entries
;
347 emit_urb_setup(struct anv_pipeline
*pipeline
)
349 unsigned vs_entry_size
=
350 (pipeline
->active_stages
& VK_SHADER_STAGE_VERTEX_BIT
) ?
351 get_vs_prog_data(pipeline
)->base
.urb_entry_size
: 0;
352 unsigned gs_entry_size
=
353 (pipeline
->active_stages
& VK_SHADER_STAGE_GEOMETRY_BIT
) ?
354 get_gs_prog_data(pipeline
)->base
.urb_entry_size
: 0;
356 genX(emit_urb_setup
)(pipeline
->device
, &pipeline
->batch
,
357 pipeline
->active_stages
, vs_entry_size
, gs_entry_size
,
358 pipeline
->urb
.l3_config
);
362 emit_3dstate_sbe(struct anv_pipeline
*pipeline
)
364 const struct brw_vs_prog_data
*vs_prog_data
= get_vs_prog_data(pipeline
);
365 const struct brw_gs_prog_data
*gs_prog_data
= get_gs_prog_data(pipeline
);
366 const struct brw_wm_prog_data
*wm_prog_data
= get_wm_prog_data(pipeline
);
367 const struct brw_vue_map
*fs_input_map
;
369 if (pipeline
->gs_kernel
== NO_KERNEL
)
370 fs_input_map
= &vs_prog_data
->base
.vue_map
;
372 fs_input_map
= &gs_prog_data
->base
.vue_map
;
374 struct GENX(3DSTATE_SBE
) sbe
= {
375 GENX(3DSTATE_SBE_header
),
376 .AttributeSwizzleEnable
= true,
377 .PointSpriteTextureCoordinateOrigin
= UPPERLEFT
,
378 .NumberofSFOutputAttributes
= wm_prog_data
->num_varying_inputs
,
379 .ConstantInterpolationEnable
= wm_prog_data
->flat_inputs
,
383 for (unsigned i
= 0; i
< 32; i
++)
384 sbe
.AttributeActiveComponentFormat
[i
] = ACF_XYZW
;
388 /* On Broadwell, they broke 3DSTATE_SBE into two packets */
389 struct GENX(3DSTATE_SBE_SWIZ
) swiz
= {
390 GENX(3DSTATE_SBE_SWIZ_header
),
396 int max_source_attr
= 0;
397 for (int attr
= 0; attr
< VARYING_SLOT_MAX
; attr
++) {
398 int input_index
= wm_prog_data
->urb_setup
[attr
];
403 if (attr
== VARYING_SLOT_PNTC
) {
404 sbe
.PointSpriteTextureCoordinateEnable
= 1 << input_index
;
408 const int slot
= fs_input_map
->varying_to_slot
[attr
];
410 if (input_index
>= 16)
414 /* This attribute does not exist in the VUE--that means that the
415 * vertex shader did not write to it. It could be that it's a
416 * regular varying read by the fragment shader but not written by
417 * the vertex shader or it's gl_PrimitiveID. In the first case the
418 * value is undefined, in the second it needs to be
421 swiz
.Attribute
[input_index
].ConstantSource
= PRIM_ID
;
422 swiz
.Attribute
[input_index
].ComponentOverrideX
= true;
423 swiz
.Attribute
[input_index
].ComponentOverrideY
= true;
424 swiz
.Attribute
[input_index
].ComponentOverrideZ
= true;
425 swiz
.Attribute
[input_index
].ComponentOverrideW
= true;
428 const int source_attr
= slot
- 2;
429 max_source_attr
= MAX2(max_source_attr
, source_attr
);
430 /* We have to subtract two slots to accout for the URB entry output
431 * read offset in the VS and GS stages.
433 swiz
.Attribute
[input_index
].SourceAttribute
= source_attr
;
437 sbe
.VertexURBEntryReadOffset
= 1; /* Skip the VUE header and position slots */
438 sbe
.VertexURBEntryReadLength
= DIV_ROUND_UP(max_source_attr
+ 1, 2);
440 uint32_t *dw
= anv_batch_emit_dwords(&pipeline
->batch
,
441 GENX(3DSTATE_SBE_length
));
442 GENX(3DSTATE_SBE_pack
)(&pipeline
->batch
, dw
, &sbe
);
445 dw
= anv_batch_emit_dwords(&pipeline
->batch
, GENX(3DSTATE_SBE_SWIZ_length
));
446 GENX(3DSTATE_SBE_SWIZ_pack
)(&pipeline
->batch
, dw
, &swiz
);
450 static inline uint32_t
451 scratch_space(const struct brw_stage_prog_data
*prog_data
)
453 return ffs(prog_data
->total_scratch
/ 2048);
456 static const uint32_t vk_to_gen_cullmode
[] = {
457 [VK_CULL_MODE_NONE
] = CULLMODE_NONE
,
458 [VK_CULL_MODE_FRONT_BIT
] = CULLMODE_FRONT
,
459 [VK_CULL_MODE_BACK_BIT
] = CULLMODE_BACK
,
460 [VK_CULL_MODE_FRONT_AND_BACK
] = CULLMODE_BOTH
463 static const uint32_t vk_to_gen_fillmode
[] = {
464 [VK_POLYGON_MODE_FILL
] = FILL_MODE_SOLID
,
465 [VK_POLYGON_MODE_LINE
] = FILL_MODE_WIREFRAME
,
466 [VK_POLYGON_MODE_POINT
] = FILL_MODE_POINT
,
469 static const uint32_t vk_to_gen_front_face
[] = {
470 [VK_FRONT_FACE_COUNTER_CLOCKWISE
] = 1,
471 [VK_FRONT_FACE_CLOCKWISE
] = 0
475 emit_rs_state(struct anv_pipeline
*pipeline
,
476 const VkPipelineRasterizationStateCreateInfo
*rs_info
,
477 const VkPipelineMultisampleStateCreateInfo
*ms_info
,
478 const struct anv_render_pass
*pass
,
479 const struct anv_subpass
*subpass
)
481 struct GENX(3DSTATE_SF
) sf
= {
482 GENX(3DSTATE_SF_header
),
485 sf
.ViewportTransformEnable
= true;
486 sf
.StatisticsEnable
= true;
487 sf
.TriangleStripListProvokingVertexSelect
= 0;
488 sf
.LineStripListProvokingVertexSelect
= 0;
489 sf
.TriangleFanProvokingVertexSelect
= 1;
490 sf
.PointWidthSource
= Vertex
;
494 struct GENX(3DSTATE_RASTER
) raster
= {
495 GENX(3DSTATE_RASTER_header
),
501 /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
502 * "Multisample Modes State".
505 raster
.DXMultisampleRasterizationEnable
= true;
506 raster
.ForcedSampleCount
= FSC_NUMRASTSAMPLES_0
;
507 raster
.ForceMultisampling
= false;
509 raster
.MultisampleRasterizationMode
=
510 (ms_info
&& ms_info
->rasterizationSamples
> 1) ?
511 MSRASTMODE_ON_PATTERN
: MSRASTMODE_OFF_PIXEL
;
514 raster
.FrontWinding
= vk_to_gen_front_face
[rs_info
->frontFace
];
515 raster
.CullMode
= vk_to_gen_cullmode
[rs_info
->cullMode
];
516 raster
.FrontFaceFillMode
= vk_to_gen_fillmode
[rs_info
->polygonMode
];
517 raster
.BackFaceFillMode
= vk_to_gen_fillmode
[rs_info
->polygonMode
];
518 raster
.ScissorRectangleEnable
= true;
521 /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
522 raster
.ViewportZFarClipTestEnable
= !pipeline
->depth_clamp_enable
;
523 raster
.ViewportZNearClipTestEnable
= !pipeline
->depth_clamp_enable
;
525 raster
.ViewportZClipTestEnable
= !pipeline
->depth_clamp_enable
;
528 raster
.GlobalDepthOffsetEnableSolid
= rs_info
->depthBiasEnable
;
529 raster
.GlobalDepthOffsetEnableWireframe
= rs_info
->depthBiasEnable
;
530 raster
.GlobalDepthOffsetEnablePoint
= rs_info
->depthBiasEnable
;
533 /* Gen7 requires that we provide the depth format in 3DSTATE_SF so that it
534 * can get the depth offsets correct.
536 if (subpass
->depth_stencil_attachment
< pass
->attachment_count
) {
538 pass
->attachments
[subpass
->depth_stencil_attachment
].format
;
539 assert(vk_format_is_depth_or_stencil(vk_format
));
540 if (vk_format_aspects(vk_format
) & VK_IMAGE_ASPECT_DEPTH_BIT
) {
541 enum isl_format isl_format
=
542 anv_get_isl_format(&pipeline
->device
->info
, vk_format
,
543 VK_IMAGE_ASPECT_DEPTH_BIT
,
544 VK_IMAGE_TILING_OPTIMAL
);
545 sf
.DepthBufferSurfaceFormat
=
546 isl_format_get_depth_format(isl_format
, false);
552 GENX(3DSTATE_SF_pack
)(NULL
, pipeline
->gen8
.sf
, &sf
);
553 GENX(3DSTATE_RASTER_pack
)(NULL
, pipeline
->gen8
.raster
, &raster
);
556 GENX(3DSTATE_SF_pack
)(NULL
, &pipeline
->gen7
.sf
, &sf
);
561 emit_ms_state(struct anv_pipeline
*pipeline
,
562 const VkPipelineMultisampleStateCreateInfo
*info
)
564 uint32_t samples
= 1;
565 uint32_t log2_samples
= 0;
567 /* From the Vulkan 1.0 spec:
568 * If pSampleMask is NULL, it is treated as if the mask has all bits
569 * enabled, i.e. no coverage is removed from fragments.
571 * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
574 uint32_t sample_mask
= 0xffff;
576 uint32_t sample_mask
= 0xff;
580 samples
= info
->rasterizationSamples
;
581 log2_samples
= __builtin_ffs(samples
) - 1;
584 if (info
&& info
->pSampleMask
)
585 sample_mask
&= info
->pSampleMask
[0];
587 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_MULTISAMPLE
), ms
) {
588 ms
.NumberofMultisamples
= log2_samples
;
591 /* The PRM says that this bit is valid only for DX9:
593 * SW can choose to set this bit only for DX9 API. DX10/OGL API's
594 * should not have any effect by setting or not setting this bit.
596 ms
.PixelPositionOffsetEnable
= false;
597 ms
.PixelLocation
= CENTER
;
599 ms
.PixelLocation
= PIXLOC_CENTER
;
603 GEN_SAMPLE_POS_1X(ms
.Sample
);
606 GEN_SAMPLE_POS_2X(ms
.Sample
);
609 GEN_SAMPLE_POS_4X(ms
.Sample
);
612 GEN_SAMPLE_POS_8X(ms
.Sample
);
620 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_SAMPLE_MASK
), sm
) {
621 sm
.SampleMask
= sample_mask
;
625 static const uint32_t vk_to_gen_logic_op
[] = {
626 [VK_LOGIC_OP_COPY
] = LOGICOP_COPY
,
627 [VK_LOGIC_OP_CLEAR
] = LOGICOP_CLEAR
,
628 [VK_LOGIC_OP_AND
] = LOGICOP_AND
,
629 [VK_LOGIC_OP_AND_REVERSE
] = LOGICOP_AND_REVERSE
,
630 [VK_LOGIC_OP_AND_INVERTED
] = LOGICOP_AND_INVERTED
,
631 [VK_LOGIC_OP_NO_OP
] = LOGICOP_NOOP
,
632 [VK_LOGIC_OP_XOR
] = LOGICOP_XOR
,
633 [VK_LOGIC_OP_OR
] = LOGICOP_OR
,
634 [VK_LOGIC_OP_NOR
] = LOGICOP_NOR
,
635 [VK_LOGIC_OP_EQUIVALENT
] = LOGICOP_EQUIV
,
636 [VK_LOGIC_OP_INVERT
] = LOGICOP_INVERT
,
637 [VK_LOGIC_OP_OR_REVERSE
] = LOGICOP_OR_REVERSE
,
638 [VK_LOGIC_OP_COPY_INVERTED
] = LOGICOP_COPY_INVERTED
,
639 [VK_LOGIC_OP_OR_INVERTED
] = LOGICOP_OR_INVERTED
,
640 [VK_LOGIC_OP_NAND
] = LOGICOP_NAND
,
641 [VK_LOGIC_OP_SET
] = LOGICOP_SET
,
644 static const uint32_t vk_to_gen_blend
[] = {
645 [VK_BLEND_FACTOR_ZERO
] = BLENDFACTOR_ZERO
,
646 [VK_BLEND_FACTOR_ONE
] = BLENDFACTOR_ONE
,
647 [VK_BLEND_FACTOR_SRC_COLOR
] = BLENDFACTOR_SRC_COLOR
,
648 [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR
] = BLENDFACTOR_INV_SRC_COLOR
,
649 [VK_BLEND_FACTOR_DST_COLOR
] = BLENDFACTOR_DST_COLOR
,
650 [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR
] = BLENDFACTOR_INV_DST_COLOR
,
651 [VK_BLEND_FACTOR_SRC_ALPHA
] = BLENDFACTOR_SRC_ALPHA
,
652 [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA
] = BLENDFACTOR_INV_SRC_ALPHA
,
653 [VK_BLEND_FACTOR_DST_ALPHA
] = BLENDFACTOR_DST_ALPHA
,
654 [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA
] = BLENDFACTOR_INV_DST_ALPHA
,
655 [VK_BLEND_FACTOR_CONSTANT_COLOR
] = BLENDFACTOR_CONST_COLOR
,
656 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR
]= BLENDFACTOR_INV_CONST_COLOR
,
657 [VK_BLEND_FACTOR_CONSTANT_ALPHA
] = BLENDFACTOR_CONST_ALPHA
,
658 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA
]= BLENDFACTOR_INV_CONST_ALPHA
,
659 [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE
] = BLENDFACTOR_SRC_ALPHA_SATURATE
,
660 [VK_BLEND_FACTOR_SRC1_COLOR
] = BLENDFACTOR_SRC1_COLOR
,
661 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR
] = BLENDFACTOR_INV_SRC1_COLOR
,
662 [VK_BLEND_FACTOR_SRC1_ALPHA
] = BLENDFACTOR_SRC1_ALPHA
,
663 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA
] = BLENDFACTOR_INV_SRC1_ALPHA
,
666 static const uint32_t vk_to_gen_blend_op
[] = {
667 [VK_BLEND_OP_ADD
] = BLENDFUNCTION_ADD
,
668 [VK_BLEND_OP_SUBTRACT
] = BLENDFUNCTION_SUBTRACT
,
669 [VK_BLEND_OP_REVERSE_SUBTRACT
] = BLENDFUNCTION_REVERSE_SUBTRACT
,
670 [VK_BLEND_OP_MIN
] = BLENDFUNCTION_MIN
,
671 [VK_BLEND_OP_MAX
] = BLENDFUNCTION_MAX
,
674 static const uint32_t vk_to_gen_compare_op
[] = {
675 [VK_COMPARE_OP_NEVER
] = PREFILTEROPNEVER
,
676 [VK_COMPARE_OP_LESS
] = PREFILTEROPLESS
,
677 [VK_COMPARE_OP_EQUAL
] = PREFILTEROPEQUAL
,
678 [VK_COMPARE_OP_LESS_OR_EQUAL
] = PREFILTEROPLEQUAL
,
679 [VK_COMPARE_OP_GREATER
] = PREFILTEROPGREATER
,
680 [VK_COMPARE_OP_NOT_EQUAL
] = PREFILTEROPNOTEQUAL
,
681 [VK_COMPARE_OP_GREATER_OR_EQUAL
] = PREFILTEROPGEQUAL
,
682 [VK_COMPARE_OP_ALWAYS
] = PREFILTEROPALWAYS
,
685 static const uint32_t vk_to_gen_stencil_op
[] = {
686 [VK_STENCIL_OP_KEEP
] = STENCILOP_KEEP
,
687 [VK_STENCIL_OP_ZERO
] = STENCILOP_ZERO
,
688 [VK_STENCIL_OP_REPLACE
] = STENCILOP_REPLACE
,
689 [VK_STENCIL_OP_INCREMENT_AND_CLAMP
] = STENCILOP_INCRSAT
,
690 [VK_STENCIL_OP_DECREMENT_AND_CLAMP
] = STENCILOP_DECRSAT
,
691 [VK_STENCIL_OP_INVERT
] = STENCILOP_INVERT
,
692 [VK_STENCIL_OP_INCREMENT_AND_WRAP
] = STENCILOP_INCR
,
693 [VK_STENCIL_OP_DECREMENT_AND_WRAP
] = STENCILOP_DECR
,
697 emit_ds_state(struct anv_pipeline
*pipeline
,
698 const VkPipelineDepthStencilStateCreateInfo
*info
,
699 const struct anv_render_pass
*pass
,
700 const struct anv_subpass
*subpass
)
703 # define depth_stencil_dw pipeline->gen7.depth_stencil_state
705 # define depth_stencil_dw pipeline->gen8.wm_depth_stencil
707 # define depth_stencil_dw pipeline->gen9.wm_depth_stencil
711 /* We're going to OR this together with the dynamic state. We need
712 * to make sure it's initialized to something useful.
714 memset(depth_stencil_dw
, 0, sizeof(depth_stencil_dw
));
718 /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */
721 struct GENX(DEPTH_STENCIL_STATE
) depth_stencil
= {
723 struct GENX(3DSTATE_WM_DEPTH_STENCIL
) depth_stencil
= {
725 .DepthTestEnable
= info
->depthTestEnable
,
726 .DepthBufferWriteEnable
= info
->depthWriteEnable
,
727 .DepthTestFunction
= vk_to_gen_compare_op
[info
->depthCompareOp
],
728 .DoubleSidedStencilEnable
= true,
730 .StencilTestEnable
= info
->stencilTestEnable
,
731 .StencilBufferWriteEnable
= info
->stencilTestEnable
,
732 .StencilFailOp
= vk_to_gen_stencil_op
[info
->front
.failOp
],
733 .StencilPassDepthPassOp
= vk_to_gen_stencil_op
[info
->front
.passOp
],
734 .StencilPassDepthFailOp
= vk_to_gen_stencil_op
[info
->front
.depthFailOp
],
735 .StencilTestFunction
= vk_to_gen_compare_op
[info
->front
.compareOp
],
736 .BackfaceStencilFailOp
= vk_to_gen_stencil_op
[info
->back
.failOp
],
737 .BackfaceStencilPassDepthPassOp
= vk_to_gen_stencil_op
[info
->back
.passOp
],
738 .BackfaceStencilPassDepthFailOp
=vk_to_gen_stencil_op
[info
->back
.depthFailOp
],
739 .BackfaceStencilTestFunction
= vk_to_gen_compare_op
[info
->back
.compareOp
],
742 VkImageAspectFlags aspects
= 0;
743 if (subpass
->depth_stencil_attachment
!= VK_ATTACHMENT_UNUSED
) {
744 VkFormat depth_stencil_format
=
745 pass
->attachments
[subpass
->depth_stencil_attachment
].format
;
746 aspects
= vk_format_aspects(depth_stencil_format
);
749 /* The Vulkan spec requires that if either depth or stencil is not present,
750 * the pipeline is to act as if the test silently passes.
752 if (!(aspects
& VK_IMAGE_ASPECT_DEPTH_BIT
)) {
753 depth_stencil
.DepthBufferWriteEnable
= false;
754 depth_stencil
.DepthTestFunction
= PREFILTEROPALWAYS
;
757 if (!(aspects
& VK_IMAGE_ASPECT_STENCIL_BIT
)) {
758 depth_stencil
.StencilBufferWriteEnable
= false;
759 depth_stencil
.StencilTestFunction
= PREFILTEROPALWAYS
;
760 depth_stencil
.BackfaceStencilTestFunction
= PREFILTEROPALWAYS
;
763 /* From the Broadwell PRM:
765 * "If Depth_Test_Enable = 1 AND Depth_Test_func = EQUAL, the
766 * Depth_Write_Enable must be set to 0."
768 if (info
->depthTestEnable
&& info
->depthCompareOp
== VK_COMPARE_OP_EQUAL
)
769 depth_stencil
.DepthBufferWriteEnable
= false;
772 GENX(DEPTH_STENCIL_STATE_pack
)(NULL
, depth_stencil_dw
, &depth_stencil
);
774 GENX(3DSTATE_WM_DEPTH_STENCIL_pack
)(NULL
, depth_stencil_dw
, &depth_stencil
);
779 emit_cb_state(struct anv_pipeline
*pipeline
,
780 const VkPipelineColorBlendStateCreateInfo
*info
,
781 const VkPipelineMultisampleStateCreateInfo
*ms_info
)
783 struct anv_device
*device
= pipeline
->device
;
785 const uint32_t num_dwords
= GENX(BLEND_STATE_length
);
786 pipeline
->blend_state
=
787 anv_state_pool_alloc(&device
->dynamic_state_pool
, num_dwords
* 4, 64);
789 struct GENX(BLEND_STATE
) blend_state
= {
791 .AlphaToCoverageEnable
= ms_info
&& ms_info
->alphaToCoverageEnable
,
792 .AlphaToOneEnable
= ms_info
&& ms_info
->alphaToOneEnable
,
794 /* Make sure it gets zeroed */
795 .Entry
= { { 0, }, },
799 /* Default everything to disabled */
800 for (uint32_t i
= 0; i
< 8; i
++) {
801 blend_state
.Entry
[i
].WriteDisableAlpha
= true;
802 blend_state
.Entry
[i
].WriteDisableRed
= true;
803 blend_state
.Entry
[i
].WriteDisableGreen
= true;
804 blend_state
.Entry
[i
].WriteDisableBlue
= true;
807 uint32_t surface_count
= 0;
808 struct anv_pipeline_bind_map
*map
;
809 if (anv_pipeline_has_stage(pipeline
, MESA_SHADER_FRAGMENT
)) {
810 map
= &pipeline
->shaders
[MESA_SHADER_FRAGMENT
]->bind_map
;
811 surface_count
= map
->surface_count
;
814 bool has_writeable_rt
= false;
815 for (unsigned i
= 0; i
< surface_count
; i
++) {
816 struct anv_pipeline_binding
*binding
= &map
->surface_to_descriptor
[i
];
818 /* All color attachments are at the beginning of the binding table */
819 if (binding
->set
!= ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS
)
822 /* We can have at most 8 attachments */
825 if (binding
->index
>= info
->attachmentCount
)
828 assert(binding
->binding
== 0);
829 const VkPipelineColorBlendAttachmentState
*a
=
830 &info
->pAttachments
[binding
->index
];
832 blend_state
.Entry
[i
] = (struct GENX(BLEND_STATE_ENTRY
)) {
834 .AlphaToCoverageEnable
= ms_info
&& ms_info
->alphaToCoverageEnable
,
835 .AlphaToOneEnable
= ms_info
&& ms_info
->alphaToOneEnable
,
837 .LogicOpEnable
= info
->logicOpEnable
,
838 .LogicOpFunction
= vk_to_gen_logic_op
[info
->logicOp
],
839 .ColorBufferBlendEnable
= a
->blendEnable
,
840 .ColorClampRange
= COLORCLAMP_RTFORMAT
,
841 .PreBlendColorClampEnable
= true,
842 .PostBlendColorClampEnable
= true,
843 .SourceBlendFactor
= vk_to_gen_blend
[a
->srcColorBlendFactor
],
844 .DestinationBlendFactor
= vk_to_gen_blend
[a
->dstColorBlendFactor
],
845 .ColorBlendFunction
= vk_to_gen_blend_op
[a
->colorBlendOp
],
846 .SourceAlphaBlendFactor
= vk_to_gen_blend
[a
->srcAlphaBlendFactor
],
847 .DestinationAlphaBlendFactor
= vk_to_gen_blend
[a
->dstAlphaBlendFactor
],
848 .AlphaBlendFunction
= vk_to_gen_blend_op
[a
->alphaBlendOp
],
849 .WriteDisableAlpha
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_A_BIT
),
850 .WriteDisableRed
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_R_BIT
),
851 .WriteDisableGreen
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_G_BIT
),
852 .WriteDisableBlue
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_B_BIT
),
855 if (a
->srcColorBlendFactor
!= a
->srcAlphaBlendFactor
||
856 a
->dstColorBlendFactor
!= a
->dstAlphaBlendFactor
||
857 a
->colorBlendOp
!= a
->alphaBlendOp
) {
859 blend_state
.IndependentAlphaBlendEnable
= true;
861 blend_state
.Entry
[i
].IndependentAlphaBlendEnable
= true;
865 if (a
->colorWriteMask
!= 0)
866 has_writeable_rt
= true;
868 /* Our hardware applies the blend factor prior to the blend function
869 * regardless of what function is used. Technically, this means the
870 * hardware can do MORE than GL or Vulkan specify. However, it also
871 * means that, for MIN and MAX, we have to stomp the blend factor to
872 * ONE to make it a no-op.
874 if (a
->colorBlendOp
== VK_BLEND_OP_MIN
||
875 a
->colorBlendOp
== VK_BLEND_OP_MAX
) {
876 blend_state
.Entry
[i
].SourceBlendFactor
= BLENDFACTOR_ONE
;
877 blend_state
.Entry
[i
].DestinationBlendFactor
= BLENDFACTOR_ONE
;
879 if (a
->alphaBlendOp
== VK_BLEND_OP_MIN
||
880 a
->alphaBlendOp
== VK_BLEND_OP_MAX
) {
881 blend_state
.Entry
[i
].SourceAlphaBlendFactor
= BLENDFACTOR_ONE
;
882 blend_state
.Entry
[i
].DestinationAlphaBlendFactor
= BLENDFACTOR_ONE
;
887 struct GENX(BLEND_STATE_ENTRY
) *bs0
= &blend_state
.Entry
[0];
888 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS_BLEND
), blend
) {
889 blend
.AlphaToCoverageEnable
= blend_state
.AlphaToCoverageEnable
;
890 blend
.HasWriteableRT
= has_writeable_rt
;
891 blend
.ColorBufferBlendEnable
= bs0
->ColorBufferBlendEnable
;
892 blend
.SourceAlphaBlendFactor
= bs0
->SourceAlphaBlendFactor
;
893 blend
.DestinationAlphaBlendFactor
= bs0
->DestinationAlphaBlendFactor
;
894 blend
.SourceBlendFactor
= bs0
->SourceBlendFactor
;
895 blend
.DestinationBlendFactor
= bs0
->DestinationBlendFactor
;
896 blend
.AlphaTestEnable
= false;
897 blend
.IndependentAlphaBlendEnable
=
898 blend_state
.IndependentAlphaBlendEnable
;
901 (void)has_writeable_rt
;
904 GENX(BLEND_STATE_pack
)(NULL
, pipeline
->blend_state
.map
, &blend_state
);
905 if (!device
->info
.has_llc
)
906 anv_state_clflush(pipeline
->blend_state
);
908 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_BLEND_STATE_POINTERS
), bsp
) {
909 bsp
.BlendStatePointer
= pipeline
->blend_state
.offset
;
911 bsp
.BlendStatePointerValid
= true;
917 emit_3dstate_clip(struct anv_pipeline
*pipeline
,
918 const VkPipelineViewportStateCreateInfo
*vp_info
,
919 const VkPipelineRasterizationStateCreateInfo
*rs_info
)
921 const struct brw_wm_prog_data
*wm_prog_data
= get_wm_prog_data(pipeline
);
923 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_CLIP
), clip
) {
924 clip
.ClipEnable
= true;
925 clip
.EarlyCullEnable
= true;
926 clip
.APIMode
= APIMODE_D3D
,
927 clip
.ViewportXYClipTestEnable
= true;
929 clip
.ClipMode
= CLIPMODE_NORMAL
;
931 clip
.TriangleStripListProvokingVertexSelect
= 0;
932 clip
.LineStripListProvokingVertexSelect
= 0;
933 clip
.TriangleFanProvokingVertexSelect
= 1;
935 clip
.MinimumPointWidth
= 0.125;
936 clip
.MaximumPointWidth
= 255.875;
937 clip
.MaximumVPIndex
= (vp_info
? vp_info
->viewportCount
: 1) - 1;
940 clip
.FrontWinding
= vk_to_gen_front_face
[rs_info
->frontFace
];
941 clip
.CullMode
= vk_to_gen_cullmode
[rs_info
->cullMode
];
942 clip
.ViewportZClipTestEnable
= !pipeline
->depth_clamp_enable
;
944 clip
.NonPerspectiveBarycentricEnable
= wm_prog_data
?
945 (wm_prog_data
->barycentric_interp_modes
& 0x38) != 0 : 0;
951 emit_3dstate_streamout(struct anv_pipeline
*pipeline
,
952 const VkPipelineRasterizationStateCreateInfo
*rs_info
)
954 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_STREAMOUT
), so
) {
955 so
.RenderingDisable
= rs_info
->rasterizerDiscardEnable
;
959 #endif /* GENX_PIPELINE_UTIL_H */