/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
30 #include "anv_private.h"
32 #include "gen8_pack.h"
33 #include "gen9_pack.h"
35 #include "genX_pipeline_util.h"
38 emit_vertex_input(struct anv_pipeline
*pipeline
,
39 const VkPipelineVertexInputStateCreateInfo
*info
,
40 const struct anv_graphics_pipeline_create_info
*extra
)
42 static_assert(ANV_GEN
>= 8, "should be compiling this for gen < 8");
45 if (extra
&& extra
->disable_vs
) {
46 /* If the VS is disabled, just assume the user knows what they're
47 * doing and apply the layout blindly. This can only come from
48 * meta, so this *should* be safe.
51 for (uint32_t i
= 0; i
< info
->vertexAttributeDescriptionCount
; i
++)
52 elements
|= (1 << info
->pVertexAttributeDescriptions
[i
].location
);
54 /* Pull inputs_read out of the VS prog data */
55 uint64_t inputs_read
= pipeline
->vs_prog_data
.inputs_read
;
56 assert((inputs_read
& ((1 << VERT_ATTRIB_GENERIC0
) - 1)) == 0);
57 elements
= inputs_read
>> VERT_ATTRIB_GENERIC0
;
60 const uint32_t num_dwords
= 1 + __builtin_popcount(elements
) * 2;
64 p
= anv_batch_emitn(&pipeline
->batch
, num_dwords
,
65 GENX(3DSTATE_VERTEX_ELEMENTS
));
66 memset(p
+ 1, 0, (num_dwords
- 1) * 4);
69 for (uint32_t i
= 0; i
< info
->vertexAttributeDescriptionCount
; i
++) {
70 const VkVertexInputAttributeDescription
*desc
=
71 &info
->pVertexAttributeDescriptions
[i
];
72 enum isl_format format
= anv_get_isl_format(desc
->format
,
73 VK_IMAGE_ASPECT_COLOR_BIT
,
74 VK_IMAGE_TILING_LINEAR
);
76 assert(desc
->binding
< 32);
78 if ((elements
& (1 << desc
->location
)) == 0)
79 continue; /* Binding unused */
81 uint32_t slot
= __builtin_popcount(elements
& ((1 << desc
->location
) - 1));
83 struct GENX(VERTEX_ELEMENT_STATE
) element
= {
84 .VertexBufferIndex
= desc
->binding
,
86 .SourceElementFormat
= format
,
87 .EdgeFlagEnable
= false,
88 .SourceElementOffset
= desc
->offset
,
89 .Component0Control
= vertex_element_comp_control(format
, 0),
90 .Component1Control
= vertex_element_comp_control(format
, 1),
91 .Component2Control
= vertex_element_comp_control(format
, 2),
92 .Component3Control
= vertex_element_comp_control(format
, 3),
94 GENX(VERTEX_ELEMENT_STATE_pack
)(NULL
, &p
[1 + slot
* 2], &element
);
96 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_INSTANCING
),
97 .InstancingEnable
= pipeline
->instancing_enable
[desc
->binding
],
98 .VertexElementIndex
= slot
,
99 /* Vulkan so far doesn't have an instance divisor, so
100 * this is always 1 (ignored if not instancing). */
101 .InstanceDataStepRate
= 1);
104 const uint32_t id_slot
= __builtin_popcount(elements
);
105 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_SGVS
),
106 .VertexIDEnable
= pipeline
->vs_prog_data
.uses_vertexid
,
107 .VertexIDComponentNumber
= 2,
108 .VertexIDElementOffset
= id_slot
,
109 .InstanceIDEnable
= pipeline
->vs_prog_data
.uses_instanceid
,
110 .InstanceIDComponentNumber
= 3,
111 .InstanceIDElementOffset
= id_slot
);
115 emit_ia_state(struct anv_pipeline
*pipeline
,
116 const VkPipelineInputAssemblyStateCreateInfo
*info
,
117 const struct anv_graphics_pipeline_create_info
*extra
)
119 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_TOPOLOGY
),
120 .PrimitiveTopologyType
= pipeline
->topology
);
124 emit_rs_state(struct anv_pipeline
*pipeline
,
125 const VkPipelineRasterizationStateCreateInfo
*info
,
126 const struct anv_graphics_pipeline_create_info
*extra
)
128 struct GENX(3DSTATE_SF
) sf
= {
129 GENX(3DSTATE_SF_header
),
130 .ViewportTransformEnable
= !(extra
&& extra
->disable_viewport
),
131 .TriangleStripListProvokingVertexSelect
= 0,
132 .LineStripListProvokingVertexSelect
= 0,
133 .TriangleFanProvokingVertexSelect
= 0,
134 .PointWidthSource
= pipeline
->writes_point_size
? Vertex
: State
,
138 /* FINISHME: VkBool32 rasterizerDiscardEnable; */
140 GENX(3DSTATE_SF_pack
)(NULL
, pipeline
->gen8
.sf
, &sf
);
142 struct GENX(3DSTATE_RASTER
) raster
= {
143 GENX(3DSTATE_RASTER_header
),
144 .FrontWinding
= vk_to_gen_front_face
[info
->frontFace
],
145 .CullMode
= vk_to_gen_cullmode
[info
->cullMode
],
146 .FrontFaceFillMode
= vk_to_gen_fillmode
[info
->polygonMode
],
147 .BackFaceFillMode
= vk_to_gen_fillmode
[info
->polygonMode
],
148 .ScissorRectangleEnable
= !(extra
&& extra
->disable_scissor
),
150 .ViewportZClipTestEnable
= true,
152 /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
153 .ViewportZFarClipTestEnable
= true,
154 .ViewportZNearClipTestEnable
= true,
158 GENX(3DSTATE_RASTER_pack
)(NULL
, pipeline
->gen8
.raster
, &raster
);
162 emit_cb_state(struct anv_pipeline
*pipeline
,
163 const VkPipelineColorBlendStateCreateInfo
*info
,
164 const VkPipelineMultisampleStateCreateInfo
*ms_info
)
166 struct anv_device
*device
= pipeline
->device
;
168 uint32_t num_dwords
= GENX(BLEND_STATE_length
);
169 pipeline
->blend_state
=
170 anv_state_pool_alloc(&device
->dynamic_state_pool
, num_dwords
* 4, 64);
172 struct GENX(BLEND_STATE
) blend_state
= {
173 .AlphaToCoverageEnable
= ms_info
&& ms_info
->alphaToCoverageEnable
,
174 .AlphaToOneEnable
= ms_info
&& ms_info
->alphaToOneEnable
,
177 for (uint32_t i
= 0; i
< info
->attachmentCount
; i
++) {
178 const VkPipelineColorBlendAttachmentState
*a
= &info
->pAttachments
[i
];
180 if (a
->srcColorBlendFactor
!= a
->srcAlphaBlendFactor
||
181 a
->dstColorBlendFactor
!= a
->dstAlphaBlendFactor
||
182 a
->colorBlendOp
!= a
->alphaBlendOp
) {
183 blend_state
.IndependentAlphaBlendEnable
= true;
186 blend_state
.Entry
[i
] = (struct GENX(BLEND_STATE_ENTRY
)) {
187 .LogicOpEnable
= info
->logicOpEnable
,
188 .LogicOpFunction
= vk_to_gen_logic_op
[info
->logicOp
],
189 .ColorBufferBlendEnable
= a
->blendEnable
,
190 .PreBlendSourceOnlyClampEnable
= false,
191 .ColorClampRange
= COLORCLAMP_RTFORMAT
,
192 .PreBlendColorClampEnable
= true,
193 .PostBlendColorClampEnable
= true,
194 .SourceBlendFactor
= vk_to_gen_blend
[a
->srcColorBlendFactor
],
195 .DestinationBlendFactor
= vk_to_gen_blend
[a
->dstColorBlendFactor
],
196 .ColorBlendFunction
= vk_to_gen_blend_op
[a
->colorBlendOp
],
197 .SourceAlphaBlendFactor
= vk_to_gen_blend
[a
->srcAlphaBlendFactor
],
198 .DestinationAlphaBlendFactor
= vk_to_gen_blend
[a
->dstAlphaBlendFactor
],
199 .AlphaBlendFunction
= vk_to_gen_blend_op
[a
->alphaBlendOp
],
200 .WriteDisableAlpha
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_A_BIT
),
201 .WriteDisableRed
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_R_BIT
),
202 .WriteDisableGreen
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_G_BIT
),
203 .WriteDisableBlue
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_B_BIT
),
206 /* Our hardware applies the blend factor prior to the blend function
207 * regardless of what function is used. Technically, this means the
208 * hardware can do MORE than GL or Vulkan specify. However, it also
209 * means that, for MIN and MAX, we have to stomp the blend factor to
210 * ONE to make it a no-op.
212 if (a
->colorBlendOp
== VK_BLEND_OP_MIN
||
213 a
->colorBlendOp
== VK_BLEND_OP_MAX
) {
214 blend_state
.Entry
[i
].SourceBlendFactor
= BLENDFACTOR_ONE
;
215 blend_state
.Entry
[i
].DestinationBlendFactor
= BLENDFACTOR_ONE
;
217 if (a
->alphaBlendOp
== VK_BLEND_OP_MIN
||
218 a
->alphaBlendOp
== VK_BLEND_OP_MAX
) {
219 blend_state
.Entry
[i
].SourceAlphaBlendFactor
= BLENDFACTOR_ONE
;
220 blend_state
.Entry
[i
].DestinationAlphaBlendFactor
= BLENDFACTOR_ONE
;
224 GENX(BLEND_STATE_pack
)(NULL
, pipeline
->blend_state
.map
, &blend_state
);
225 if (!device
->info
.has_llc
)
226 anv_state_clflush(pipeline
->blend_state
);
228 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_BLEND_STATE_POINTERS
),
229 .BlendStatePointer
= pipeline
->blend_state
.offset
,
230 .BlendStatePointerValid
= true);
234 emit_ds_state(struct anv_pipeline
*pipeline
,
235 const VkPipelineDepthStencilStateCreateInfo
*info
)
237 uint32_t *dw
= ANV_GEN
== 8 ?
238 pipeline
->gen8
.wm_depth_stencil
: pipeline
->gen9
.wm_depth_stencil
;
241 /* We're going to OR this together with the dynamic state. We need
242 * to make sure it's initialized to something useful.
244 memset(pipeline
->gen8
.wm_depth_stencil
, 0,
245 sizeof(pipeline
->gen8
.wm_depth_stencil
));
246 memset(pipeline
->gen9
.wm_depth_stencil
, 0,
247 sizeof(pipeline
->gen9
.wm_depth_stencil
));
251 /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */
253 struct GENX(3DSTATE_WM_DEPTH_STENCIL
) wm_depth_stencil
= {
254 .DepthTestEnable
= info
->depthTestEnable
,
255 .DepthBufferWriteEnable
= info
->depthWriteEnable
,
256 .DepthTestFunction
= vk_to_gen_compare_op
[info
->depthCompareOp
],
257 .DoubleSidedStencilEnable
= true,
259 .StencilTestEnable
= info
->stencilTestEnable
,
260 .StencilFailOp
= vk_to_gen_stencil_op
[info
->front
.failOp
],
261 .StencilPassDepthPassOp
= vk_to_gen_stencil_op
[info
->front
.passOp
],
262 .StencilPassDepthFailOp
= vk_to_gen_stencil_op
[info
->front
.depthFailOp
],
263 .StencilTestFunction
= vk_to_gen_compare_op
[info
->front
.compareOp
],
264 .BackfaceStencilFailOp
= vk_to_gen_stencil_op
[info
->back
.failOp
],
265 .BackfaceStencilPassDepthPassOp
= vk_to_gen_stencil_op
[info
->back
.passOp
],
266 .BackfaceStencilPassDepthFailOp
=vk_to_gen_stencil_op
[info
->back
.depthFailOp
],
267 .BackfaceStencilTestFunction
= vk_to_gen_compare_op
[info
->back
.compareOp
],
270 GENX(3DSTATE_WM_DEPTH_STENCIL_pack
)(NULL
, dw
, &wm_depth_stencil
);
274 genX(graphics_pipeline_create
)(
276 struct anv_pipeline_cache
* cache
,
277 const VkGraphicsPipelineCreateInfo
* pCreateInfo
,
278 const struct anv_graphics_pipeline_create_info
*extra
,
279 const VkAllocationCallbacks
* pAllocator
,
280 VkPipeline
* pPipeline
)
282 ANV_FROM_HANDLE(anv_device
, device
, _device
);
283 struct anv_pipeline
*pipeline
;
285 uint32_t offset
, length
;
287 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO
);
289 pipeline
= anv_alloc2(&device
->alloc
, pAllocator
, sizeof(*pipeline
), 8,
290 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
291 if (pipeline
== NULL
)
292 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
294 result
= anv_pipeline_init(pipeline
, device
, cache
,
295 pCreateInfo
, extra
, pAllocator
);
296 if (result
!= VK_SUCCESS
) {
297 anv_free2(&device
->alloc
, pAllocator
, pipeline
);
301 assert(pCreateInfo
->pVertexInputState
);
302 emit_vertex_input(pipeline
, pCreateInfo
->pVertexInputState
, extra
);
303 assert(pCreateInfo
->pInputAssemblyState
);
304 emit_ia_state(pipeline
, pCreateInfo
->pInputAssemblyState
, extra
);
305 assert(pCreateInfo
->pRasterizationState
);
306 emit_rs_state(pipeline
, pCreateInfo
->pRasterizationState
, extra
);
307 emit_ds_state(pipeline
, pCreateInfo
->pDepthStencilState
);
308 emit_cb_state(pipeline
, pCreateInfo
->pColorBlendState
,
309 pCreateInfo
->pMultisampleState
);
311 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_STATISTICS
),
312 .StatisticsEnable
= true);
313 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_HS
), .Enable
= false);
314 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_TE
), .TEEnable
= false);
315 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_DS
), .FunctionEnable
= false);
316 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_STREAMOUT
), .SOFunctionEnable
= false);
318 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS
),
319 .ConstantBufferOffset
= 0,
320 .ConstantBufferSize
= 4);
321 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS
),
322 .ConstantBufferOffset
= 4,
323 .ConstantBufferSize
= 4);
324 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS
),
325 .ConstantBufferOffset
= 8,
326 .ConstantBufferSize
= 4);
328 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_WM_CHROMAKEY
),
329 .ChromaKeyKillEnable
= false);
330 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_AA_LINE_PARAMETERS
));
332 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_CLIP
),
334 .ViewportXYClipTestEnable
= !(extra
&& extra
->disable_viewport
),
335 .MinimumPointWidth
= 0.125,
336 .MaximumPointWidth
= 255.875);
338 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_WM
),
339 .StatisticsEnable
= true,
340 .LineEndCapAntialiasingRegionWidth
= _05pixels
,
341 .LineAntialiasingRegionWidth
= _10pixels
,
342 .EarlyDepthStencilControl
= NORMAL
,
343 .ForceThreadDispatchEnable
= NORMAL
,
344 .PointRasterizationRule
= RASTRULE_UPPER_RIGHT
,
345 .BarycentricInterpolationMode
=
346 pipeline
->wm_prog_data
.barycentric_interp_modes
);
348 uint32_t samples
= 1;
349 uint32_t log2_samples
= __builtin_ffs(samples
) - 1;
350 bool enable_sampling
= samples
> 1 ? true : false;
352 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_MULTISAMPLE
),
353 .PixelPositionOffsetEnable
= enable_sampling
,
354 .PixelLocation
= CENTER
,
355 .NumberofMultisamples
= log2_samples
);
357 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_SAMPLE_MASK
),
358 .SampleMask
= 0xffff);
360 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_URB_VS
),
361 .VSURBStartingAddress
= pipeline
->urb
.vs_start
,
362 .VSURBEntryAllocationSize
= pipeline
->urb
.vs_size
- 1,
363 .VSNumberofURBEntries
= pipeline
->urb
.nr_vs_entries
);
365 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_URB_GS
),
366 .GSURBStartingAddress
= pipeline
->urb
.gs_start
,
367 .GSURBEntryAllocationSize
= pipeline
->urb
.gs_size
- 1,
368 .GSNumberofURBEntries
= pipeline
->urb
.nr_gs_entries
);
370 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_URB_HS
),
371 .HSURBStartingAddress
= pipeline
->urb
.vs_start
,
372 .HSURBEntryAllocationSize
= 0,
373 .HSNumberofURBEntries
= 0);
375 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_URB_DS
),
376 .DSURBStartingAddress
= pipeline
->urb
.vs_start
,
377 .DSURBEntryAllocationSize
= 0,
378 .DSNumberofURBEntries
= 0);
380 const struct brw_gs_prog_data
*gs_prog_data
= &pipeline
->gs_prog_data
;
382 length
= (gs_prog_data
->base
.vue_map
.num_slots
+ 1) / 2 - offset
;
384 if (pipeline
->gs_kernel
== NO_KERNEL
)
385 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_GS
), .Enable
= false);
387 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_GS
),
388 .SingleProgramFlow
= false,
389 .KernelStartPointer
= pipeline
->gs_kernel
,
390 .VectorMaskEnable
= Dmask
,
392 .BindingTableEntryCount
= 0,
393 .ExpectedVertexCount
= pipeline
->gs_vertex_count
,
395 .ScratchSpaceBasePointer
= pipeline
->scratch_start
[MESA_SHADER_GEOMETRY
],
396 .PerThreadScratchSpace
= ffs(gs_prog_data
->base
.base
.total_scratch
/ 2048),
398 .OutputVertexSize
= gs_prog_data
->output_vertex_size_hwords
* 2 - 1,
399 .OutputTopology
= gs_prog_data
->output_topology
,
400 .VertexURBEntryReadLength
= gs_prog_data
->base
.urb_read_length
,
401 .IncludeVertexHandles
= gs_prog_data
->base
.include_vue_handles
,
402 .DispatchGRFStartRegisterForURBData
=
403 gs_prog_data
->base
.base
.dispatch_grf_start_reg
,
405 .MaximumNumberofThreads
= device
->info
.max_gs_threads
/ 2 - 1,
406 .ControlDataHeaderSize
= gs_prog_data
->control_data_header_size_hwords
,
407 .DispatchMode
= gs_prog_data
->base
.dispatch_mode
,
408 .StatisticsEnable
= true,
409 .IncludePrimitiveID
= gs_prog_data
->include_primitive_id
,
410 .ReorderMode
= TRAILING
,
413 .ControlDataFormat
= gs_prog_data
->control_data_format
,
415 .StaticOutput
= gs_prog_data
->static_vertex_count
>= 0,
416 .StaticOutputVertexCount
=
417 gs_prog_data
->static_vertex_count
>= 0 ?
418 gs_prog_data
->static_vertex_count
: 0,
420 /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled:
421 * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v)
422 * UserClipDistanceCullTestEnableBitmask(v)
425 .VertexURBEntryOutputReadOffset
= offset
,
426 .VertexURBEntryOutputLength
= length
);
428 const struct brw_vue_prog_data
*vue_prog_data
= &pipeline
->vs_prog_data
.base
;
429 /* Skip the VUE header and position slots */
431 length
= (vue_prog_data
->vue_map
.num_slots
+ 1) / 2 - offset
;
433 uint32_t vs_start
= pipeline
->vs_simd8
!= NO_KERNEL
? pipeline
->vs_simd8
:
436 if (vs_start
== NO_KERNEL
|| (extra
&& extra
->disable_vs
))
437 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VS
),
438 .FunctionEnable
= false,
439 /* Even if VS is disabled, SBE still gets the amount of
440 * vertex data to read from this field. */
441 .VertexURBEntryOutputReadOffset
= offset
,
442 .VertexURBEntryOutputLength
= length
);
444 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VS
),
445 .KernelStartPointer
= vs_start
,
446 .SingleVertexDispatch
= Multiple
,
447 .VectorMaskEnable
= Dmask
,
449 .BindingTableEntryCount
=
450 vue_prog_data
->base
.binding_table
.size_bytes
/ 4,
451 .ThreadDispatchPriority
= Normal
,
452 .FloatingPointMode
= IEEE754
,
453 .IllegalOpcodeExceptionEnable
= false,
454 .AccessesUAV
= false,
455 .SoftwareExceptionEnable
= false,
457 .ScratchSpaceBasePointer
= pipeline
->scratch_start
[MESA_SHADER_VERTEX
],
458 .PerThreadScratchSpace
= ffs(vue_prog_data
->base
.total_scratch
/ 2048),
460 .DispatchGRFStartRegisterForURBData
=
461 vue_prog_data
->base
.dispatch_grf_start_reg
,
462 .VertexURBEntryReadLength
= vue_prog_data
->urb_read_length
,
463 .VertexURBEntryReadOffset
= 0,
465 .MaximumNumberofThreads
= device
->info
.max_vs_threads
- 1,
466 .StatisticsEnable
= false,
467 .SIMD8DispatchEnable
= pipeline
->vs_simd8
!= NO_KERNEL
,
468 .VertexCacheDisable
= false,
469 .FunctionEnable
= true,
471 .VertexURBEntryOutputReadOffset
= offset
,
472 .VertexURBEntryOutputLength
= length
,
473 .UserClipDistanceClipTestEnableBitmask
= 0,
474 .UserClipDistanceCullTestEnableBitmask
= 0);
476 const struct brw_wm_prog_data
*wm_prog_data
= &pipeline
->wm_prog_data
;
478 /* TODO: We should clean this up. Among other things, this is mostly
479 * shared with other gens.
481 const struct brw_vue_map
*fs_input_map
;
482 if (pipeline
->gs_kernel
== NO_KERNEL
)
483 fs_input_map
= &vue_prog_data
->vue_map
;
485 fs_input_map
= &gs_prog_data
->base
.vue_map
;
487 struct GENX(3DSTATE_SBE_SWIZ
) swiz
= {
488 GENX(3DSTATE_SBE_SWIZ_header
),
491 int max_source_attr
= 0;
492 for (int attr
= 0; attr
< VARYING_SLOT_MAX
; attr
++) {
493 int input_index
= wm_prog_data
->urb_setup
[attr
];
498 int source_attr
= fs_input_map
->varying_to_slot
[attr
];
499 max_source_attr
= MAX2(max_source_attr
, source_attr
);
501 if (input_index
>= 16)
504 if (source_attr
== -1) {
505 /* This attribute does not exist in the VUE--that means that the
506 * vertex shader did not write to it. It could be that it's a
507 * regular varying read by the fragment shader but not written by the
508 * vertex shader or it's gl_PrimitiveID. In the first case the value
509 * is undefined, in the second it needs to be gl_PrimitiveID.
511 swiz
.Attribute
[input_index
].ConstantSource
= PRIM_ID
;
512 swiz
.Attribute
[input_index
].ComponentOverrideX
= true;
513 swiz
.Attribute
[input_index
].ComponentOverrideY
= true;
514 swiz
.Attribute
[input_index
].ComponentOverrideZ
= true;
515 swiz
.Attribute
[input_index
].ComponentOverrideW
= true;
517 /* We have to subtract two slots to accout for the URB entry output
518 * read offset in the VS and GS stages.
520 swiz
.Attribute
[input_index
].SourceAttribute
= source_attr
- 2;
524 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_SBE
),
525 .AttributeSwizzleEnable
= true,
526 .ForceVertexURBEntryReadLength
= false,
527 .ForceVertexURBEntryReadOffset
= false,
528 .VertexURBEntryReadLength
= DIV_ROUND_UP(max_source_attr
+ 1, 2),
529 .PointSpriteTextureCoordinateOrigin
= UPPERLEFT
,
530 .NumberofSFOutputAttributes
=
531 wm_prog_data
->num_varying_inputs
,
534 .Attribute0ActiveComponentFormat
= ACF_XYZW
,
535 .Attribute1ActiveComponentFormat
= ACF_XYZW
,
536 .Attribute2ActiveComponentFormat
= ACF_XYZW
,
537 .Attribute3ActiveComponentFormat
= ACF_XYZW
,
538 .Attribute4ActiveComponentFormat
= ACF_XYZW
,
539 .Attribute5ActiveComponentFormat
= ACF_XYZW
,
540 .Attribute6ActiveComponentFormat
= ACF_XYZW
,
541 .Attribute7ActiveComponentFormat
= ACF_XYZW
,
542 .Attribute8ActiveComponentFormat
= ACF_XYZW
,
543 .Attribute9ActiveComponentFormat
= ACF_XYZW
,
544 .Attribute10ActiveComponentFormat
= ACF_XYZW
,
545 .Attribute11ActiveComponentFormat
= ACF_XYZW
,
546 .Attribute12ActiveComponentFormat
= ACF_XYZW
,
547 .Attribute13ActiveComponentFormat
= ACF_XYZW
,
548 .Attribute14ActiveComponentFormat
= ACF_XYZW
,
549 .Attribute15ActiveComponentFormat
= ACF_XYZW
,
550 /* wow, much field, very attribute */
551 .Attribute16ActiveComponentFormat
= ACF_XYZW
,
552 .Attribute17ActiveComponentFormat
= ACF_XYZW
,
553 .Attribute18ActiveComponentFormat
= ACF_XYZW
,
554 .Attribute19ActiveComponentFormat
= ACF_XYZW
,
555 .Attribute20ActiveComponentFormat
= ACF_XYZW
,
556 .Attribute21ActiveComponentFormat
= ACF_XYZW
,
557 .Attribute22ActiveComponentFormat
= ACF_XYZW
,
558 .Attribute23ActiveComponentFormat
= ACF_XYZW
,
559 .Attribute24ActiveComponentFormat
= ACF_XYZW
,
560 .Attribute25ActiveComponentFormat
= ACF_XYZW
,
561 .Attribute26ActiveComponentFormat
= ACF_XYZW
,
562 .Attribute27ActiveComponentFormat
= ACF_XYZW
,
563 .Attribute28ActiveComponentFormat
= ACF_XYZW
,
564 .Attribute29ActiveComponentFormat
= ACF_XYZW
,
565 .Attribute28ActiveComponentFormat
= ACF_XYZW
,
566 .Attribute29ActiveComponentFormat
= ACF_XYZW
,
567 .Attribute30ActiveComponentFormat
= ACF_XYZW
,
571 uint32_t *dw
= anv_batch_emit_dwords(&pipeline
->batch
,
572 GENX(3DSTATE_SBE_SWIZ_length
));
573 GENX(3DSTATE_SBE_SWIZ_pack
)(&pipeline
->batch
, dw
, &swiz
);
575 const int num_thread_bias
= ANV_GEN
== 8 ? 2 : 1;
576 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS
),
577 .KernelStartPointer0
= pipeline
->ps_ksp0
,
579 .SingleProgramFlow
= false,
580 .VectorMaskEnable
= true,
583 .ScratchSpaceBasePointer
= pipeline
->scratch_start
[MESA_SHADER_FRAGMENT
],
584 .PerThreadScratchSpace
= ffs(wm_prog_data
->base
.total_scratch
/ 2048),
586 .MaximumNumberofThreadsPerPSD
= 64 - num_thread_bias
,
587 .PositionXYOffsetSelect
= wm_prog_data
->uses_pos_offset
?
588 POSOFFSET_SAMPLE
: POSOFFSET_NONE
,
589 .PushConstantEnable
= wm_prog_data
->base
.nr_params
> 0,
590 ._8PixelDispatchEnable
= pipeline
->ps_simd8
!= NO_KERNEL
,
591 ._16PixelDispatchEnable
= pipeline
->ps_simd16
!= NO_KERNEL
,
592 ._32PixelDispatchEnable
= false,
594 .DispatchGRFStartRegisterForConstantSetupData0
= pipeline
->ps_grf_start0
,
595 .DispatchGRFStartRegisterForConstantSetupData1
= 0,
596 .DispatchGRFStartRegisterForConstantSetupData2
= pipeline
->ps_grf_start2
,
598 .KernelStartPointer1
= 0,
599 .KernelStartPointer2
= pipeline
->ps_ksp2
);
601 bool per_sample_ps
= false;
602 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS_EXTRA
),
603 .PixelShaderValid
= true,
604 .PixelShaderKillsPixel
= wm_prog_data
->uses_kill
,
605 .PixelShaderComputedDepthMode
= wm_prog_data
->computed_depth_mode
,
606 .AttributeEnable
= wm_prog_data
->num_varying_inputs
> 0,
607 .oMaskPresenttoRenderTarget
= wm_prog_data
->uses_omask
,
608 .PixelShaderIsPerSample
= per_sample_ps
,
610 .PixelShaderPullsBary
= wm_prog_data
->pulls_bary
,
611 .InputCoverageMaskState
= ICMS_NONE
615 *pPipeline
= anv_pipeline_to_handle(pipeline
);
620 VkResult
genX(compute_pipeline_create
)(
622 struct anv_pipeline_cache
* cache
,
623 const VkComputePipelineCreateInfo
* pCreateInfo
,
624 const VkAllocationCallbacks
* pAllocator
,
625 VkPipeline
* pPipeline
)
627 ANV_FROM_HANDLE(anv_device
, device
, _device
);
628 struct anv_pipeline
*pipeline
;
631 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
);
633 pipeline
= anv_alloc2(&device
->alloc
, pAllocator
, sizeof(*pipeline
), 8,
634 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
635 if (pipeline
== NULL
)
636 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
638 pipeline
->device
= device
;
639 pipeline
->layout
= anv_pipeline_layout_from_handle(pCreateInfo
->layout
);
641 pipeline
->blend_state
.map
= NULL
;
643 result
= anv_reloc_list_init(&pipeline
->batch_relocs
,
644 pAllocator
? pAllocator
: &device
->alloc
);
645 if (result
!= VK_SUCCESS
) {
646 anv_free2(&device
->alloc
, pAllocator
, pipeline
);
649 pipeline
->batch
.next
= pipeline
->batch
.start
= pipeline
->batch_data
;
650 pipeline
->batch
.end
= pipeline
->batch
.start
+ sizeof(pipeline
->batch_data
);
651 pipeline
->batch
.relocs
= &pipeline
->batch_relocs
;
653 /* When we free the pipeline, we detect stages based on the NULL status
654 * of various prog_data pointers. Make them NULL by default.
656 memset(pipeline
->prog_data
, 0, sizeof(pipeline
->prog_data
));
657 memset(pipeline
->scratch_start
, 0, sizeof(pipeline
->scratch_start
));
659 pipeline
->vs_simd8
= NO_KERNEL
;
660 pipeline
->vs_vec4
= NO_KERNEL
;
661 pipeline
->gs_kernel
= NO_KERNEL
;
663 pipeline
->active_stages
= 0;
664 pipeline
->total_scratch
= 0;
666 assert(pCreateInfo
->stage
.stage
== VK_SHADER_STAGE_COMPUTE_BIT
);
667 ANV_FROM_HANDLE(anv_shader_module
, module
, pCreateInfo
->stage
.module
);
668 anv_pipeline_compile_cs(pipeline
, cache
, pCreateInfo
, module
,
669 pCreateInfo
->stage
.pName
);
671 pipeline
->use_repclear
= false;
673 const struct brw_cs_prog_data
*cs_prog_data
= &pipeline
->cs_prog_data
;
675 anv_batch_emit(&pipeline
->batch
, GENX(MEDIA_VFE_STATE
),
676 .ScratchSpaceBasePointer
= pipeline
->scratch_start
[MESA_SHADER_COMPUTE
],
677 .PerThreadScratchSpace
= ffs(cs_prog_data
->base
.total_scratch
/ 2048),
678 .ScratchSpaceBasePointerHigh
= 0,
681 .MaximumNumberofThreads
= device
->info
.max_cs_threads
- 1,
682 .NumberofURBEntries
= 2,
683 .ResetGatewayTimer
= true,
685 .BypassGatewayControl
= true,
687 .URBEntryAllocationSize
= 2,
688 .CURBEAllocationSize
= 0);
690 struct brw_cs_prog_data
*prog_data
= &pipeline
->cs_prog_data
;
691 uint32_t group_size
= prog_data
->local_size
[0] *
692 prog_data
->local_size
[1] * prog_data
->local_size
[2];
693 pipeline
->cs_thread_width_max
= DIV_ROUND_UP(group_size
, prog_data
->simd_size
);
694 uint32_t remainder
= group_size
& (prog_data
->simd_size
- 1);
697 pipeline
->cs_right_mask
= ~0u >> (32 - remainder
);
699 pipeline
->cs_right_mask
= ~0u >> (32 - prog_data
->simd_size
);
702 *pPipeline
= anv_pipeline_to_handle(pipeline
);