/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
30 #include "anv_private.h"
32 #include "gen8_pack.h"
33 #include "gen9_pack.h"
36 emit_vertex_input(struct anv_pipeline
*pipeline
,
37 const VkPipelineVertexInputStateCreateInfo
*info
)
39 const uint32_t num_dwords
= 1 + info
->attributeCount
* 2;
42 static_assert(ANV_GEN
>= 8, "should be compiling this for gen < 8");
44 if (info
->attributeCount
> 0) {
45 p
= anv_batch_emitn(&pipeline
->batch
, num_dwords
,
46 GENX(3DSTATE_VERTEX_ELEMENTS
));
49 for (uint32_t i
= 0; i
< info
->attributeCount
; i
++) {
50 const VkVertexInputAttributeDescription
*desc
=
51 &info
->pVertexAttributeDescriptions
[i
];
52 const struct anv_format
*format
= anv_format_for_vk_format(desc
->format
);
54 struct GENX(VERTEX_ELEMENT_STATE
) element
= {
55 .VertexBufferIndex
= desc
->binding
,
57 .SourceElementFormat
= format
->surface_format
,
58 .EdgeFlagEnable
= false,
59 .SourceElementOffset
= desc
->offsetInBytes
,
60 .Component0Control
= VFCOMP_STORE_SRC
,
61 .Component1Control
= format
->num_channels
>= 2 ? VFCOMP_STORE_SRC
: VFCOMP_STORE_0
,
62 .Component2Control
= format
->num_channels
>= 3 ? VFCOMP_STORE_SRC
: VFCOMP_STORE_0
,
63 .Component3Control
= format
->num_channels
>= 4 ? VFCOMP_STORE_SRC
: VFCOMP_STORE_1_FP
65 GENX(VERTEX_ELEMENT_STATE_pack
)(NULL
, &p
[1 + i
* 2], &element
);
67 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_INSTANCING
),
68 .InstancingEnable
= pipeline
->instancing_enable
[desc
->binding
],
69 .VertexElementIndex
= i
,
70 /* Vulkan so far doesn't have an instance divisor, so
71 * this is always 1 (ignored if not instancing). */
72 .InstanceDataStepRate
= 1);
75 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_SGVS
),
76 .VertexIDEnable
= pipeline
->vs_prog_data
.uses_vertexid
,
77 .VertexIDComponentNumber
= 2,
78 .VertexIDElementOffset
= info
->bindingCount
,
79 .InstanceIDEnable
= pipeline
->vs_prog_data
.uses_instanceid
,
80 .InstanceIDComponentNumber
= 3,
81 .InstanceIDElementOffset
= info
->bindingCount
);
85 emit_ia_state(struct anv_pipeline
*pipeline
,
86 const VkPipelineInputAssemblyStateCreateInfo
*info
,
87 const struct anv_graphics_pipeline_create_info
*extra
)
89 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_TOPOLOGY
),
90 .PrimitiveTopologyType
= pipeline
->topology
);
94 emit_rs_state(struct anv_pipeline
*pipeline
,
95 const VkPipelineRasterStateCreateInfo
*info
,
96 const struct anv_graphics_pipeline_create_info
*extra
)
98 static const uint32_t vk_to_gen_cullmode
[] = {
99 [VK_CULL_MODE_NONE
] = CULLMODE_NONE
,
100 [VK_CULL_MODE_FRONT
] = CULLMODE_FRONT
,
101 [VK_CULL_MODE_BACK
] = CULLMODE_BACK
,
102 [VK_CULL_MODE_FRONT_AND_BACK
] = CULLMODE_BOTH
105 static const uint32_t vk_to_gen_fillmode
[] = {
106 [VK_FILL_MODE_POINTS
] = RASTER_POINT
,
107 [VK_FILL_MODE_WIREFRAME
] = RASTER_WIREFRAME
,
108 [VK_FILL_MODE_SOLID
] = RASTER_SOLID
111 static const uint32_t vk_to_gen_front_face
[] = {
112 [VK_FRONT_FACE_CCW
] = CounterClockwise
,
113 [VK_FRONT_FACE_CW
] = Clockwise
116 struct GENX(3DSTATE_SF
) sf
= {
117 GENX(3DSTATE_SF_header
),
118 .ViewportTransformEnable
= !(extra
&& extra
->disable_viewport
),
119 .TriangleStripListProvokingVertexSelect
= 0,
120 .LineStripListProvokingVertexSelect
= 0,
121 .TriangleFanProvokingVertexSelect
= 0,
122 .PointWidthSource
= pipeline
->writes_point_size
? Vertex
: State
,
126 /* FINISHME: VkBool32 rasterizerDiscardEnable; */
128 GENX(3DSTATE_SF_pack
)(NULL
, pipeline
->gen8
.sf
, &sf
);
130 struct GENX(3DSTATE_RASTER
) raster
= {
131 GENX(3DSTATE_RASTER_header
),
132 .FrontWinding
= vk_to_gen_front_face
[info
->frontFace
],
133 .CullMode
= vk_to_gen_cullmode
[info
->cullMode
],
134 .FrontFaceFillMode
= vk_to_gen_fillmode
[info
->fillMode
],
135 .BackFaceFillMode
= vk_to_gen_fillmode
[info
->fillMode
],
136 .ScissorRectangleEnable
= !(extra
&& extra
->disable_scissor
),
138 .ViewportZClipTestEnable
= info
->depthClipEnable
140 /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
141 .ViewportZFarClipTestEnable
= info
->depthClipEnable
,
142 .ViewportZNearClipTestEnable
= info
->depthClipEnable
,
146 GENX(3DSTATE_RASTER_pack
)(NULL
, pipeline
->gen8
.raster
, &raster
);
150 emit_cb_state(struct anv_pipeline
*pipeline
,
151 const VkPipelineColorBlendStateCreateInfo
*info
)
153 struct anv_device
*device
= pipeline
->device
;
155 static const uint32_t vk_to_gen_logic_op
[] = {
156 [VK_LOGIC_OP_COPY
] = LOGICOP_COPY
,
157 [VK_LOGIC_OP_CLEAR
] = LOGICOP_CLEAR
,
158 [VK_LOGIC_OP_AND
] = LOGICOP_AND
,
159 [VK_LOGIC_OP_AND_REVERSE
] = LOGICOP_AND_REVERSE
,
160 [VK_LOGIC_OP_AND_INVERTED
] = LOGICOP_AND_INVERTED
,
161 [VK_LOGIC_OP_NOOP
] = LOGICOP_NOOP
,
162 [VK_LOGIC_OP_XOR
] = LOGICOP_XOR
,
163 [VK_LOGIC_OP_OR
] = LOGICOP_OR
,
164 [VK_LOGIC_OP_NOR
] = LOGICOP_NOR
,
165 [VK_LOGIC_OP_EQUIV
] = LOGICOP_EQUIV
,
166 [VK_LOGIC_OP_INVERT
] = LOGICOP_INVERT
,
167 [VK_LOGIC_OP_OR_REVERSE
] = LOGICOP_OR_REVERSE
,
168 [VK_LOGIC_OP_COPY_INVERTED
] = LOGICOP_COPY_INVERTED
,
169 [VK_LOGIC_OP_OR_INVERTED
] = LOGICOP_OR_INVERTED
,
170 [VK_LOGIC_OP_NAND
] = LOGICOP_NAND
,
171 [VK_LOGIC_OP_SET
] = LOGICOP_SET
,
174 static const uint32_t vk_to_gen_blend
[] = {
175 [VK_BLEND_ZERO
] = BLENDFACTOR_ZERO
,
176 [VK_BLEND_ONE
] = BLENDFACTOR_ONE
,
177 [VK_BLEND_SRC_COLOR
] = BLENDFACTOR_SRC_COLOR
,
178 [VK_BLEND_ONE_MINUS_SRC_COLOR
] = BLENDFACTOR_INV_SRC_COLOR
,
179 [VK_BLEND_DEST_COLOR
] = BLENDFACTOR_DST_COLOR
,
180 [VK_BLEND_ONE_MINUS_DEST_COLOR
] = BLENDFACTOR_INV_DST_COLOR
,
181 [VK_BLEND_SRC_ALPHA
] = BLENDFACTOR_SRC_ALPHA
,
182 [VK_BLEND_ONE_MINUS_SRC_ALPHA
] = BLENDFACTOR_INV_SRC_ALPHA
,
183 [VK_BLEND_DEST_ALPHA
] = BLENDFACTOR_DST_ALPHA
,
184 [VK_BLEND_ONE_MINUS_DEST_ALPHA
] = BLENDFACTOR_INV_DST_ALPHA
,
185 [VK_BLEND_CONSTANT_COLOR
] = BLENDFACTOR_CONST_COLOR
,
186 [VK_BLEND_ONE_MINUS_CONSTANT_COLOR
] = BLENDFACTOR_INV_CONST_COLOR
,
187 [VK_BLEND_CONSTANT_ALPHA
] = BLENDFACTOR_CONST_ALPHA
,
188 [VK_BLEND_ONE_MINUS_CONSTANT_ALPHA
] = BLENDFACTOR_INV_CONST_ALPHA
,
189 [VK_BLEND_SRC_ALPHA_SATURATE
] = BLENDFACTOR_SRC_ALPHA_SATURATE
,
190 [VK_BLEND_SRC1_COLOR
] = BLENDFACTOR_SRC1_COLOR
,
191 [VK_BLEND_ONE_MINUS_SRC1_COLOR
] = BLENDFACTOR_INV_SRC1_COLOR
,
192 [VK_BLEND_SRC1_ALPHA
] = BLENDFACTOR_SRC1_ALPHA
,
193 [VK_BLEND_ONE_MINUS_SRC1_ALPHA
] = BLENDFACTOR_INV_SRC1_ALPHA
,
196 static const uint32_t vk_to_gen_blend_op
[] = {
197 [VK_BLEND_OP_ADD
] = BLENDFUNCTION_ADD
,
198 [VK_BLEND_OP_SUBTRACT
] = BLENDFUNCTION_SUBTRACT
,
199 [VK_BLEND_OP_REVERSE_SUBTRACT
] = BLENDFUNCTION_REVERSE_SUBTRACT
,
200 [VK_BLEND_OP_MIN
] = BLENDFUNCTION_MIN
,
201 [VK_BLEND_OP_MAX
] = BLENDFUNCTION_MAX
,
204 uint32_t num_dwords
= GENX(BLEND_STATE_length
);
205 pipeline
->blend_state
=
206 anv_state_pool_alloc(&device
->dynamic_state_pool
, num_dwords
* 4, 64);
208 struct GENX(BLEND_STATE
) blend_state
= {
209 .AlphaToCoverageEnable
= info
->alphaToCoverageEnable
,
210 .AlphaToOneEnable
= info
->alphaToOneEnable
,
213 for (uint32_t i
= 0; i
< info
->attachmentCount
; i
++) {
214 const VkPipelineColorBlendAttachmentState
*a
= &info
->pAttachments
[i
];
216 if (a
->srcBlendColor
!= a
->srcBlendAlpha
||
217 a
->destBlendColor
!= a
->destBlendAlpha
||
218 a
->blendOpColor
!= a
->blendOpAlpha
) {
219 blend_state
.IndependentAlphaBlendEnable
= true;
222 blend_state
.Entry
[i
] = (struct GENX(BLEND_STATE_ENTRY
)) {
223 .LogicOpEnable
= info
->logicOpEnable
,
224 .LogicOpFunction
= vk_to_gen_logic_op
[info
->logicOp
],
225 .ColorBufferBlendEnable
= a
->blendEnable
,
226 .PreBlendSourceOnlyClampEnable
= false,
227 .ColorClampRange
= COLORCLAMP_RTFORMAT
,
228 .PreBlendColorClampEnable
= true,
229 .PostBlendColorClampEnable
= true,
230 .SourceBlendFactor
= vk_to_gen_blend
[a
->srcBlendColor
],
231 .DestinationBlendFactor
= vk_to_gen_blend
[a
->destBlendColor
],
232 .ColorBlendFunction
= vk_to_gen_blend_op
[a
->blendOpColor
],
233 .SourceAlphaBlendFactor
= vk_to_gen_blend
[a
->srcBlendAlpha
],
234 .DestinationAlphaBlendFactor
= vk_to_gen_blend
[a
->destBlendAlpha
],
235 .AlphaBlendFunction
= vk_to_gen_blend_op
[a
->blendOpAlpha
],
236 .WriteDisableAlpha
= !(a
->channelWriteMask
& VK_CHANNEL_A_BIT
),
237 .WriteDisableRed
= !(a
->channelWriteMask
& VK_CHANNEL_R_BIT
),
238 .WriteDisableGreen
= !(a
->channelWriteMask
& VK_CHANNEL_G_BIT
),
239 .WriteDisableBlue
= !(a
->channelWriteMask
& VK_CHANNEL_B_BIT
),
242 /* Our hardware applies the blend factor prior to the blend function
243 * regardless of what function is used. Technically, this means the
244 * hardware can do MORE than GL or Vulkan specify. However, it also
245 * means that, for MIN and MAX, we have to stomp the blend factor to
246 * ONE to make it a no-op.
248 if (a
->blendOpColor
== VK_BLEND_OP_MIN
||
249 a
->blendOpColor
== VK_BLEND_OP_MAX
) {
250 blend_state
.Entry
[i
].SourceBlendFactor
= BLENDFACTOR_ONE
;
251 blend_state
.Entry
[i
].DestinationBlendFactor
= BLENDFACTOR_ONE
;
253 if (a
->blendOpAlpha
== VK_BLEND_OP_MIN
||
254 a
->blendOpAlpha
== VK_BLEND_OP_MAX
) {
255 blend_state
.Entry
[i
].SourceAlphaBlendFactor
= BLENDFACTOR_ONE
;
256 blend_state
.Entry
[i
].DestinationAlphaBlendFactor
= BLENDFACTOR_ONE
;
260 GENX(BLEND_STATE_pack
)(NULL
, pipeline
->blend_state
.map
, &blend_state
);
262 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_BLEND_STATE_POINTERS
),
263 .BlendStatePointer
= pipeline
->blend_state
.offset
,
264 .BlendStatePointerValid
= true);
267 static const uint32_t vk_to_gen_compare_op
[] = {
268 [VK_COMPARE_OP_NEVER
] = COMPAREFUNCTION_NEVER
,
269 [VK_COMPARE_OP_LESS
] = COMPAREFUNCTION_LESS
,
270 [VK_COMPARE_OP_EQUAL
] = COMPAREFUNCTION_EQUAL
,
271 [VK_COMPARE_OP_LESS_EQUAL
] = COMPAREFUNCTION_LEQUAL
,
272 [VK_COMPARE_OP_GREATER
] = COMPAREFUNCTION_GREATER
,
273 [VK_COMPARE_OP_NOT_EQUAL
] = COMPAREFUNCTION_NOTEQUAL
,
274 [VK_COMPARE_OP_GREATER_EQUAL
] = COMPAREFUNCTION_GEQUAL
,
275 [VK_COMPARE_OP_ALWAYS
] = COMPAREFUNCTION_ALWAYS
,
278 static const uint32_t vk_to_gen_stencil_op
[] = {
279 [VK_STENCIL_OP_KEEP
] = STENCILOP_KEEP
,
280 [VK_STENCIL_OP_ZERO
] = STENCILOP_ZERO
,
281 [VK_STENCIL_OP_REPLACE
] = STENCILOP_REPLACE
,
282 [VK_STENCIL_OP_INC_CLAMP
] = STENCILOP_INCRSAT
,
283 [VK_STENCIL_OP_DEC_CLAMP
] = STENCILOP_DECRSAT
,
284 [VK_STENCIL_OP_INVERT
] = STENCILOP_INVERT
,
285 [VK_STENCIL_OP_INC_WRAP
] = STENCILOP_INCR
,
286 [VK_STENCIL_OP_DEC_WRAP
] = STENCILOP_DECR
,
290 emit_ds_state(struct anv_pipeline
*pipeline
,
291 const VkPipelineDepthStencilStateCreateInfo
*info
)
294 /* We're going to OR this together with the dynamic state. We need
295 * to make sure it's initialized to something useful.
297 /* FIXME: gen9 wm_depth_stencil */
298 memset(pipeline
->gen8
.wm_depth_stencil
, 0,
299 sizeof(pipeline
->gen8
.wm_depth_stencil
));
303 /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */
305 struct GENX(3DSTATE_WM_DEPTH_STENCIL
) wm_depth_stencil
= {
306 .DepthTestEnable
= info
->depthTestEnable
,
307 .DepthBufferWriteEnable
= info
->depthWriteEnable
,
308 .DepthTestFunction
= vk_to_gen_compare_op
[info
->depthCompareOp
],
309 .DoubleSidedStencilEnable
= true,
311 .StencilTestEnable
= info
->stencilTestEnable
,
312 .StencilFailOp
= vk_to_gen_stencil_op
[info
->front
.stencilFailOp
],
313 .StencilPassDepthPassOp
= vk_to_gen_stencil_op
[info
->front
.stencilPassOp
],
314 .StencilPassDepthFailOp
= vk_to_gen_stencil_op
[info
->front
.stencilDepthFailOp
],
315 .StencilTestFunction
= vk_to_gen_compare_op
[info
->front
.stencilCompareOp
],
316 .BackfaceStencilFailOp
= vk_to_gen_stencil_op
[info
->back
.stencilFailOp
],
317 .BackfaceStencilPassDepthPassOp
= vk_to_gen_stencil_op
[info
->back
.stencilPassOp
],
318 .BackfaceStencilPassDepthFailOp
=vk_to_gen_stencil_op
[info
->back
.stencilDepthFailOp
],
319 .BackfaceStencilTestFunction
= vk_to_gen_compare_op
[info
->back
.stencilCompareOp
],
322 GENX(3DSTATE_WM_DEPTH_STENCIL_pack
)(NULL
, pipeline
->gen8
.wm_depth_stencil
, &wm_depth_stencil
);
326 genX(graphics_pipeline_create
)(
328 const VkGraphicsPipelineCreateInfo
* pCreateInfo
,
329 const struct anv_graphics_pipeline_create_info
*extra
,
330 VkPipeline
* pPipeline
)
332 ANV_FROM_HANDLE(anv_device
, device
, _device
);
333 struct anv_pipeline
*pipeline
;
335 uint32_t offset
, length
;
337 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO
);
339 pipeline
= anv_device_alloc(device
, sizeof(*pipeline
), 8,
340 VK_SYSTEM_ALLOC_TYPE_API_OBJECT
);
341 if (pipeline
== NULL
)
342 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
344 result
= anv_pipeline_init(pipeline
, device
, pCreateInfo
, extra
);
345 if (result
!= VK_SUCCESS
)
348 /* FIXME: The compiler dead-codes FS inputs when we don't have a VS, so we
349 * hard code this to num_attributes - 2. This is because the attributes
350 * include VUE header and position, which aren't counted as varying
352 if (pipeline
->vs_simd8
== NO_KERNEL
) {
353 pipeline
->wm_prog_data
.num_varying_inputs
=
354 pCreateInfo
->pVertexInputState
->attributeCount
- 2;
357 assert(pCreateInfo
->pVertexInputState
);
358 emit_vertex_input(pipeline
, pCreateInfo
->pVertexInputState
);
359 assert(pCreateInfo
->pInputAssemblyState
);
360 emit_ia_state(pipeline
, pCreateInfo
->pInputAssemblyState
, extra
);
361 assert(pCreateInfo
->pRasterState
);
362 emit_rs_state(pipeline
, pCreateInfo
->pRasterState
, extra
);
363 emit_ds_state(pipeline
, pCreateInfo
->pDepthStencilState
);
364 emit_cb_state(pipeline
, pCreateInfo
->pColorBlendState
);
366 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_STATISTICS
),
367 .StatisticsEnable
= true);
368 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_HS
), .Enable
= false);
369 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_TE
), .TEEnable
= false);
370 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_DS
), .FunctionEnable
= false);
371 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_STREAMOUT
), .SOFunctionEnable
= false);
373 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS
),
374 .ConstantBufferOffset
= 0,
375 .ConstantBufferSize
= 4);
376 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS
),
377 .ConstantBufferOffset
= 4,
378 .ConstantBufferSize
= 4);
379 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS
),
380 .ConstantBufferOffset
= 8,
381 .ConstantBufferSize
= 4);
383 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_WM_CHROMAKEY
),
384 .ChromaKeyKillEnable
= false);
385 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_AA_LINE_PARAMETERS
));
387 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_CLIP
),
389 .ViewportXYClipTestEnable
= !(extra
&& extra
->disable_viewport
),
390 .MinimumPointWidth
= 0.125,
391 .MaximumPointWidth
= 255.875);
393 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_WM
),
394 .StatisticsEnable
= true,
395 .LineEndCapAntialiasingRegionWidth
= _05pixels
,
396 .LineAntialiasingRegionWidth
= _10pixels
,
397 .EarlyDepthStencilControl
= NORMAL
,
398 .ForceThreadDispatchEnable
= NORMAL
,
399 .PointRasterizationRule
= RASTRULE_UPPER_RIGHT
,
400 .BarycentricInterpolationMode
=
401 pipeline
->wm_prog_data
.barycentric_interp_modes
);
403 uint32_t samples
= 1;
404 uint32_t log2_samples
= __builtin_ffs(samples
) - 1;
405 bool enable_sampling
= samples
> 1 ? true : false;
407 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_MULTISAMPLE
),
408 .PixelPositionOffsetEnable
= enable_sampling
,
409 .PixelLocation
= CENTER
,
410 .NumberofMultisamples
= log2_samples
);
412 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_SAMPLE_MASK
),
413 .SampleMask
= 0xffff);
415 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_URB_VS
),
416 .VSURBStartingAddress
= pipeline
->urb
.vs_start
,
417 .VSURBEntryAllocationSize
= pipeline
->urb
.vs_size
- 1,
418 .VSNumberofURBEntries
= pipeline
->urb
.nr_vs_entries
);
420 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_URB_GS
),
421 .GSURBStartingAddress
= pipeline
->urb
.gs_start
,
422 .GSURBEntryAllocationSize
= pipeline
->urb
.gs_size
- 1,
423 .GSNumberofURBEntries
= pipeline
->urb
.nr_gs_entries
);
425 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_URB_HS
),
426 .HSURBStartingAddress
= pipeline
->urb
.vs_start
,
427 .HSURBEntryAllocationSize
= 0,
428 .HSNumberofURBEntries
= 0);
430 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_URB_DS
),
431 .DSURBStartingAddress
= pipeline
->urb
.vs_start
,
432 .DSURBEntryAllocationSize
= 0,
433 .DSNumberofURBEntries
= 0);
435 const struct brw_gs_prog_data
*gs_prog_data
= &pipeline
->gs_prog_data
;
437 length
= (gs_prog_data
->base
.vue_map
.num_slots
+ 1) / 2 - offset
;
439 if (pipeline
->gs_vec4
== NO_KERNEL
)
440 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_GS
), .Enable
= false);
442 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_GS
),
443 .SingleProgramFlow
= false,
444 .KernelStartPointer
= pipeline
->gs_vec4
,
445 .VectorMaskEnable
= Dmask
,
447 .BindingTableEntryCount
= 0,
448 .ExpectedVertexCount
= pipeline
->gs_vertex_count
,
450 .ScratchSpaceBasePointer
= pipeline
->scratch_start
[VK_SHADER_STAGE_GEOMETRY
],
451 .PerThreadScratchSpace
= ffs(gs_prog_data
->base
.base
.total_scratch
/ 2048),
453 .OutputVertexSize
= gs_prog_data
->output_vertex_size_hwords
* 2 - 1,
454 .OutputTopology
= gs_prog_data
->output_topology
,
455 .VertexURBEntryReadLength
= gs_prog_data
->base
.urb_read_length
,
456 .DispatchGRFStartRegisterForURBData
=
457 gs_prog_data
->base
.base
.dispatch_grf_start_reg
,
459 .MaximumNumberofThreads
= device
->info
.max_gs_threads
/ 2 - 1,
460 .ControlDataHeaderSize
= gs_prog_data
->control_data_header_size_hwords
,
461 .DispatchMode
= gs_prog_data
->base
.dispatch_mode
,
462 .StatisticsEnable
= true,
463 .IncludePrimitiveID
= gs_prog_data
->include_primitive_id
,
464 .ReorderMode
= TRAILING
,
467 .ControlDataFormat
= gs_prog_data
->control_data_format
,
469 .StaticOutput
= gs_prog_data
->static_vertex_count
>= 0,
470 .StaticOutputVertexCount
=
471 gs_prog_data
->static_vertex_count
>= 0 ?
472 gs_prog_data
->static_vertex_count
: 0,
474 /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled:
475 * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v)
476 * UserClipDistanceCullTestEnableBitmask(v)
479 .VertexURBEntryOutputReadOffset
= offset
,
480 .VertexURBEntryOutputLength
= length
);
482 const struct brw_vue_prog_data
*vue_prog_data
= &pipeline
->vs_prog_data
.base
;
483 /* Skip the VUE header and position slots */
485 length
= (vue_prog_data
->vue_map
.num_slots
+ 1) / 2 - offset
;
487 if (pipeline
->vs_simd8
== NO_KERNEL
|| (extra
&& extra
->disable_vs
))
488 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VS
),
489 .FunctionEnable
= false,
490 /* Even if VS is disabled, SBE still gets the amount of
491 * vertex data to read from this field. */
492 .VertexURBEntryOutputReadOffset
= offset
,
493 .VertexURBEntryOutputLength
= length
);
495 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VS
),
496 .KernelStartPointer
= pipeline
->vs_simd8
,
497 .SingleVertexDispatch
= Multiple
,
498 .VectorMaskEnable
= Dmask
,
500 .BindingTableEntryCount
=
501 vue_prog_data
->base
.binding_table
.size_bytes
/ 4,
502 .ThreadDispatchPriority
= Normal
,
503 .FloatingPointMode
= IEEE754
,
504 .IllegalOpcodeExceptionEnable
= false,
505 .AccessesUAV
= false,
506 .SoftwareExceptionEnable
= false,
508 .ScratchSpaceBasePointer
= pipeline
->scratch_start
[VK_SHADER_STAGE_VERTEX
],
509 .PerThreadScratchSpace
= ffs(vue_prog_data
->base
.total_scratch
/ 2048),
511 .DispatchGRFStartRegisterForURBData
=
512 vue_prog_data
->base
.dispatch_grf_start_reg
,
513 .VertexURBEntryReadLength
= vue_prog_data
->urb_read_length
,
514 .VertexURBEntryReadOffset
= 0,
516 .MaximumNumberofThreads
= device
->info
.max_vs_threads
- 1,
517 .StatisticsEnable
= false,
518 .SIMD8DispatchEnable
= true,
519 .VertexCacheDisable
= false,
520 .FunctionEnable
= true,
522 .VertexURBEntryOutputReadOffset
= offset
,
523 .VertexURBEntryOutputLength
= length
,
524 .UserClipDistanceClipTestEnableBitmask
= 0,
525 .UserClipDistanceCullTestEnableBitmask
= 0);
527 const struct brw_wm_prog_data
*wm_prog_data
= &pipeline
->wm_prog_data
;
529 /* TODO: We should clean this up. Among other things, this is mostly
530 * shared with other gens.
532 const struct brw_vue_map
*fs_input_map
;
533 if (pipeline
->gs_vec4
== NO_KERNEL
)
534 fs_input_map
= &vue_prog_data
->vue_map
;
536 fs_input_map
= &gs_prog_data
->base
.vue_map
;
538 struct GENX(3DSTATE_SBE_SWIZ
) swiz
= {
539 GENX(3DSTATE_SBE_SWIZ_header
),
542 int max_source_attr
= 0;
543 for (int attr
= 0; attr
< VARYING_SLOT_MAX
; attr
++) {
544 int input_index
= wm_prog_data
->urb_setup
[attr
];
549 /* We have to subtract two slots to accout for the URB entry output
550 * read offset in the VS and GS stages.
552 int source_attr
= fs_input_map
->varying_to_slot
[attr
] - 2;
553 max_source_attr
= MAX2(max_source_attr
, source_attr
);
555 if (input_index
>= 16)
558 swiz
.Attribute
[input_index
].SourceAttribute
= source_attr
;
561 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_SBE
),
562 .AttributeSwizzleEnable
= true,
563 .ForceVertexURBEntryReadLength
= false,
564 .ForceVertexURBEntryReadOffset
= false,
565 .VertexURBEntryReadLength
= DIV_ROUND_UP(max_source_attr
+ 1, 2),
566 .PointSpriteTextureCoordinateOrigin
= UPPERLEFT
,
567 .NumberofSFOutputAttributes
=
568 wm_prog_data
->num_varying_inputs
,
571 .Attribute0ActiveComponentFormat
= ACF_XYZW
,
572 .Attribute1ActiveComponentFormat
= ACF_XYZW
,
573 .Attribute2ActiveComponentFormat
= ACF_XYZW
,
574 .Attribute3ActiveComponentFormat
= ACF_XYZW
,
575 .Attribute4ActiveComponentFormat
= ACF_XYZW
,
576 .Attribute5ActiveComponentFormat
= ACF_XYZW
,
577 .Attribute6ActiveComponentFormat
= ACF_XYZW
,
578 .Attribute7ActiveComponentFormat
= ACF_XYZW
,
579 .Attribute8ActiveComponentFormat
= ACF_XYZW
,
580 .Attribute9ActiveComponentFormat
= ACF_XYZW
,
581 .Attribute10ActiveComponentFormat
= ACF_XYZW
,
582 .Attribute11ActiveComponentFormat
= ACF_XYZW
,
583 .Attribute12ActiveComponentFormat
= ACF_XYZW
,
584 .Attribute13ActiveComponentFormat
= ACF_XYZW
,
585 .Attribute14ActiveComponentFormat
= ACF_XYZW
,
586 .Attribute15ActiveComponentFormat
= ACF_XYZW
,
587 /* wow, much field, very attribute */
588 .Attribute16ActiveComponentFormat
= ACF_XYZW
,
589 .Attribute17ActiveComponentFormat
= ACF_XYZW
,
590 .Attribute18ActiveComponentFormat
= ACF_XYZW
,
591 .Attribute19ActiveComponentFormat
= ACF_XYZW
,
592 .Attribute20ActiveComponentFormat
= ACF_XYZW
,
593 .Attribute21ActiveComponentFormat
= ACF_XYZW
,
594 .Attribute22ActiveComponentFormat
= ACF_XYZW
,
595 .Attribute23ActiveComponentFormat
= ACF_XYZW
,
596 .Attribute24ActiveComponentFormat
= ACF_XYZW
,
597 .Attribute25ActiveComponentFormat
= ACF_XYZW
,
598 .Attribute26ActiveComponentFormat
= ACF_XYZW
,
599 .Attribute27ActiveComponentFormat
= ACF_XYZW
,
600 .Attribute28ActiveComponentFormat
= ACF_XYZW
,
601 .Attribute29ActiveComponentFormat
= ACF_XYZW
,
602 .Attribute28ActiveComponentFormat
= ACF_XYZW
,
603 .Attribute29ActiveComponentFormat
= ACF_XYZW
,
604 .Attribute30ActiveComponentFormat
= ACF_XYZW
,
608 uint32_t *dw
= anv_batch_emit_dwords(&pipeline
->batch
,
609 GENX(3DSTATE_SBE_SWIZ_length
));
610 GENX(3DSTATE_SBE_SWIZ_pack
)(&pipeline
->batch
, dw
, &swiz
);
612 const int num_thread_bias
= ANV_GEN
== 8 ? 2 : 1;
613 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS
),
614 .KernelStartPointer0
= pipeline
->ps_ksp0
,
616 .SingleProgramFlow
= false,
617 .VectorMaskEnable
= true,
620 .ScratchSpaceBasePointer
= pipeline
->scratch_start
[VK_SHADER_STAGE_FRAGMENT
],
621 .PerThreadScratchSpace
= ffs(wm_prog_data
->base
.total_scratch
/ 2048),
623 .MaximumNumberofThreadsPerPSD
= 64 - num_thread_bias
,
624 .PositionXYOffsetSelect
= wm_prog_data
->uses_pos_offset
?
625 POSOFFSET_SAMPLE
: POSOFFSET_NONE
,
626 .PushConstantEnable
= wm_prog_data
->base
.nr_params
> 0,
627 ._8PixelDispatchEnable
= pipeline
->ps_simd8
!= NO_KERNEL
,
628 ._16PixelDispatchEnable
= pipeline
->ps_simd16
!= NO_KERNEL
,
629 ._32PixelDispatchEnable
= false,
631 .DispatchGRFStartRegisterForConstantSetupData0
= pipeline
->ps_grf_start0
,
632 .DispatchGRFStartRegisterForConstantSetupData1
= 0,
633 .DispatchGRFStartRegisterForConstantSetupData2
= pipeline
->ps_grf_start2
,
635 .KernelStartPointer1
= 0,
636 .KernelStartPointer2
= pipeline
->ps_ksp2
);
638 bool per_sample_ps
= false;
639 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS_EXTRA
),
640 .PixelShaderValid
= true,
641 .PixelShaderKillsPixel
= wm_prog_data
->uses_kill
,
642 .PixelShaderComputedDepthMode
= wm_prog_data
->computed_depth_mode
,
643 .AttributeEnable
= wm_prog_data
->num_varying_inputs
> 0,
644 .oMaskPresenttoRenderTarget
= wm_prog_data
->uses_omask
,
645 .PixelShaderIsPerSample
= per_sample_ps
,
647 .PixelShaderPullsBary
= wm_prog_data
->pulls_bary
,
648 .InputCoverageMaskState
= ICMS_NONE
652 *pPipeline
= anv_pipeline_to_handle(pipeline
);
657 VkResult
genX(compute_pipeline_create
)(
659 const VkComputePipelineCreateInfo
* pCreateInfo
,
660 VkPipeline
* pPipeline
)
662 ANV_FROM_HANDLE(anv_device
, device
, _device
);
663 struct anv_pipeline
*pipeline
;
666 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
);
668 pipeline
= anv_device_alloc(device
, sizeof(*pipeline
), 8,
669 VK_SYSTEM_ALLOC_TYPE_API_OBJECT
);
670 if (pipeline
== NULL
)
671 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
673 pipeline
->device
= device
;
674 pipeline
->layout
= anv_pipeline_layout_from_handle(pCreateInfo
->layout
);
676 pipeline
->blend_state
.map
= NULL
;
678 result
= anv_reloc_list_init(&pipeline
->batch_relocs
, device
);
679 if (result
!= VK_SUCCESS
) {
680 anv_device_free(device
, pipeline
);
683 pipeline
->batch
.next
= pipeline
->batch
.start
= pipeline
->batch_data
;
684 pipeline
->batch
.end
= pipeline
->batch
.start
+ sizeof(pipeline
->batch_data
);
685 pipeline
->batch
.relocs
= &pipeline
->batch_relocs
;
687 anv_state_stream_init(&pipeline
->program_stream
,
688 &device
->instruction_block_pool
);
690 /* When we free the pipeline, we detect stages based on the NULL status
691 * of various prog_data pointers. Make them NULL by default.
693 memset(pipeline
->prog_data
, 0, sizeof(pipeline
->prog_data
));
694 memset(pipeline
->scratch_start
, 0, sizeof(pipeline
->scratch_start
));
696 pipeline
->vs_simd8
= NO_KERNEL
;
697 pipeline
->vs_vec4
= NO_KERNEL
;
698 pipeline
->gs_vec4
= NO_KERNEL
;
700 pipeline
->active_stages
= 0;
701 pipeline
->total_scratch
= 0;
703 assert(pCreateInfo
->stage
.stage
== VK_SHADER_STAGE_COMPUTE
);
704 ANV_FROM_HANDLE(anv_shader
, shader
, pCreateInfo
->stage
.shader
);
705 anv_pipeline_compile_cs(pipeline
, pCreateInfo
, shader
);
707 pipeline
->use_repclear
= false;
709 const struct brw_cs_prog_data
*cs_prog_data
= &pipeline
->cs_prog_data
;
711 anv_batch_emit(&pipeline
->batch
, GENX(MEDIA_VFE_STATE
),
712 .ScratchSpaceBasePointer
= pipeline
->scratch_start
[VK_SHADER_STAGE_COMPUTE
],
713 .PerThreadScratchSpace
= ffs(cs_prog_data
->base
.total_scratch
/ 2048),
714 .ScratchSpaceBasePointerHigh
= 0,
717 .MaximumNumberofThreads
= device
->info
.max_cs_threads
- 1,
718 .NumberofURBEntries
= 2,
719 .ResetGatewayTimer
= true,
721 .BypassGatewayControl
= true,
723 .URBEntryAllocationSize
= 2,
724 .CURBEAllocationSize
= 0);
726 struct brw_cs_prog_data
*prog_data
= &pipeline
->cs_prog_data
;
727 uint32_t group_size
= prog_data
->local_size
[0] *
728 prog_data
->local_size
[1] * prog_data
->local_size
[2];
729 pipeline
->cs_thread_width_max
= DIV_ROUND_UP(group_size
, prog_data
->simd_size
);
730 uint32_t remainder
= group_size
& (prog_data
->simd_size
- 1);
733 pipeline
->cs_right_mask
= ~0u >> (32 - remainder
);
735 pipeline
->cs_right_mask
= ~0u >> (32 - prog_data
->simd_size
);
738 *pPipeline
= anv_pipeline_to_handle(pipeline
);