2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 #include "anv_private.h"
32 #include "gen8_pack.h"
33 #include "gen9_pack.h"
36 emit_vertex_input(struct anv_pipeline
*pipeline
,
37 const VkPipelineVertexInputStateCreateInfo
*info
,
38 const struct anv_graphics_pipeline_create_info
*extra
)
41 static_assert(ANV_GEN
>= 8, "should be compiling this for gen < 8");
44 if (extra
&& extra
->disable_vs
) {
45 /* If the VS is disabled, just assume the user knows what they're
46 * doing and apply the layout blindly. This can only come from
47 * meta, so this *should* be safe.
50 for (uint32_t i
= 0; i
< info
->vertexAttributeDescriptionCount
; i
++)
51 vb_used
|= (1 << info
->pVertexAttributeDescriptions
[i
].location
);
53 /* Pull inputs_read out of the VS prog data */
54 uint64_t inputs_read
= pipeline
->vs_prog_data
.inputs_read
;
55 assert((inputs_read
& ((1 << VERT_ATTRIB_GENERIC0
) - 1)) == 0);
56 vb_used
= inputs_read
>> VERT_ATTRIB_GENERIC0
;
59 const uint32_t num_dwords
= 1 + __builtin_popcount(vb_used
) * 2;
63 p
= anv_batch_emitn(&pipeline
->batch
, num_dwords
,
64 GENX(3DSTATE_VERTEX_ELEMENTS
));
65 memset(p
+ 1, 0, (num_dwords
- 1) * 4);
68 for (uint32_t i
= 0; i
< info
->vertexAttributeDescriptionCount
; i
++) {
69 const VkVertexInputAttributeDescription
*desc
=
70 &info
->pVertexAttributeDescriptions
[i
];
71 const struct anv_format
*format
= anv_format_for_vk_format(desc
->format
);
73 assert(desc
->binding
< 32);
75 if ((vb_used
& (1 << desc
->location
)) == 0)
76 continue; /* Binding unused */
78 uint32_t slot
= __builtin_popcount(vb_used
& ((1 << desc
->location
) - 1));
80 struct GENX(VERTEX_ELEMENT_STATE
) element
= {
81 .VertexBufferIndex
= desc
->binding
,
83 .SourceElementFormat
= format
->surface_format
,
84 .EdgeFlagEnable
= false,
85 .SourceElementOffset
= desc
->offset
,
86 .Component0Control
= VFCOMP_STORE_SRC
,
87 .Component1Control
= format
->num_channels
>= 2 ? VFCOMP_STORE_SRC
: VFCOMP_STORE_0
,
88 .Component2Control
= format
->num_channels
>= 3 ? VFCOMP_STORE_SRC
: VFCOMP_STORE_0
,
89 .Component3Control
= format
->num_channels
>= 4 ? VFCOMP_STORE_SRC
: VFCOMP_STORE_1_FP
91 GENX(VERTEX_ELEMENT_STATE_pack
)(NULL
, &p
[1 + slot
* 2], &element
);
93 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_INSTANCING
),
94 .InstancingEnable
= pipeline
->instancing_enable
[desc
->binding
],
95 .VertexElementIndex
= slot
,
96 /* Vulkan so far doesn't have an instance divisor, so
97 * this is always 1 (ignored if not instancing). */
98 .InstanceDataStepRate
= 1);
101 const uint32_t id_slot
= __builtin_popcount(vb_used
);
102 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_SGVS
),
103 .VertexIDEnable
= pipeline
->vs_prog_data
.uses_vertexid
,
104 .VertexIDComponentNumber
= 2,
105 .VertexIDElementOffset
= id_slot
,
106 .InstanceIDEnable
= pipeline
->vs_prog_data
.uses_instanceid
,
107 .InstanceIDComponentNumber
= 3,
108 .InstanceIDElementOffset
= id_slot
);
112 emit_ia_state(struct anv_pipeline
*pipeline
,
113 const VkPipelineInputAssemblyStateCreateInfo
*info
,
114 const struct anv_graphics_pipeline_create_info
*extra
)
116 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_TOPOLOGY
),
117 .PrimitiveTopologyType
= pipeline
->topology
);
121 emit_rs_state(struct anv_pipeline
*pipeline
,
122 const VkPipelineRasterizationStateCreateInfo
*info
,
123 const struct anv_graphics_pipeline_create_info
*extra
)
125 static const uint32_t vk_to_gen_cullmode
[] = {
126 [VK_CULL_MODE_NONE
] = CULLMODE_NONE
,
127 [VK_CULL_MODE_FRONT_BIT
] = CULLMODE_FRONT
,
128 [VK_CULL_MODE_BACK_BIT
] = CULLMODE_BACK
,
129 [VK_CULL_MODE_FRONT_AND_BACK
] = CULLMODE_BOTH
132 static const uint32_t vk_to_gen_fillmode
[] = {
133 [VK_POLYGON_MODE_FILL
] = RASTER_SOLID
,
134 [VK_POLYGON_MODE_LINE
] = RASTER_WIREFRAME
,
135 [VK_POLYGON_MODE_POINT
] = RASTER_POINT
,
138 static const uint32_t vk_to_gen_front_face
[] = {
139 [VK_FRONT_FACE_COUNTER_CLOCKWISE
] = 1,
140 [VK_FRONT_FACE_CLOCKWISE
] = 0
143 struct GENX(3DSTATE_SF
) sf
= {
144 GENX(3DSTATE_SF_header
),
145 .ViewportTransformEnable
= !(extra
&& extra
->disable_viewport
),
146 .TriangleStripListProvokingVertexSelect
= 0,
147 .LineStripListProvokingVertexSelect
= 0,
148 .TriangleFanProvokingVertexSelect
= 0,
149 .PointWidthSource
= pipeline
->writes_point_size
? Vertex
: State
,
153 /* FINISHME: VkBool32 rasterizerDiscardEnable; */
155 GENX(3DSTATE_SF_pack
)(NULL
, pipeline
->gen8
.sf
, &sf
);
157 struct GENX(3DSTATE_RASTER
) raster
= {
158 GENX(3DSTATE_RASTER_header
),
159 .FrontWinding
= vk_to_gen_front_face
[info
->frontFace
],
160 .CullMode
= vk_to_gen_cullmode
[info
->cullMode
],
161 .FrontFaceFillMode
= vk_to_gen_fillmode
[info
->polygonMode
],
162 .BackFaceFillMode
= vk_to_gen_fillmode
[info
->polygonMode
],
163 .ScissorRectangleEnable
= !(extra
&& extra
->disable_scissor
),
165 .ViewportZClipTestEnable
= true,
167 /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
168 .ViewportZFarClipTestEnable
= true,
169 .ViewportZNearClipTestEnable
= true,
173 GENX(3DSTATE_RASTER_pack
)(NULL
, pipeline
->gen8
.raster
, &raster
);
177 emit_cb_state(struct anv_pipeline
*pipeline
,
178 const VkPipelineColorBlendStateCreateInfo
*info
,
179 const VkPipelineMultisampleStateCreateInfo
*ms_info
)
181 struct anv_device
*device
= pipeline
->device
;
183 static const uint32_t vk_to_gen_logic_op
[] = {
184 [VK_LOGIC_OP_COPY
] = LOGICOP_COPY
,
185 [VK_LOGIC_OP_CLEAR
] = LOGICOP_CLEAR
,
186 [VK_LOGIC_OP_AND
] = LOGICOP_AND
,
187 [VK_LOGIC_OP_AND_REVERSE
] = LOGICOP_AND_REVERSE
,
188 [VK_LOGIC_OP_AND_INVERTED
] = LOGICOP_AND_INVERTED
,
189 [VK_LOGIC_OP_NO_OP
] = LOGICOP_NOOP
,
190 [VK_LOGIC_OP_XOR
] = LOGICOP_XOR
,
191 [VK_LOGIC_OP_OR
] = LOGICOP_OR
,
192 [VK_LOGIC_OP_NOR
] = LOGICOP_NOR
,
193 [VK_LOGIC_OP_EQUIVALENT
] = LOGICOP_EQUIV
,
194 [VK_LOGIC_OP_INVERT
] = LOGICOP_INVERT
,
195 [VK_LOGIC_OP_OR_REVERSE
] = LOGICOP_OR_REVERSE
,
196 [VK_LOGIC_OP_COPY_INVERTED
] = LOGICOP_COPY_INVERTED
,
197 [VK_LOGIC_OP_OR_INVERTED
] = LOGICOP_OR_INVERTED
,
198 [VK_LOGIC_OP_NAND
] = LOGICOP_NAND
,
199 [VK_LOGIC_OP_SET
] = LOGICOP_SET
,
202 static const uint32_t vk_to_gen_blend
[] = {
203 [VK_BLEND_FACTOR_ZERO
] = BLENDFACTOR_ZERO
,
204 [VK_BLEND_FACTOR_ONE
] = BLENDFACTOR_ONE
,
205 [VK_BLEND_FACTOR_SRC_COLOR
] = BLENDFACTOR_SRC_COLOR
,
206 [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR
] = BLENDFACTOR_INV_SRC_COLOR
,
207 [VK_BLEND_FACTOR_DST_COLOR
] = BLENDFACTOR_DST_COLOR
,
208 [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR
] = BLENDFACTOR_INV_DST_COLOR
,
209 [VK_BLEND_FACTOR_SRC_ALPHA
] = BLENDFACTOR_SRC_ALPHA
,
210 [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA
] = BLENDFACTOR_INV_SRC_ALPHA
,
211 [VK_BLEND_FACTOR_DST_ALPHA
] = BLENDFACTOR_DST_ALPHA
,
212 [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA
] = BLENDFACTOR_INV_DST_ALPHA
,
213 [VK_BLEND_FACTOR_CONSTANT_COLOR
] = BLENDFACTOR_CONST_COLOR
,
214 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR
]= BLENDFACTOR_INV_CONST_COLOR
,
215 [VK_BLEND_FACTOR_CONSTANT_ALPHA
] = BLENDFACTOR_CONST_ALPHA
,
216 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA
]= BLENDFACTOR_INV_CONST_ALPHA
,
217 [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE
] = BLENDFACTOR_SRC_ALPHA_SATURATE
,
218 [VK_BLEND_FACTOR_SRC1_COLOR
] = BLENDFACTOR_SRC1_COLOR
,
219 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR
] = BLENDFACTOR_INV_SRC1_COLOR
,
220 [VK_BLEND_FACTOR_SRC1_ALPHA
] = BLENDFACTOR_SRC1_ALPHA
,
221 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA
] = BLENDFACTOR_INV_SRC1_ALPHA
,
224 static const uint32_t vk_to_gen_blend_op
[] = {
225 [VK_BLEND_OP_ADD
] = BLENDFUNCTION_ADD
,
226 [VK_BLEND_OP_SUBTRACT
] = BLENDFUNCTION_SUBTRACT
,
227 [VK_BLEND_OP_REVERSE_SUBTRACT
] = BLENDFUNCTION_REVERSE_SUBTRACT
,
228 [VK_BLEND_OP_MIN
] = BLENDFUNCTION_MIN
,
229 [VK_BLEND_OP_MAX
] = BLENDFUNCTION_MAX
,
232 uint32_t num_dwords
= GENX(BLEND_STATE_length
);
233 pipeline
->blend_state
=
234 anv_state_pool_alloc(&device
->dynamic_state_pool
, num_dwords
* 4, 64);
236 struct GENX(BLEND_STATE
) blend_state
= {
237 .AlphaToCoverageEnable
= ms_info
&& ms_info
->alphaToCoverageEnable
,
238 .AlphaToOneEnable
= ms_info
&& ms_info
->alphaToOneEnable
,
241 for (uint32_t i
= 0; i
< info
->attachmentCount
; i
++) {
242 const VkPipelineColorBlendAttachmentState
*a
= &info
->pAttachments
[i
];
244 if (a
->srcColorBlendFactor
!= a
->srcAlphaBlendFactor
||
245 a
->dstColorBlendFactor
!= a
->dstAlphaBlendFactor
||
246 a
->colorBlendOp
!= a
->alphaBlendOp
) {
247 blend_state
.IndependentAlphaBlendEnable
= true;
250 blend_state
.Entry
[i
] = (struct GENX(BLEND_STATE_ENTRY
)) {
251 .LogicOpEnable
= info
->logicOpEnable
,
252 .LogicOpFunction
= vk_to_gen_logic_op
[info
->logicOp
],
253 .ColorBufferBlendEnable
= a
->blendEnable
,
254 .PreBlendSourceOnlyClampEnable
= false,
255 .ColorClampRange
= COLORCLAMP_RTFORMAT
,
256 .PreBlendColorClampEnable
= true,
257 .PostBlendColorClampEnable
= true,
258 .SourceBlendFactor
= vk_to_gen_blend
[a
->srcColorBlendFactor
],
259 .DestinationBlendFactor
= vk_to_gen_blend
[a
->dstColorBlendFactor
],
260 .ColorBlendFunction
= vk_to_gen_blend_op
[a
->colorBlendOp
],
261 .SourceAlphaBlendFactor
= vk_to_gen_blend
[a
->srcAlphaBlendFactor
],
262 .DestinationAlphaBlendFactor
= vk_to_gen_blend
[a
->dstAlphaBlendFactor
],
263 .AlphaBlendFunction
= vk_to_gen_blend_op
[a
->alphaBlendOp
],
264 .WriteDisableAlpha
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_A_BIT
),
265 .WriteDisableRed
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_R_BIT
),
266 .WriteDisableGreen
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_G_BIT
),
267 .WriteDisableBlue
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_B_BIT
),
270 /* Our hardware applies the blend factor prior to the blend function
271 * regardless of what function is used. Technically, this means the
272 * hardware can do MORE than GL or Vulkan specify. However, it also
273 * means that, for MIN and MAX, we have to stomp the blend factor to
274 * ONE to make it a no-op.
276 if (a
->colorBlendOp
== VK_BLEND_OP_MIN
||
277 a
->colorBlendOp
== VK_BLEND_OP_MAX
) {
278 blend_state
.Entry
[i
].SourceBlendFactor
= BLENDFACTOR_ONE
;
279 blend_state
.Entry
[i
].DestinationBlendFactor
= BLENDFACTOR_ONE
;
281 if (a
->alphaBlendOp
== VK_BLEND_OP_MIN
||
282 a
->alphaBlendOp
== VK_BLEND_OP_MAX
) {
283 blend_state
.Entry
[i
].SourceAlphaBlendFactor
= BLENDFACTOR_ONE
;
284 blend_state
.Entry
[i
].DestinationAlphaBlendFactor
= BLENDFACTOR_ONE
;
288 GENX(BLEND_STATE_pack
)(NULL
, pipeline
->blend_state
.map
, &blend_state
);
289 if (!device
->info
.has_llc
)
290 anv_state_clflush(pipeline
->blend_state
);
292 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_BLEND_STATE_POINTERS
),
293 .BlendStatePointer
= pipeline
->blend_state
.offset
,
294 .BlendStatePointerValid
= true);
297 static const uint32_t vk_to_gen_compare_op
[] = {
298 [VK_COMPARE_OP_NEVER
] = PREFILTEROPNEVER
,
299 [VK_COMPARE_OP_LESS
] = PREFILTEROPLESS
,
300 [VK_COMPARE_OP_EQUAL
] = PREFILTEROPEQUAL
,
301 [VK_COMPARE_OP_LESS_OR_EQUAL
] = PREFILTEROPLEQUAL
,
302 [VK_COMPARE_OP_GREATER
] = PREFILTEROPGREATER
,
303 [VK_COMPARE_OP_NOT_EQUAL
] = PREFILTEROPNOTEQUAL
,
304 [VK_COMPARE_OP_GREATER_OR_EQUAL
] = PREFILTEROPGEQUAL
,
305 [VK_COMPARE_OP_ALWAYS
] = PREFILTEROPALWAYS
,
308 static const uint32_t vk_to_gen_stencil_op
[] = {
309 [VK_STENCIL_OP_KEEP
] = STENCILOP_KEEP
,
310 [VK_STENCIL_OP_ZERO
] = STENCILOP_ZERO
,
311 [VK_STENCIL_OP_REPLACE
] = STENCILOP_REPLACE
,
312 [VK_STENCIL_OP_INCREMENT_AND_CLAMP
] = STENCILOP_INCRSAT
,
313 [VK_STENCIL_OP_DECREMENT_AND_CLAMP
] = STENCILOP_DECRSAT
,
314 [VK_STENCIL_OP_INVERT
] = STENCILOP_INVERT
,
315 [VK_STENCIL_OP_INCREMENT_AND_WRAP
] = STENCILOP_INCR
,
316 [VK_STENCIL_OP_DECREMENT_AND_WRAP
] = STENCILOP_DECR
,
320 emit_ds_state(struct anv_pipeline
*pipeline
,
321 const VkPipelineDepthStencilStateCreateInfo
*info
)
323 uint32_t *dw
= ANV_GEN
== 8 ?
324 pipeline
->gen8
.wm_depth_stencil
: pipeline
->gen9
.wm_depth_stencil
;
327 /* We're going to OR this together with the dynamic state. We need
328 * to make sure it's initialized to something useful.
330 memset(pipeline
->gen8
.wm_depth_stencil
, 0,
331 sizeof(pipeline
->gen8
.wm_depth_stencil
));
332 memset(pipeline
->gen9
.wm_depth_stencil
, 0,
333 sizeof(pipeline
->gen9
.wm_depth_stencil
));
337 /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */
339 struct GENX(3DSTATE_WM_DEPTH_STENCIL
) wm_depth_stencil
= {
340 .DepthTestEnable
= info
->depthTestEnable
,
341 .DepthBufferWriteEnable
= info
->depthWriteEnable
,
342 .DepthTestFunction
= vk_to_gen_compare_op
[info
->depthCompareOp
],
343 .DoubleSidedStencilEnable
= true,
345 .StencilTestEnable
= info
->stencilTestEnable
,
346 .StencilFailOp
= vk_to_gen_stencil_op
[info
->front
.failOp
],
347 .StencilPassDepthPassOp
= vk_to_gen_stencil_op
[info
->front
.passOp
],
348 .StencilPassDepthFailOp
= vk_to_gen_stencil_op
[info
->front
.depthFailOp
],
349 .StencilTestFunction
= vk_to_gen_compare_op
[info
->front
.compareOp
],
350 .BackfaceStencilFailOp
= vk_to_gen_stencil_op
[info
->back
.failOp
],
351 .BackfaceStencilPassDepthPassOp
= vk_to_gen_stencil_op
[info
->back
.passOp
],
352 .BackfaceStencilPassDepthFailOp
=vk_to_gen_stencil_op
[info
->back
.depthFailOp
],
353 .BackfaceStencilTestFunction
= vk_to_gen_compare_op
[info
->back
.compareOp
],
356 GENX(3DSTATE_WM_DEPTH_STENCIL_pack
)(NULL
, dw
, &wm_depth_stencil
);
360 genX(graphics_pipeline_create
)(
362 const VkGraphicsPipelineCreateInfo
* pCreateInfo
,
363 const struct anv_graphics_pipeline_create_info
*extra
,
364 const VkAllocationCallbacks
* pAllocator
,
365 VkPipeline
* pPipeline
)
367 ANV_FROM_HANDLE(anv_device
, device
, _device
);
368 struct anv_pipeline
*pipeline
;
370 uint32_t offset
, length
;
372 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO
);
374 pipeline
= anv_alloc2(&device
->alloc
, pAllocator
, sizeof(*pipeline
), 8,
375 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
376 if (pipeline
== NULL
)
377 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
379 result
= anv_pipeline_init(pipeline
, device
, pCreateInfo
, extra
, pAllocator
);
380 if (result
!= VK_SUCCESS
)
383 assert(pCreateInfo
->pVertexInputState
);
384 emit_vertex_input(pipeline
, pCreateInfo
->pVertexInputState
, extra
);
385 assert(pCreateInfo
->pInputAssemblyState
);
386 emit_ia_state(pipeline
, pCreateInfo
->pInputAssemblyState
, extra
);
387 assert(pCreateInfo
->pRasterizationState
);
388 emit_rs_state(pipeline
, pCreateInfo
->pRasterizationState
, extra
);
389 emit_ds_state(pipeline
, pCreateInfo
->pDepthStencilState
);
390 emit_cb_state(pipeline
, pCreateInfo
->pColorBlendState
,
391 pCreateInfo
->pMultisampleState
);
393 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_STATISTICS
),
394 .StatisticsEnable
= true);
395 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_HS
), .Enable
= false);
396 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_TE
), .TEEnable
= false);
397 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_DS
), .FunctionEnable
= false);
398 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_STREAMOUT
), .SOFunctionEnable
= false);
400 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS
),
401 .ConstantBufferOffset
= 0,
402 .ConstantBufferSize
= 4);
403 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS
),
404 .ConstantBufferOffset
= 4,
405 .ConstantBufferSize
= 4);
406 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS
),
407 .ConstantBufferOffset
= 8,
408 .ConstantBufferSize
= 4);
410 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_WM_CHROMAKEY
),
411 .ChromaKeyKillEnable
= false);
412 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_AA_LINE_PARAMETERS
));
414 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_CLIP
),
416 .ViewportXYClipTestEnable
= !(extra
&& extra
->disable_viewport
),
417 .MinimumPointWidth
= 0.125,
418 .MaximumPointWidth
= 255.875);
420 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_WM
),
421 .StatisticsEnable
= true,
422 .LineEndCapAntialiasingRegionWidth
= _05pixels
,
423 .LineAntialiasingRegionWidth
= _10pixels
,
424 .EarlyDepthStencilControl
= NORMAL
,
425 .ForceThreadDispatchEnable
= NORMAL
,
426 .PointRasterizationRule
= RASTRULE_UPPER_RIGHT
,
427 .BarycentricInterpolationMode
=
428 pipeline
->wm_prog_data
.barycentric_interp_modes
);
430 uint32_t samples
= 1;
431 uint32_t log2_samples
= __builtin_ffs(samples
) - 1;
432 bool enable_sampling
= samples
> 1 ? true : false;
434 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_MULTISAMPLE
),
435 .PixelPositionOffsetEnable
= enable_sampling
,
436 .PixelLocation
= CENTER
,
437 .NumberofMultisamples
= log2_samples
);
439 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_SAMPLE_MASK
),
440 .SampleMask
= 0xffff);
442 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_URB_VS
),
443 .VSURBStartingAddress
= pipeline
->urb
.vs_start
,
444 .VSURBEntryAllocationSize
= pipeline
->urb
.vs_size
- 1,
445 .VSNumberofURBEntries
= pipeline
->urb
.nr_vs_entries
);
447 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_URB_GS
),
448 .GSURBStartingAddress
= pipeline
->urb
.gs_start
,
449 .GSURBEntryAllocationSize
= pipeline
->urb
.gs_size
- 1,
450 .GSNumberofURBEntries
= pipeline
->urb
.nr_gs_entries
);
452 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_URB_HS
),
453 .HSURBStartingAddress
= pipeline
->urb
.vs_start
,
454 .HSURBEntryAllocationSize
= 0,
455 .HSNumberofURBEntries
= 0);
457 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_URB_DS
),
458 .DSURBStartingAddress
= pipeline
->urb
.vs_start
,
459 .DSURBEntryAllocationSize
= 0,
460 .DSNumberofURBEntries
= 0);
462 const struct brw_gs_prog_data
*gs_prog_data
= &pipeline
->gs_prog_data
;
464 length
= (gs_prog_data
->base
.vue_map
.num_slots
+ 1) / 2 - offset
;
466 if (pipeline
->gs_kernel
== NO_KERNEL
)
467 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_GS
), .Enable
= false);
469 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_GS
),
470 .SingleProgramFlow
= false,
471 .KernelStartPointer
= pipeline
->gs_kernel
,
472 .VectorMaskEnable
= Dmask
,
474 .BindingTableEntryCount
= 0,
475 .ExpectedVertexCount
= pipeline
->gs_vertex_count
,
477 .ScratchSpaceBasePointer
= pipeline
->scratch_start
[MESA_SHADER_GEOMETRY
],
478 .PerThreadScratchSpace
= ffs(gs_prog_data
->base
.base
.total_scratch
/ 2048),
480 .OutputVertexSize
= gs_prog_data
->output_vertex_size_hwords
* 2 - 1,
481 .OutputTopology
= gs_prog_data
->output_topology
,
482 .VertexURBEntryReadLength
= gs_prog_data
->base
.urb_read_length
,
483 .IncludeVertexHandles
= gs_prog_data
->base
.include_vue_handles
,
484 .DispatchGRFStartRegisterForURBData
=
485 gs_prog_data
->base
.base
.dispatch_grf_start_reg
,
487 .MaximumNumberofThreads
= device
->info
.max_gs_threads
/ 2 - 1,
488 .ControlDataHeaderSize
= gs_prog_data
->control_data_header_size_hwords
,
489 .DispatchMode
= gs_prog_data
->base
.dispatch_mode
,
490 .StatisticsEnable
= true,
491 .IncludePrimitiveID
= gs_prog_data
->include_primitive_id
,
492 .ReorderMode
= TRAILING
,
495 .ControlDataFormat
= gs_prog_data
->control_data_format
,
497 .StaticOutput
= gs_prog_data
->static_vertex_count
>= 0,
498 .StaticOutputVertexCount
=
499 gs_prog_data
->static_vertex_count
>= 0 ?
500 gs_prog_data
->static_vertex_count
: 0,
502 /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled:
503 * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v)
504 * UserClipDistanceCullTestEnableBitmask(v)
507 .VertexURBEntryOutputReadOffset
= offset
,
508 .VertexURBEntryOutputLength
= length
);
510 const struct brw_vue_prog_data
*vue_prog_data
= &pipeline
->vs_prog_data
.base
;
511 /* Skip the VUE header and position slots */
513 length
= (vue_prog_data
->vue_map
.num_slots
+ 1) / 2 - offset
;
515 uint32_t vs_start
= pipeline
->vs_simd8
!= NO_KERNEL
? pipeline
->vs_simd8
:
518 if (vs_start
== NO_KERNEL
|| (extra
&& extra
->disable_vs
))
519 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VS
),
520 .FunctionEnable
= false,
521 /* Even if VS is disabled, SBE still gets the amount of
522 * vertex data to read from this field. */
523 .VertexURBEntryOutputReadOffset
= offset
,
524 .VertexURBEntryOutputLength
= length
);
526 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VS
),
527 .KernelStartPointer
= vs_start
,
528 .SingleVertexDispatch
= Multiple
,
529 .VectorMaskEnable
= Dmask
,
531 .BindingTableEntryCount
=
532 vue_prog_data
->base
.binding_table
.size_bytes
/ 4,
533 .ThreadDispatchPriority
= Normal
,
534 .FloatingPointMode
= IEEE754
,
535 .IllegalOpcodeExceptionEnable
= false,
536 .AccessesUAV
= false,
537 .SoftwareExceptionEnable
= false,
539 .ScratchSpaceBasePointer
= pipeline
->scratch_start
[MESA_SHADER_VERTEX
],
540 .PerThreadScratchSpace
= ffs(vue_prog_data
->base
.total_scratch
/ 2048),
542 .DispatchGRFStartRegisterForURBData
=
543 vue_prog_data
->base
.dispatch_grf_start_reg
,
544 .VertexURBEntryReadLength
= vue_prog_data
->urb_read_length
,
545 .VertexURBEntryReadOffset
= 0,
547 .MaximumNumberofThreads
= device
->info
.max_vs_threads
- 1,
548 .StatisticsEnable
= false,
549 .SIMD8DispatchEnable
= pipeline
->vs_simd8
!= NO_KERNEL
,
550 .VertexCacheDisable
= false,
551 .FunctionEnable
= true,
553 .VertexURBEntryOutputReadOffset
= offset
,
554 .VertexURBEntryOutputLength
= length
,
555 .UserClipDistanceClipTestEnableBitmask
= 0,
556 .UserClipDistanceCullTestEnableBitmask
= 0);
558 const struct brw_wm_prog_data
*wm_prog_data
= &pipeline
->wm_prog_data
;
560 /* TODO: We should clean this up. Among other things, this is mostly
561 * shared with other gens.
563 const struct brw_vue_map
*fs_input_map
;
564 if (pipeline
->gs_kernel
== NO_KERNEL
)
565 fs_input_map
= &vue_prog_data
->vue_map
;
567 fs_input_map
= &gs_prog_data
->base
.vue_map
;
569 struct GENX(3DSTATE_SBE_SWIZ
) swiz
= {
570 GENX(3DSTATE_SBE_SWIZ_header
),
573 int max_source_attr
= 0;
574 for (int attr
= 0; attr
< VARYING_SLOT_MAX
; attr
++) {
575 int input_index
= wm_prog_data
->urb_setup
[attr
];
580 /* We have to subtract two slots to accout for the URB entry output
581 * read offset in the VS and GS stages.
583 int source_attr
= fs_input_map
->varying_to_slot
[attr
] - 2;
584 max_source_attr
= MAX2(max_source_attr
, source_attr
);
586 if (input_index
>= 16)
589 swiz
.Attribute
[input_index
].SourceAttribute
= source_attr
;
592 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_SBE
),
593 .AttributeSwizzleEnable
= true,
594 .ForceVertexURBEntryReadLength
= false,
595 .ForceVertexURBEntryReadOffset
= false,
596 .VertexURBEntryReadLength
= DIV_ROUND_UP(max_source_attr
+ 1, 2),
597 .PointSpriteTextureCoordinateOrigin
= UPPERLEFT
,
598 .NumberofSFOutputAttributes
=
599 wm_prog_data
->num_varying_inputs
,
602 .Attribute0ActiveComponentFormat
= ACF_XYZW
,
603 .Attribute1ActiveComponentFormat
= ACF_XYZW
,
604 .Attribute2ActiveComponentFormat
= ACF_XYZW
,
605 .Attribute3ActiveComponentFormat
= ACF_XYZW
,
606 .Attribute4ActiveComponentFormat
= ACF_XYZW
,
607 .Attribute5ActiveComponentFormat
= ACF_XYZW
,
608 .Attribute6ActiveComponentFormat
= ACF_XYZW
,
609 .Attribute7ActiveComponentFormat
= ACF_XYZW
,
610 .Attribute8ActiveComponentFormat
= ACF_XYZW
,
611 .Attribute9ActiveComponentFormat
= ACF_XYZW
,
612 .Attribute10ActiveComponentFormat
= ACF_XYZW
,
613 .Attribute11ActiveComponentFormat
= ACF_XYZW
,
614 .Attribute12ActiveComponentFormat
= ACF_XYZW
,
615 .Attribute13ActiveComponentFormat
= ACF_XYZW
,
616 .Attribute14ActiveComponentFormat
= ACF_XYZW
,
617 .Attribute15ActiveComponentFormat
= ACF_XYZW
,
618 /* wow, much field, very attribute */
619 .Attribute16ActiveComponentFormat
= ACF_XYZW
,
620 .Attribute17ActiveComponentFormat
= ACF_XYZW
,
621 .Attribute18ActiveComponentFormat
= ACF_XYZW
,
622 .Attribute19ActiveComponentFormat
= ACF_XYZW
,
623 .Attribute20ActiveComponentFormat
= ACF_XYZW
,
624 .Attribute21ActiveComponentFormat
= ACF_XYZW
,
625 .Attribute22ActiveComponentFormat
= ACF_XYZW
,
626 .Attribute23ActiveComponentFormat
= ACF_XYZW
,
627 .Attribute24ActiveComponentFormat
= ACF_XYZW
,
628 .Attribute25ActiveComponentFormat
= ACF_XYZW
,
629 .Attribute26ActiveComponentFormat
= ACF_XYZW
,
630 .Attribute27ActiveComponentFormat
= ACF_XYZW
,
631 .Attribute28ActiveComponentFormat
= ACF_XYZW
,
632 .Attribute29ActiveComponentFormat
= ACF_XYZW
,
633 .Attribute28ActiveComponentFormat
= ACF_XYZW
,
634 .Attribute29ActiveComponentFormat
= ACF_XYZW
,
635 .Attribute30ActiveComponentFormat
= ACF_XYZW
,
639 uint32_t *dw
= anv_batch_emit_dwords(&pipeline
->batch
,
640 GENX(3DSTATE_SBE_SWIZ_length
));
641 GENX(3DSTATE_SBE_SWIZ_pack
)(&pipeline
->batch
, dw
, &swiz
);
643 const int num_thread_bias
= ANV_GEN
== 8 ? 2 : 1;
644 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS
),
645 .KernelStartPointer0
= pipeline
->ps_ksp0
,
647 .SingleProgramFlow
= false,
648 .VectorMaskEnable
= true,
651 .ScratchSpaceBasePointer
= pipeline
->scratch_start
[MESA_SHADER_FRAGMENT
],
652 .PerThreadScratchSpace
= ffs(wm_prog_data
->base
.total_scratch
/ 2048),
654 .MaximumNumberofThreadsPerPSD
= 64 - num_thread_bias
,
655 .PositionXYOffsetSelect
= wm_prog_data
->uses_pos_offset
?
656 POSOFFSET_SAMPLE
: POSOFFSET_NONE
,
657 .PushConstantEnable
= wm_prog_data
->base
.nr_params
> 0,
658 ._8PixelDispatchEnable
= pipeline
->ps_simd8
!= NO_KERNEL
,
659 ._16PixelDispatchEnable
= pipeline
->ps_simd16
!= NO_KERNEL
,
660 ._32PixelDispatchEnable
= false,
662 .DispatchGRFStartRegisterForConstantSetupData0
= pipeline
->ps_grf_start0
,
663 .DispatchGRFStartRegisterForConstantSetupData1
= 0,
664 .DispatchGRFStartRegisterForConstantSetupData2
= pipeline
->ps_grf_start2
,
666 .KernelStartPointer1
= 0,
667 .KernelStartPointer2
= pipeline
->ps_ksp2
);
669 bool per_sample_ps
= false;
670 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS_EXTRA
),
671 .PixelShaderValid
= true,
672 .PixelShaderKillsPixel
= wm_prog_data
->uses_kill
,
673 .PixelShaderComputedDepthMode
= wm_prog_data
->computed_depth_mode
,
674 .AttributeEnable
= wm_prog_data
->num_varying_inputs
> 0,
675 .oMaskPresenttoRenderTarget
= wm_prog_data
->uses_omask
,
676 .PixelShaderIsPerSample
= per_sample_ps
,
678 .PixelShaderPullsBary
= wm_prog_data
->pulls_bary
,
679 .InputCoverageMaskState
= ICMS_NONE
683 *pPipeline
= anv_pipeline_to_handle(pipeline
);
688 VkResult
genX(compute_pipeline_create
)(
690 const VkComputePipelineCreateInfo
* pCreateInfo
,
691 const VkAllocationCallbacks
* pAllocator
,
692 VkPipeline
* pPipeline
)
694 ANV_FROM_HANDLE(anv_device
, device
, _device
);
695 struct anv_pipeline
*pipeline
;
698 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
);
700 pipeline
= anv_alloc2(&device
->alloc
, pAllocator
, sizeof(*pipeline
), 8,
701 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
702 if (pipeline
== NULL
)
703 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
705 pipeline
->device
= device
;
706 pipeline
->layout
= anv_pipeline_layout_from_handle(pCreateInfo
->layout
);
708 pipeline
->blend_state
.map
= NULL
;
710 result
= anv_reloc_list_init(&pipeline
->batch_relocs
,
711 pAllocator
? pAllocator
: &device
->alloc
);
712 if (result
!= VK_SUCCESS
) {
713 anv_free2(&device
->alloc
, pAllocator
, pipeline
);
716 pipeline
->batch
.next
= pipeline
->batch
.start
= pipeline
->batch_data
;
717 pipeline
->batch
.end
= pipeline
->batch
.start
+ sizeof(pipeline
->batch_data
);
718 pipeline
->batch
.relocs
= &pipeline
->batch_relocs
;
720 anv_state_stream_init(&pipeline
->program_stream
,
721 &device
->instruction_block_pool
);
723 /* When we free the pipeline, we detect stages based on the NULL status
724 * of various prog_data pointers. Make them NULL by default.
726 memset(pipeline
->prog_data
, 0, sizeof(pipeline
->prog_data
));
727 memset(pipeline
->scratch_start
, 0, sizeof(pipeline
->scratch_start
));
729 pipeline
->vs_simd8
= NO_KERNEL
;
730 pipeline
->vs_vec4
= NO_KERNEL
;
731 pipeline
->gs_kernel
= NO_KERNEL
;
733 pipeline
->active_stages
= 0;
734 pipeline
->total_scratch
= 0;
736 assert(pCreateInfo
->stage
.stage
== VK_SHADER_STAGE_COMPUTE_BIT
);
737 ANV_FROM_HANDLE(anv_shader_module
, module
, pCreateInfo
->stage
.module
);
738 anv_pipeline_compile_cs(pipeline
, pCreateInfo
, module
,
739 pCreateInfo
->stage
.pName
);
741 pipeline
->use_repclear
= false;
743 const struct brw_cs_prog_data
*cs_prog_data
= &pipeline
->cs_prog_data
;
745 anv_batch_emit(&pipeline
->batch
, GENX(MEDIA_VFE_STATE
),
746 .ScratchSpaceBasePointer
= pipeline
->scratch_start
[MESA_SHADER_COMPUTE
],
747 .PerThreadScratchSpace
= ffs(cs_prog_data
->base
.total_scratch
/ 2048),
748 .ScratchSpaceBasePointerHigh
= 0,
751 .MaximumNumberofThreads
= device
->info
.max_cs_threads
- 1,
752 .NumberofURBEntries
= 2,
753 .ResetGatewayTimer
= true,
755 .BypassGatewayControl
= true,
757 .URBEntryAllocationSize
= 2,
758 .CURBEAllocationSize
= 0);
760 struct brw_cs_prog_data
*prog_data
= &pipeline
->cs_prog_data
;
761 uint32_t group_size
= prog_data
->local_size
[0] *
762 prog_data
->local_size
[1] * prog_data
->local_size
[2];
763 pipeline
->cs_thread_width_max
= DIV_ROUND_UP(group_size
, prog_data
->simd_size
);
764 uint32_t remainder
= group_size
& (prog_data
->simd_size
- 1);
767 pipeline
->cs_right_mask
= ~0u >> (32 - remainder
);
769 pipeline
->cs_right_mask
= ~0u >> (32 - prog_data
->simd_size
);
772 *pPipeline
= anv_pipeline_to_handle(pipeline
);