/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
30 #include "anv_private.h"
32 #include "genxml/gen_macros.h"
33 #include "genxml/genX_pack.h"
35 #include "genX_pipeline_util.h"
/* Emit input-assembly state for a graphics pipeline.
 *
 * Writes a single 3DSTATE_VF_TOPOLOGY packet into pipeline->batch whose
 * PrimitiveTopologyType is copied from pipeline->topology.
 *
 * pipeline: pipeline whose batch receives the packet.
 * info, extra: accepted for signature symmetry with the other emit_*
 *    helpers; neither is referenced by the lines visible here.
 *
 * NOTE(review): this listing appears to have lost lines during extraction
 * (return type, braces); confirm against the pristine file before editing.
 */
38 emit_ia_state(struct anv_pipeline
*pipeline
,
39 const VkPipelineInputAssemblyStateCreateInfo
*info
,
40 const struct anv_graphics_pipeline_create_info
*extra
)
42 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_TOPOLOGY
), vft
) {
43 vft
.PrimitiveTopologyType
= pipeline
->topology
;
/* Build and pack rasterizer state for a graphics pipeline.
 *
 * Fills a 3DSTATE_SF and a 3DSTATE_RASTER template and packs them (with a
 * NULL batch argument) into pipeline->gen8.sf and pipeline->gen8.raster
 * rather than emitting them into pipeline->batch.
 *
 * pipeline: destination of the packed dwords; its depth_clamp_enable flag
 *    is inverted to produce the viewport Z clip-test enables.
 * info:     supplies frontFace, cullMode and polygonMode (translated through
 *    the vk_to_gen_* lookup tables) and depthBiasEnable (copied to all three
 *    GlobalDepthOffsetEnable* fields).
 * ms_info:  supplies rasterizationSamples; DXMultisampleRasterizationEnable
 *    is set when that count is greater than 1.
 * extra:    optional; when extra->use_rectlist is set, the viewport
 *    transform and the scissor rectangle are both disabled.
 *
 * Both the single ViewportZClipTestEnable field and the split
 * near/far fields appear below; presumably a GEN_GEN preprocessor
 * conditional that is not visible in this listing selects between them.
 */
48 emit_rs_state(struct anv_pipeline
*pipeline
,
49 const VkPipelineRasterizationStateCreateInfo
*info
,
50 const VkPipelineMultisampleStateCreateInfo
*ms_info
,
51 const struct anv_graphics_pipeline_create_info
*extra
)
/* NOTE(review): ms_info is dereferenced here and no NULL guard is visible
 * in this listing, while emit_ms_state() treats the same pointer as
 * optional. Confirm a guard exists in the pristine source. */
56 samples
= ms_info
->rasterizationSamples
;
58 struct GENX(3DSTATE_SF
) sf
= {
59 GENX(3DSTATE_SF_header
),
60 .ViewportTransformEnable
= !(extra
&& extra
->use_rectlist
),
61 .TriangleStripListProvokingVertexSelect
= 0,
62 .LineStripListProvokingVertexSelect
= 0,
63 .TriangleFanProvokingVertexSelect
= 1,
64 .PointWidthSource
= Vertex
,
68 /* FINISHME: VkBool32 rasterizerDiscardEnable; */
70 GENX(3DSTATE_SF_pack
)(NULL
, pipeline
->gen8
.sf
, &sf
);
72 struct GENX(3DSTATE_RASTER
) raster
= {
73 GENX(3DSTATE_RASTER_header
),
75 /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
76 * "Multisample Modes State".
78 .DXMultisampleRasterizationEnable
= samples
> 1,
79 .ForcedSampleCount
= FSC_NUMRASTSAMPLES_0
,
80 .ForceMultisampling
= false,
82 .FrontWinding
= vk_to_gen_front_face
[info
->frontFace
],
83 .CullMode
= vk_to_gen_cullmode
[info
->cullMode
],
84 .FrontFaceFillMode
= vk_to_gen_fillmode
[info
->polygonMode
],
85 .BackFaceFillMode
= vk_to_gen_fillmode
[info
->polygonMode
],
86 .ScissorRectangleEnable
= !(extra
&& extra
->use_rectlist
),
88 .ViewportZClipTestEnable
= !pipeline
->depth_clamp_enable
,
90 /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
91 .ViewportZFarClipTestEnable
= !pipeline
->depth_clamp_enable
,
92 .ViewportZNearClipTestEnable
= !pipeline
->depth_clamp_enable
,
94 .GlobalDepthOffsetEnableSolid
= info
->depthBiasEnable
,
95 .GlobalDepthOffsetEnableWireframe
= info
->depthBiasEnable
,
96 .GlobalDepthOffsetEnablePoint
= info
->depthBiasEnable
,
99 GENX(3DSTATE_RASTER_pack
)(NULL
, pipeline
->gen8
.raster
, &raster
);
/* Emit multisample state for a graphics pipeline.
 *
 * Writes two packets into pipeline->batch:
 *   - 3DSTATE_MULTISAMPLE, with PixelPositionOffsetEnable = false,
 *     PixelLocation = CENTER and NumberofMultisamples = log2 of the sample
 *     count (computed as __builtin_ffs(samples) - 1);
 *   - 3DSTATE_SAMPLE_MASK, with the 16-bit sample mask.
 *
 * Defaults are samples = 1, log2_samples = 0 and sample_mask = 0xffff.
 * The sample count is taken from info->rasterizationSamples, and the mask
 * is ANDed with info->pSampleMask[0] when both info and pSampleMask are
 * non-NULL. Only the first mask word is consulted.
 *
 * info->sampleShadingEnable is not implemented here: when set it only
 * triggers an anv_finishme() report.
 *
 * pipeline: pipeline whose batch receives the packets.
 * info:     optional multisample create info; the pSampleMask and
 *    sampleShadingEnable tests below check it for NULL first.
 *    NOTE(review): the guard around the rasterizationSamples read is not
 *    visible in this listing; confirm it exists in the pristine source.
 */
103 emit_ms_state(struct anv_pipeline
*pipeline
,
104 const VkPipelineMultisampleStateCreateInfo
*info
)
106 uint32_t samples
= 1;
107 uint32_t log2_samples
= 0;
109 /* From the Vulkan 1.0 spec:
110 * If pSampleMask is NULL, it is treated as if the mask has all bits
111 * enabled, i.e. no coverage is removed from fragments.
113 * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
115 uint32_t sample_mask
= 0xffff;
118 samples
= info
->rasterizationSamples
;
119 log2_samples
= __builtin_ffs(samples
) - 1;
122 if (info
&& info
->pSampleMask
)
123 sample_mask
&= info
->pSampleMask
[0];
125 if (info
&& info
->sampleShadingEnable
)
126 anv_finishme("VkPipelineMultisampleStateCreateInfo::sampleShadingEnable");
128 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_MULTISAMPLE
), ms
) {
129 /* The PRM says that this bit is valid only for DX9:
131 * SW can choose to set this bit only for DX9 API. DX10/OGL API's
132 * should not have any effect by setting or not setting this bit.
134 ms
.PixelPositionOffsetEnable
= false;
136 ms
.PixelLocation
= CENTER
;
137 ms
.NumberofMultisamples
= log2_samples
;
140 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_SAMPLE_MASK
), sm
) {
141 sm
.SampleMask
= sample_mask
;
// Create a graphics pipeline object and record its hardware state.
//
// Steps visible in this listing:
//   1. Allocate a struct anv_pipeline with anv_alloc2() (8-byte alignment,
//      object scope); a NULL result returns
//      vk_error(VK_ERROR_OUT_OF_HOST_MEMORY).
//   2. Run anv_pipeline_init(); on any result other than VK_SUCCESS the
//      allocation is released with anv_free2().
//   3. Emit fixed-function state through the emit_* helpers (vertex input,
//      input assembly, rasterization, multisample, depth/stencil, color
//      blend) followed by emit_urb_setup().
//   4. Emit 3DSTATE_CLIP, 3DSTATE_WM, 3DSTATE_GS, 3DSTATE_VS, 3DSTATE_PS
//      and 3DSTATE_PS_EXTRA into pipeline->batch, plus emit_3dstate_sbe().
//   5. Store the new pipeline's handle in *pPipeline.
//
// Parameters:
//   cache       - forwarded to anv_pipeline_init().
//   pCreateInfo - Vulkan create info; sType is asserted, and the vertex
//                 input, input assembly and rasterization state pointers
//                 are asserted non-NULL.
//   extra       - optional driver-internal overrides; every read of
//                 use_rectlist / disable_vs is guarded by a NULL check.
//   pAllocator  - forwarded to anv_alloc2(), anv_pipeline_init() and
//                 anv_free2().
//   pPipeline   - receives the resulting handle.
//
// NOTE(review): `_device` and `result` are used below but their
// declarations are not visible in this listing, and neither are the
// assignments to `offset`, the return statements after step 2 and step 5,
// or the else branches. The listing appears to have lost lines during
// extraction; confirm against the pristine file before editing.
146 genX(graphics_pipeline_create
)(
148 struct anv_pipeline_cache
* cache
,
149 const VkGraphicsPipelineCreateInfo
* pCreateInfo
,
150 const struct anv_graphics_pipeline_create_info
*extra
,
151 const VkAllocationCallbacks
* pAllocator
,
152 VkPipeline
* pPipeline
)
154 ANV_FROM_HANDLE(anv_device
, device
, _device
);
155 ANV_FROM_HANDLE(anv_render_pass
, pass
, pCreateInfo
->renderPass
);
156 struct anv_subpass
*subpass
= &pass
->subpasses
[pCreateInfo
->subpass
];
157 struct anv_pipeline
*pipeline
;
159 uint32_t offset
, length
;
161 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO
);
// Allocate the pipeline object; report host OOM if the allocator fails.
163 pipeline
= anv_alloc2(&device
->alloc
, pAllocator
, sizeof(*pipeline
), 8,
164 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
165 if (pipeline
== NULL
)
166 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
// Common initialisation; the allocation is released if it fails.
168 result
= anv_pipeline_init(pipeline
, device
, cache
,
169 pCreateInfo
, extra
, pAllocator
);
170 if (result
!= VK_SUCCESS
) {
171 anv_free2(&device
->alloc
, pAllocator
, pipeline
);
// Fixed-function state. pMultisampleState, pDepthStencilState and
// pColorBlendState are passed through without an assert.
175 assert(pCreateInfo
->pVertexInputState
);
176 emit_vertex_input(pipeline
, pCreateInfo
->pVertexInputState
, extra
);
177 assert(pCreateInfo
->pInputAssemblyState
);
178 emit_ia_state(pipeline
, pCreateInfo
->pInputAssemblyState
, extra
);
179 assert(pCreateInfo
->pRasterizationState
);
180 emit_rs_state(pipeline
, pCreateInfo
->pRasterizationState
,
181 pCreateInfo
->pMultisampleState
, extra
);
182 emit_ms_state(pipeline
, pCreateInfo
->pMultisampleState
);
183 emit_ds_state(pipeline
, pCreateInfo
->pDepthStencilState
, pass
, subpass
);
184 emit_cb_state(pipeline
, pCreateInfo
->pColorBlendState
,
185 pCreateInfo
->pMultisampleState
);
187 emit_urb_setup(pipeline
);
// wm_prog_data is tested for NULL in the CLIP and early-depth code below,
// so it is evidently allowed to be NULL (presumably when no fragment
// shader is bound - confirm against get_wm_prog_data()).
189 const struct brw_wm_prog_data
*wm_prog_data
= get_wm_prog_data(pipeline
);
// Clipper: disabled entirely for rectlist pipelines.
190 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_CLIP
), clip
) {
191 clip
.ClipEnable
= !(extra
&& extra
->use_rectlist
);
192 clip
.EarlyCullEnable
= true;
193 clip
.APIMode
= 1; /* D3D */
194 clip
.ViewportXYClipTestEnable
= true;
// rasterizerDiscardEnable selects CLIPMODE_REJECT_ALL; the left-hand side
// of this assignment is not visible in this listing.
197 pCreateInfo
->pRasterizationState
->rasterizerDiscardEnable
?
198 CLIPMODE_REJECT_ALL
: CLIPMODE_NORMAL
;
// Mask 0x38 is tested against barycentric_interp_modes; presumably it
// selects the non-perspective modes - confirm against the brw definitions.
200 clip
.NonPerspectiveBarycentricEnable
= wm_prog_data
?
201 (wm_prog_data
->barycentric_interp_modes
& 0x38) != 0 : 0;
203 clip
.TriangleStripListProvokingVertexSelect
= 0;
204 clip
.LineStripListProvokingVertexSelect
= 0;
205 clip
.TriangleFanProvokingVertexSelect
= 1;
207 clip
.MinimumPointWidth
= 0.125;
208 clip
.MaximumPointWidth
= 255.875;
// NOTE(review): pViewportState is dereferenced with no NULL check in view,
// and viewportCount == 0 would wrap the subtraction. Confirm that callers
// always provide viewport state (e.g. when rasterizer discard is enabled).
209 clip
.MaximumVPIndex
= pCreateInfo
->pViewportState
->viewportCount
- 1;
// Windower / early depth-stencil control.
212 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_WM
), wm
) {
213 wm
.StatisticsEnable
= true;
214 wm
.LineEndCapAntialiasingRegionWidth
= _05pixels
;
215 wm
.LineAntialiasingRegionWidth
= _10pixels
;
216 wm
.ForceThreadDispatchEnable
= NORMAL
;
217 wm
.PointRasterizationRule
= RASTRULE_UPPER_RIGHT
;
// early_fragment_tests takes priority over has_side_effects; NORMAL is
// used otherwise.
219 if (wm_prog_data
&& wm_prog_data
->early_fragment_tests
) {
220 wm
.EarlyDepthStencilControl
= PREPS
;
221 } else if (wm_prog_data
&& wm_prog_data
->has_side_effects
) {
222 wm
.EarlyDepthStencilControl
= PSEXEC
;
224 wm
.EarlyDepthStencilControl
= NORMAL
;
// NOTE(review): wm_prog_data is dereferenced whenever ps_ksp0 is a real
// kernel; this relies on ps_ksp0 != NO_KERNEL implying a non-NULL
// wm_prog_data - confirm.
227 wm
.BarycentricInterpolationMode
= pipeline
->ps_ksp0
== NO_KERNEL
?
228 0 : wm_prog_data
->barycentric_interp_modes
;
// Geometry shader: with no GS kernel, 3DSTATE_GS is emitted with default
// fields only.
231 if (pipeline
->gs_kernel
== NO_KERNEL
) {
232 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_GS
), gs
);
234 const struct brw_gs_prog_data
*gs_prog_data
= get_gs_prog_data(pipeline
);
// Output length is counted in pairs of VUE slots, minus the read offset.
236 length
= (gs_prog_data
->base
.vue_map
.num_slots
+ 1) / 2 - offset
;
238 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_GS
), gs
) {
239 gs
.SingleProgramFlow
= false;
240 gs
.KernelStartPointer
= pipeline
->gs_kernel
;
241 gs
.VectorMaskEnable
= false;
243 gs
.BindingTableEntryCount
= 0;
244 gs
.ExpectedVertexCount
= gs_prog_data
->vertices_in
;
246 gs
.ScratchSpaceBasePointer
= (struct anv_address
) {
247 .bo
= anv_scratch_pool_alloc(device
, &device
->scratch_pool
,
248 MESA_SHADER_GEOMETRY
,
249 gs_prog_data
->base
.base
.total_scratch
),
252 gs
.PerThreadScratchSpace
= scratch_space(&gs_prog_data
->base
.base
);
253 gs
.OutputVertexSize
= gs_prog_data
->output_vertex_size_hwords
* 2 - 1;
254 gs
.OutputTopology
= gs_prog_data
->output_topology
;
255 gs
.VertexURBEntryReadLength
= gs_prog_data
->base
.urb_read_length
;
256 gs
.IncludeVertexHandles
= gs_prog_data
->base
.include_vue_handles
;
258 gs
.DispatchGRFStartRegisterForURBData
=
259 gs_prog_data
->base
.base
.dispatch_grf_start_reg
;
261 gs
.MaximumNumberofThreads
= device
->info
.max_gs_threads
/ 2 - 1;
262 gs
.ControlDataHeaderSize
= gs_prog_data
->control_data_header_size_hwords
;
263 gs
.DispatchMode
= gs_prog_data
->base
.dispatch_mode
;
264 gs
.StatisticsEnable
= true;
265 gs
.IncludePrimitiveID
= gs_prog_data
->include_primitive_id
;
266 gs
.ReorderMode
= TRAILING
;
269 gs
.ControlDataFormat
= gs_prog_data
->control_data_format
;
// A negative static_vertex_count means "not static": StaticOutput is
// cleared and the count field is written as 0.
271 gs
.StaticOutput
= gs_prog_data
->static_vertex_count
>= 0;
272 gs
.StaticOutputVertexCount
=
273 gs_prog_data
->static_vertex_count
>= 0 ?
274 gs_prog_data
->static_vertex_count
: 0;
276 /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled:
277 * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v)
278 * UserClipDistanceCullTestEnableBitmask(v)
281 gs
.VertexURBEntryOutputReadOffset
= offset
;
282 gs
.VertexURBEntryOutputLength
= length
;
286 const struct brw_vs_prog_data
*vs_prog_data
= get_vs_prog_data(pipeline
);
287 /* Skip the VUE header and position slots */
289 length
= (vs_prog_data
->base
.vue_map
.num_slots
+ 1) / 2 - offset
;
// The SIMD8 vertex kernel is preferred when present; the fallback operand
// of this conditional is not visible in this listing.
291 uint32_t vs_start
= pipeline
->vs_simd8
!= NO_KERNEL
? pipeline
->vs_simd8
:
// Vertex shader: disabled when there is no kernel or when the caller sets
// extra->disable_vs.
294 if (vs_start
== NO_KERNEL
|| (extra
&& extra
->disable_vs
)) {
295 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VS
), vs
) {
296 vs
.FunctionEnable
= false;
297 /* Even if VS is disabled, SBE still gets the amount of
298 * vertex data to read from this field. */
299 vs
.VertexURBEntryOutputReadOffset
= offset
;
300 vs
.VertexURBEntryOutputLength
= length
;
303 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VS
), vs
) {
304 vs
.KernelStartPointer
= vs_start
;
305 vs
.SingleVertexDispatch
= false;
306 vs
.VectorMaskEnable
= false;
// NOTE(review): in this listing the next assignment ends with ',' rather
// than ';', which chains it to the following assignment via the comma
// operator. Both assignments still execute, but it looks like a typo.
309 vs
.BindingTableEntryCount
=
310 vs_prog_data
->base
.base
.binding_table
.size_bytes
/ 4,
312 vs
.ThreadDispatchPriority
= false;
313 vs
.FloatingPointMode
= IEEE754
;
314 vs
.IllegalOpcodeExceptionEnable
= false;
315 vs
.AccessesUAV
= false;
316 vs
.SoftwareExceptionEnable
= false;
318 vs
.ScratchSpaceBasePointer
= (struct anv_address
) {
319 .bo
= anv_scratch_pool_alloc(device
, &device
->scratch_pool
,
321 vs_prog_data
->base
.base
.total_scratch
),
324 vs
.PerThreadScratchSpace
= scratch_space(&vs_prog_data
->base
.base
);
326 vs
.DispatchGRFStartRegisterForURBData
=
327 vs_prog_data
->base
.base
.dispatch_grf_start_reg
;
329 vs
.VertexURBEntryReadLength
= vs_prog_data
->base
.urb_read_length
;
330 vs
.VertexURBEntryReadOffset
= 0;
332 vs
.MaximumNumberofThreads
= device
->info
.max_vs_threads
- 1;
333 vs
.StatisticsEnable
= false;
334 vs
.SIMD8DispatchEnable
= pipeline
->vs_simd8
!= NO_KERNEL
;
335 vs
.VertexCacheDisable
= false;
336 vs
.FunctionEnable
= true;
338 vs
.VertexURBEntryOutputReadOffset
= offset
;
339 vs
.VertexURBEntryOutputLength
= length
;
342 vs
.UserClipDistanceClipTestEnableBitmask
= 0;
343 vs
.UserClipDistanceCullTestEnableBitmask
= 0;
// Pixel shader: the per-PSD thread limit is programmed as 64 minus a bias
// of 2 on Gen8 and 1 otherwise.
347 const int num_thread_bias
= GEN_GEN
== 8 ? 2 : 1;
348 if (pipeline
->ps_ksp0
== NO_KERNEL
) {
349 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS
), ps
);
// NOTE(review): the packet variable below is named `extra`, shadowing the
// function parameter of the same name inside this emit block.
350 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS_EXTRA
), extra
) {
351 extra
.PixelShaderValid
= false;
354 emit_3dstate_sbe(pipeline
);
356 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS
), ps
) {
357 ps
.KernelStartPointer0
= pipeline
->ps_ksp0
;
358 ps
.KernelStartPointer1
= 0;
359 ps
.KernelStartPointer2
= pipeline
->ps_ksp0
+ wm_prog_data
->prog_offset_2
;
360 ps
._8PixelDispatchEnable
= wm_prog_data
->dispatch_8
;
361 ps
._16PixelDispatchEnable
= wm_prog_data
->dispatch_16
;
362 ps
._32PixelDispatchEnable
= false;
363 ps
.SingleProgramFlow
= false;
364 ps
.VectorMaskEnable
= true;
366 ps
.PushConstantEnable
= wm_prog_data
->base
.nr_params
> 0;
367 ps
.PositionXYOffsetSelect
= wm_prog_data
->uses_pos_offset
?
368 POSOFFSET_SAMPLE
: POSOFFSET_NONE
;
370 ps
.MaximumNumberofThreadsPerPSD
= 64 - num_thread_bias
;
372 ps
.ScratchSpaceBasePointer
= (struct anv_address
) {
373 .bo
= anv_scratch_pool_alloc(device
, &device
->scratch_pool
,
374 MESA_SHADER_FRAGMENT
,
375 wm_prog_data
->base
.total_scratch
),
378 ps
.PerThreadScratchSpace
= scratch_space(&wm_prog_data
->base
);
380 ps
.DispatchGRFStartRegisterForConstantSetupData0
=
381 wm_prog_data
->base
.dispatch_grf_start_reg
;
382 ps
.DispatchGRFStartRegisterForConstantSetupData1
= 0;
383 ps
.DispatchGRFStartRegisterForConstantSetupData2
=
384 wm_prog_data
->dispatch_grf_start_reg_2
;
// Per-sample dispatch is requested only when multisample state is present
// and enables sample shading.
387 bool per_sample_ps
= pCreateInfo
->pMultisampleState
&&
388 pCreateInfo
->pMultisampleState
->sampleShadingEnable
;
390 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS_EXTRA
), ps
) {
391 ps
.PixelShaderValid
= true;
392 ps
.PixelShaderKillsPixel
= wm_prog_data
->uses_kill
;
393 ps
.PixelShaderComputedDepthMode
= wm_prog_data
->computed_depth_mode
;
394 ps
.AttributeEnable
= wm_prog_data
->num_varying_inputs
> 0;
395 ps
.oMaskPresenttoRenderTarget
= wm_prog_data
->uses_omask
;
396 ps
.PixelShaderIsPerSample
= per_sample_ps
;
397 ps
.PixelShaderUsesSourceDepth
= wm_prog_data
->uses_src_depth
;
398 ps
.PixelShaderUsesSourceW
= wm_prog_data
->uses_src_w
;
400 ps
.PixelShaderPullsBary
= wm_prog_data
->pulls_bary
;
401 ps
.InputCoverageMaskState
= wm_prog_data
->uses_sample_mask
?
402 ICMS_INNER_CONSERVATIVE
: ICMS_NONE
;
404 ps
.PixelShaderUsesInputCoverageMask
= wm_prog_data
->uses_sample_mask
;
// Hand the finished pipeline back to the caller as a Vulkan handle.
409 *pPipeline
= anv_pipeline_to_handle(pipeline
);