/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
30 #include "anv_private.h"
32 #include "genxml/gen_macros.h"
33 #include "genxml/genX_pack.h"
35 #include "genX_pipeline_util.h"
38 emit_ia_state(struct anv_pipeline
*pipeline
,
39 const VkPipelineInputAssemblyStateCreateInfo
*info
)
41 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VF_TOPOLOGY
), vft
) {
42 vft
.PrimitiveTopologyType
= pipeline
->topology
;
47 genX(graphics_pipeline_create
)(
49 struct anv_pipeline_cache
* cache
,
50 const VkGraphicsPipelineCreateInfo
* pCreateInfo
,
51 const VkAllocationCallbacks
* pAllocator
,
52 VkPipeline
* pPipeline
)
54 ANV_FROM_HANDLE(anv_device
, device
, _device
);
55 ANV_FROM_HANDLE(anv_render_pass
, pass
, pCreateInfo
->renderPass
);
56 const struct anv_physical_device
*physical_device
=
57 &device
->instance
->physicalDevice
;
58 const struct gen_device_info
*devinfo
= &physical_device
->info
;
59 struct anv_subpass
*subpass
= &pass
->subpasses
[pCreateInfo
->subpass
];
60 struct anv_pipeline
*pipeline
;
62 uint32_t offset
, length
;
64 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO
);
66 pipeline
= vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*pipeline
), 8,
67 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
69 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
71 result
= anv_pipeline_init(pipeline
, device
, cache
,
72 pCreateInfo
, pAllocator
);
73 if (result
!= VK_SUCCESS
) {
74 vk_free2(&device
->alloc
, pAllocator
, pipeline
);
78 assert(pCreateInfo
->pVertexInputState
);
79 emit_vertex_input(pipeline
, pCreateInfo
->pVertexInputState
);
80 assert(pCreateInfo
->pInputAssemblyState
);
81 emit_ia_state(pipeline
, pCreateInfo
->pInputAssemblyState
);
82 assert(pCreateInfo
->pRasterizationState
);
83 emit_rs_state(pipeline
, pCreateInfo
->pRasterizationState
,
84 pCreateInfo
->pMultisampleState
, pass
, subpass
);
85 emit_ms_state(pipeline
, pCreateInfo
->pMultisampleState
);
86 emit_ds_state(pipeline
, pCreateInfo
->pDepthStencilState
, pass
, subpass
);
87 emit_cb_state(pipeline
, pCreateInfo
->pColorBlendState
,
88 pCreateInfo
->pMultisampleState
);
90 emit_urb_setup(pipeline
);
92 emit_3dstate_clip(pipeline
, pCreateInfo
->pViewportState
,
93 pCreateInfo
->pRasterizationState
);
94 emit_3dstate_streamout(pipeline
, pCreateInfo
->pRasterizationState
);
96 const struct brw_wm_prog_data
*wm_prog_data
= get_wm_prog_data(pipeline
);
97 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_WM
), wm
) {
98 wm
.StatisticsEnable
= true;
99 wm
.LineEndCapAntialiasingRegionWidth
= _05pixels
;
100 wm
.LineAntialiasingRegionWidth
= _10pixels
;
101 wm
.ForceThreadDispatchEnable
= NORMAL
;
102 wm
.PointRasterizationRule
= RASTRULE_UPPER_RIGHT
;
104 if (wm_prog_data
&& wm_prog_data
->early_fragment_tests
) {
105 wm
.EarlyDepthStencilControl
= PREPS
;
106 } else if (wm_prog_data
&& wm_prog_data
->has_side_effects
) {
107 wm
.EarlyDepthStencilControl
= PSEXEC
;
109 wm
.EarlyDepthStencilControl
= NORMAL
;
112 wm
.BarycentricInterpolationMode
= pipeline
->ps_ksp0
== NO_KERNEL
?
113 0 : wm_prog_data
->barycentric_interp_modes
;
116 if (pipeline
->gs_kernel
== NO_KERNEL
) {
117 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_GS
), gs
);
119 const struct brw_gs_prog_data
*gs_prog_data
= get_gs_prog_data(pipeline
);
121 length
= (gs_prog_data
->base
.vue_map
.num_slots
+ 1) / 2 - offset
;
123 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_GS
), gs
) {
124 gs
.SingleProgramFlow
= false;
125 gs
.KernelStartPointer
= pipeline
->gs_kernel
;
126 gs
.VectorMaskEnable
= false;
128 gs
.BindingTableEntryCount
= 0;
129 gs
.ExpectedVertexCount
= gs_prog_data
->vertices_in
;
131 gs
.ScratchSpaceBasePointer
= (struct anv_address
) {
132 .bo
= anv_scratch_pool_alloc(device
, &device
->scratch_pool
,
133 MESA_SHADER_GEOMETRY
,
134 gs_prog_data
->base
.base
.total_scratch
),
137 gs
.PerThreadScratchSpace
= scratch_space(&gs_prog_data
->base
.base
);
138 gs
.OutputVertexSize
= gs_prog_data
->output_vertex_size_hwords
* 2 - 1;
139 gs
.OutputTopology
= gs_prog_data
->output_topology
;
140 gs
.VertexURBEntryReadLength
= gs_prog_data
->base
.urb_read_length
;
141 gs
.IncludeVertexHandles
= gs_prog_data
->base
.include_vue_handles
;
143 gs
.DispatchGRFStartRegisterForURBData
=
144 gs_prog_data
->base
.base
.dispatch_grf_start_reg
;
146 gs
.MaximumNumberofThreads
= devinfo
->max_gs_threads
/ 2 - 1;
147 gs
.ControlDataHeaderSize
= gs_prog_data
->control_data_header_size_hwords
;
148 gs
.DispatchMode
= gs_prog_data
->base
.dispatch_mode
;
149 gs
.StatisticsEnable
= true;
150 gs
.IncludePrimitiveID
= gs_prog_data
->include_primitive_id
;
151 gs
.ReorderMode
= TRAILING
;
154 gs
.ControlDataFormat
= gs_prog_data
->control_data_format
;
156 gs
.StaticOutput
= gs_prog_data
->static_vertex_count
>= 0;
157 gs
.StaticOutputVertexCount
=
158 gs_prog_data
->static_vertex_count
>= 0 ?
159 gs_prog_data
->static_vertex_count
: 0;
161 /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled:
162 * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v)
163 * UserClipDistanceCullTestEnableBitmask(v)
166 gs
.VertexURBEntryOutputReadOffset
= offset
;
167 gs
.VertexURBEntryOutputLength
= length
;
171 const struct brw_vs_prog_data
*vs_prog_data
= get_vs_prog_data(pipeline
);
172 assert(!vs_prog_data
->base
.base
.use_alt_mode
);
173 /* Skip the VUE header and position slots */
175 length
= (vs_prog_data
->base
.vue_map
.num_slots
+ 1) / 2 - offset
;
177 uint32_t vs_start
= pipeline
->vs_simd8
!= NO_KERNEL
? pipeline
->vs_simd8
:
180 if (vs_start
== NO_KERNEL
) {
181 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VS
), vs
) {
182 vs
.FunctionEnable
= false;
183 /* Even if VS is disabled, SBE still gets the amount of
184 * vertex data to read from this field. */
185 vs
.VertexURBEntryOutputReadOffset
= offset
;
186 vs
.VertexURBEntryOutputLength
= length
;
189 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VS
), vs
) {
190 vs
.KernelStartPointer
= vs_start
;
191 vs
.SingleVertexDispatch
= false;
192 vs
.VectorMaskEnable
= false;
195 vs
.BindingTableEntryCount
=
196 vs_prog_data
->base
.base
.binding_table
.size_bytes
/ 4;
198 vs
.ThreadDispatchPriority
= false;
199 vs
.FloatingPointMode
= IEEE754
;
200 vs
.IllegalOpcodeExceptionEnable
= false;
201 vs
.AccessesUAV
= false;
202 vs
.SoftwareExceptionEnable
= false;
204 vs
.ScratchSpaceBasePointer
= (struct anv_address
) {
205 .bo
= anv_scratch_pool_alloc(device
, &device
->scratch_pool
,
207 vs_prog_data
->base
.base
.total_scratch
),
210 vs
.PerThreadScratchSpace
= scratch_space(&vs_prog_data
->base
.base
);
212 vs
.DispatchGRFStartRegisterForURBData
=
213 vs_prog_data
->base
.base
.dispatch_grf_start_reg
;
215 vs
.VertexURBEntryReadLength
= vs_prog_data
->base
.urb_read_length
;
216 vs
.VertexURBEntryReadOffset
= 0;
218 vs
.MaximumNumberofThreads
= devinfo
->max_vs_threads
- 1;
219 vs
.StatisticsEnable
= false;
220 vs
.SIMD8DispatchEnable
= pipeline
->vs_simd8
!= NO_KERNEL
;
221 vs
.VertexCacheDisable
= false;
222 vs
.FunctionEnable
= true;
224 vs
.VertexURBEntryOutputReadOffset
= offset
;
225 vs
.VertexURBEntryOutputLength
= length
;
228 vs
.UserClipDistanceClipTestEnableBitmask
= 0;
229 vs
.UserClipDistanceCullTestEnableBitmask
= 0;
233 const int num_thread_bias
= GEN_GEN
== 8 ? 2 : 1;
234 if (pipeline
->ps_ksp0
== NO_KERNEL
) {
235 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS
), ps
);
236 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS_EXTRA
), extra
) {
237 extra
.PixelShaderValid
= false;
240 emit_3dstate_sbe(pipeline
);
242 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS
), ps
) {
243 ps
.KernelStartPointer0
= pipeline
->ps_ksp0
;
244 ps
.KernelStartPointer1
= 0;
245 ps
.KernelStartPointer2
= pipeline
->ps_ksp0
+ wm_prog_data
->prog_offset_2
;
246 ps
._8PixelDispatchEnable
= wm_prog_data
->dispatch_8
;
247 ps
._16PixelDispatchEnable
= wm_prog_data
->dispatch_16
;
248 ps
._32PixelDispatchEnable
= false;
249 ps
.SingleProgramFlow
= false;
250 ps
.VectorMaskEnable
= true;
252 ps
.PushConstantEnable
= wm_prog_data
->base
.nr_params
> 0;
253 ps
.PositionXYOffsetSelect
= wm_prog_data
->uses_pos_offset
?
254 POSOFFSET_SAMPLE
: POSOFFSET_NONE
;
256 ps
.MaximumNumberofThreadsPerPSD
= 64 - num_thread_bias
;
258 ps
.ScratchSpaceBasePointer
= (struct anv_address
) {
259 .bo
= anv_scratch_pool_alloc(device
, &device
->scratch_pool
,
260 MESA_SHADER_FRAGMENT
,
261 wm_prog_data
->base
.total_scratch
),
264 ps
.PerThreadScratchSpace
= scratch_space(&wm_prog_data
->base
);
266 ps
.DispatchGRFStartRegisterForConstantSetupData0
=
267 wm_prog_data
->base
.dispatch_grf_start_reg
;
268 ps
.DispatchGRFStartRegisterForConstantSetupData1
= 0;
269 ps
.DispatchGRFStartRegisterForConstantSetupData2
=
270 wm_prog_data
->dispatch_grf_start_reg_2
;
273 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS_EXTRA
), ps
) {
274 ps
.PixelShaderValid
= true;
275 ps
.PixelShaderKillsPixel
= wm_prog_data
->uses_kill
;
276 ps
.PixelShaderComputedDepthMode
= wm_prog_data
->computed_depth_mode
;
277 ps
.AttributeEnable
= wm_prog_data
->num_varying_inputs
> 0;
278 ps
.oMaskPresenttoRenderTarget
= wm_prog_data
->uses_omask
;
279 ps
.PixelShaderIsPerSample
= wm_prog_data
->persample_dispatch
;
280 ps
.PixelShaderUsesSourceDepth
= wm_prog_data
->uses_src_depth
;
281 ps
.PixelShaderUsesSourceW
= wm_prog_data
->uses_src_w
;
283 ps
.PixelShaderPullsBary
= wm_prog_data
->pulls_bary
;
284 ps
.InputCoverageMaskState
= wm_prog_data
->uses_sample_mask
?
285 ICMS_INNER_CONSERVATIVE
: ICMS_NONE
;
287 ps
.PixelShaderUsesInputCoverageMask
= wm_prog_data
->uses_sample_mask
;
292 *pPipeline
= anv_pipeline_to_handle(pipeline
);