2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 #include "anv_private.h"
32 #include "genxml/gen_macros.h"
33 #include "genxml/genX_pack.h"
35 #include "genX_pipeline_util.h"
38 genX(graphics_pipeline_create
)(
40 struct anv_pipeline_cache
* cache
,
41 const VkGraphicsPipelineCreateInfo
* pCreateInfo
,
42 const VkAllocationCallbacks
* pAllocator
,
43 VkPipeline
* pPipeline
)
45 ANV_FROM_HANDLE(anv_device
, device
, _device
);
46 ANV_FROM_HANDLE(anv_render_pass
, pass
, pCreateInfo
->renderPass
);
47 const struct anv_physical_device
*physical_device
=
48 &device
->instance
->physicalDevice
;
49 const struct gen_device_info
*devinfo
= &physical_device
->info
;
50 struct anv_subpass
*subpass
= &pass
->subpasses
[pCreateInfo
->subpass
];
51 struct anv_pipeline
*pipeline
;
54 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO
);
56 pipeline
= vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*pipeline
), 8,
57 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
59 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
61 result
= anv_pipeline_init(pipeline
, device
, cache
,
62 pCreateInfo
, pAllocator
);
63 if (result
!= VK_SUCCESS
) {
64 vk_free2(&device
->alloc
, pAllocator
, pipeline
);
68 assert(pCreateInfo
->pVertexInputState
);
69 emit_vertex_input(pipeline
, pCreateInfo
->pVertexInputState
);
71 assert(pCreateInfo
->pRasterizationState
);
72 emit_rs_state(pipeline
, pCreateInfo
->pRasterizationState
,
73 pCreateInfo
->pMultisampleState
, pass
, subpass
);
75 emit_ds_state(pipeline
, pCreateInfo
->pDepthStencilState
, pass
, subpass
);
77 emit_cb_state(pipeline
, pCreateInfo
->pColorBlendState
,
78 pCreateInfo
->pMultisampleState
);
80 emit_urb_setup(pipeline
);
82 emit_3dstate_clip(pipeline
, pCreateInfo
->pViewportState
,
83 pCreateInfo
->pRasterizationState
);
84 emit_3dstate_streamout(pipeline
, pCreateInfo
->pRasterizationState
);
86 emit_ms_state(pipeline
, pCreateInfo
->pMultisampleState
);
88 const struct brw_vs_prog_data
*vs_prog_data
= get_vs_prog_data(pipeline
);
91 /* From gen7_vs_state.c */
94 * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
95 * Geometry > Geometry Shader > State:
97 * "Note: Because of corruption in IVB:GT2, software needs to flush the
98 * whole fixed function pipeline when the GS enable changes value in the 3DSTATE_GS."
101 * The hardware architects have clarified that in this context "flush the
102 * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS Stall" bit set.
105 if (!brw
->is_haswell
&& !brw
->is_baytrail
)
106 gen7_emit_vs_workaround_flush(brw
);
109 assert(anv_pipeline_has_stage(pipeline
, MESA_SHADER_VERTEX
));
110 const struct anv_shader_bin
*vs_bin
=
111 pipeline
->shaders
[MESA_SHADER_VERTEX
];
112 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VS
), vs
) {
113 vs
.KernelStartPointer
= vs_bin
->kernel
.offset
;
115 vs
.ScratchSpaceBasePointer
= (struct anv_address
) {
116 .bo
= anv_scratch_pool_alloc(device
, &device
->scratch_pool
,
118 vs_prog_data
->base
.base
.total_scratch
),
121 vs
.PerThreadScratchSpace
= scratch_space(&vs_prog_data
->base
.base
);
123 vs
.DispatchGRFStartRegisterForURBData
=
124 vs_prog_data
->base
.base
.dispatch_grf_start_reg
;
126 vs
.SamplerCount
= get_sampler_count(vs_bin
);
127 vs
.BindingTableEntryCount
= get_binding_table_entry_count(vs_bin
);
129 vs
.VertexURBEntryReadLength
= vs_prog_data
->base
.urb_read_length
;
130 vs
.VertexURBEntryReadOffset
= 0;
131 vs
.MaximumNumberofThreads
= devinfo
->max_vs_threads
- 1;
132 vs
.StatisticsEnable
= true;
133 vs
.FunctionEnable
= true;
136 const struct brw_gs_prog_data
*gs_prog_data
= get_gs_prog_data(pipeline
);
138 if (!anv_pipeline_has_stage(pipeline
, MESA_SHADER_GEOMETRY
)) {
139 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_GS
), gs
);
141 const struct anv_shader_bin
*gs_bin
=
142 pipeline
->shaders
[MESA_SHADER_GEOMETRY
];
144 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_GS
), gs
) {
145 gs
.KernelStartPointer
= gs_bin
->kernel
.offset
;
147 gs
.ScratchSpaceBasePointer
= (struct anv_address
) {
148 .bo
= anv_scratch_pool_alloc(device
, &device
->scratch_pool
,
149 MESA_SHADER_GEOMETRY
,
150 gs_prog_data
->base
.base
.total_scratch
),
153 gs
.PerThreadScratchSpace
= scratch_space(&gs_prog_data
->base
.base
);
155 gs
.OutputVertexSize
= gs_prog_data
->output_vertex_size_hwords
* 2 - 1;
156 gs
.OutputTopology
= gs_prog_data
->output_topology
;
157 gs
.VertexURBEntryReadLength
= gs_prog_data
->base
.urb_read_length
;
158 gs
.IncludeVertexHandles
= gs_prog_data
->base
.include_vue_handles
;
160 gs
.DispatchGRFStartRegisterForURBData
=
161 gs_prog_data
->base
.base
.dispatch_grf_start_reg
;
163 gs
.SamplerCount
= get_sampler_count(gs_bin
);
164 gs
.BindingTableEntryCount
= get_binding_table_entry_count(gs_bin
);
166 gs
.MaximumNumberofThreads
= devinfo
->max_gs_threads
- 1;
167 /* This is in the next dword on HSW. */
168 gs
.ControlDataFormat
= gs_prog_data
->control_data_format
;
169 gs
.ControlDataHeaderSize
= gs_prog_data
->control_data_header_size_hwords
;
170 gs
.InstanceControl
= MAX2(gs_prog_data
->invocations
, 1) - 1;
171 gs
.DispatchMode
= gs_prog_data
->base
.dispatch_mode
;
172 gs
.GSStatisticsEnable
= true;
173 gs
.IncludePrimitiveID
= gs_prog_data
->include_primitive_id
;
174 # if (GEN_IS_HASWELL)
175 gs
.ReorderMode
= REORDER_TRAILING
;
177 gs
.ReorderEnable
= true;
183 if (!anv_pipeline_has_stage(pipeline
, MESA_SHADER_FRAGMENT
)) {
184 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_SBE
), sbe
);
186 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_WM
), wm
) {
187 wm
.StatisticsEnable
= true;
188 wm
.ThreadDispatchEnable
= false;
189 wm
.LineEndCapAntialiasingRegionWidth
= 0; /* 0.5 pixels */
190 wm
.LineAntialiasingRegionWidth
= 1; /* 1.0 pixels */
191 wm
.EarlyDepthStencilControl
= EDSC_NORMAL
;
192 wm
.PointRasterizationRule
= RASTRULE_UPPER_RIGHT
;
195 /* Even if no fragments are ever dispatched, the hardware hangs if we
196 * don't at least set the maximum number of threads.
198 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS
), ps
) {
199 ps
.MaximumNumberofThreads
= devinfo
->max_wm_threads
- 1;
202 const struct anv_shader_bin
*fs_bin
=
203 pipeline
->shaders
[MESA_SHADER_FRAGMENT
];
204 const struct brw_wm_prog_data
*wm_prog_data
= get_wm_prog_data(pipeline
);
206 if (wm_prog_data
->urb_setup
[VARYING_SLOT_BFC0
] != -1 ||
207 wm_prog_data
->urb_setup
[VARYING_SLOT_BFC1
] != -1)
208 anv_finishme("two-sided color needs sbe swizzling setup");
209 if (wm_prog_data
->urb_setup
[VARYING_SLOT_PRIMITIVE_ID
] != -1)
210 anv_finishme("primitive_id needs sbe swizzling setup");
212 emit_3dstate_sbe(pipeline
);
214 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS
), ps
) {
215 ps
.KernelStartPointer0
= fs_bin
->kernel
.offset
;
216 ps
.KernelStartPointer1
= 0;
217 ps
.KernelStartPointer2
= fs_bin
->kernel
.offset
+
218 wm_prog_data
->prog_offset_2
;
220 ps
.ScratchSpaceBasePointer
= (struct anv_address
) {
221 .bo
= anv_scratch_pool_alloc(device
, &device
->scratch_pool
,
222 MESA_SHADER_FRAGMENT
,
223 wm_prog_data
->base
.total_scratch
),
226 ps
.PerThreadScratchSpace
= scratch_space(&wm_prog_data
->base
);
228 ps
.SamplerCount
= get_sampler_count(fs_bin
);
229 ps
.BindingTableEntryCount
= get_binding_table_entry_count(fs_bin
);
231 ps
.MaximumNumberofThreads
= devinfo
->max_wm_threads
- 1;
232 ps
.PushConstantEnable
= wm_prog_data
->base
.nr_params
> 0;
233 ps
.AttributeEnable
= wm_prog_data
->num_varying_inputs
> 0;
234 ps
.oMaskPresenttoRenderTarget
= wm_prog_data
->uses_omask
;
236 ps
.RenderTargetFastClearEnable
= false;
237 ps
.DualSourceBlendEnable
= false;
238 ps
.RenderTargetResolveEnable
= false;
240 ps
.PositionXYOffsetSelect
= wm_prog_data
->uses_pos_offset
?
241 POSOFFSET_SAMPLE
: POSOFFSET_NONE
;
243 ps
._32PixelDispatchEnable
= false;
244 ps
._16PixelDispatchEnable
= wm_prog_data
->dispatch_16
;
245 ps
._8PixelDispatchEnable
= wm_prog_data
->dispatch_8
;
247 ps
.DispatchGRFStartRegisterforConstantSetupData0
=
248 wm_prog_data
->base
.dispatch_grf_start_reg
,
249 ps
.DispatchGRFStartRegisterforConstantSetupData1
= 0,
250 ps
.DispatchGRFStartRegisterforConstantSetupData2
=
251 wm_prog_data
->dispatch_grf_start_reg_2
;
253 /* Haswell requires the sample mask to be set in this packet as well as
254 * in 3DSTATE_SAMPLE_MASK; the values should match. */
255 /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
257 ps
.SampleMask
= 0xff;
261 uint32_t samples
= pCreateInfo
->pMultisampleState
?
262 pCreateInfo
->pMultisampleState
->rasterizationSamples
: 1;
264 /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */
265 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_WM
), wm
) {
266 wm
.StatisticsEnable
= true;
267 wm
.ThreadDispatchEnable
= true;
268 wm
.LineEndCapAntialiasingRegionWidth
= 0; /* 0.5 pixels */
269 wm
.LineAntialiasingRegionWidth
= 1; /* 1.0 pixels */
270 wm
.PointRasterizationRule
= RASTRULE_UPPER_RIGHT
;
271 wm
.PixelShaderKillPixel
= wm_prog_data
->uses_kill
;
272 wm
.PixelShaderComputedDepthMode
= wm_prog_data
->computed_depth_mode
;
273 wm
.PixelShaderUsesSourceDepth
= wm_prog_data
->uses_src_depth
;
274 wm
.PixelShaderUsesSourceW
= wm_prog_data
->uses_src_w
;
275 wm
.PixelShaderUsesInputCoverageMask
= wm_prog_data
->uses_sample_mask
;
277 if (wm_prog_data
->early_fragment_tests
) {
278 wm
.EarlyDepthStencilControl
= EDSC_PREPS
;
279 } else if (wm_prog_data
->has_side_effects
) {
280 wm
.EarlyDepthStencilControl
= EDSC_PSEXEC
;
282 wm
.EarlyDepthStencilControl
= EDSC_NORMAL
;
285 wm
.BarycentricInterpolationMode
= wm_prog_data
->barycentric_interp_modes
;
287 wm
.MultisampleRasterizationMode
= samples
> 1 ?
288 MSRASTMODE_ON_PATTERN
: MSRASTMODE_OFF_PIXEL
;
289 wm
.MultisampleDispatchMode
= ((samples
== 1) ||
290 (samples
> 1 && wm_prog_data
->persample_dispatch
)) ?
291 MSDISPMODE_PERSAMPLE
: MSDISPMODE_PERPIXEL
;
295 *pPipeline
= anv_pipeline_to_handle(pipeline
);