/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
30 #include "anv_private.h"
32 #include "genxml/gen_macros.h"
33 #include "genxml/genX_pack.h"
35 #include "genX_pipeline_util.h"
38 gen7_emit_rs_state(struct anv_pipeline
*pipeline
,
39 const VkPipelineRasterizationStateCreateInfo
*info
,
40 const struct anv_graphics_pipeline_create_info
*extra
)
42 struct GENX(3DSTATE_SF
) sf
= {
43 GENX(3DSTATE_SF_header
),
45 /* LegacyGlobalDepthBiasEnable */
47 .StatisticsEnable
= true,
48 .FrontFaceFillMode
= vk_to_gen_fillmode
[info
->polygonMode
],
49 .BackFaceFillMode
= vk_to_gen_fillmode
[info
->polygonMode
],
50 .ViewTransformEnable
= !(extra
&& extra
->use_rectlist
),
51 .FrontWinding
= vk_to_gen_front_face
[info
->frontFace
],
52 /* bool AntiAliasingEnable; */
54 .CullMode
= vk_to_gen_cullmode
[info
->cullMode
],
56 /* uint32_t LineEndCapAntialiasingRegionWidth; */
57 .ScissorRectangleEnable
= !(extra
&& extra
->use_rectlist
),
59 /* uint32_t MultisampleRasterizationMode; */
60 /* bool LastPixelEnable; */
62 .TriangleStripListProvokingVertexSelect
= 0,
63 .LineStripListProvokingVertexSelect
= 0,
64 .TriangleFanProvokingVertexSelect
= 1,
66 /* uint32_t AALineDistanceMode; */
67 /* uint32_t VertexSubPixelPrecisionSelect; */
68 .UsePointWidthState
= false,
70 .GlobalDepthOffsetEnableSolid
= info
->depthBiasEnable
,
71 .GlobalDepthOffsetEnableWireframe
= info
->depthBiasEnable
,
72 .GlobalDepthOffsetEnablePoint
= info
->depthBiasEnable
,
75 GENX(3DSTATE_SF_pack
)(NULL
, &pipeline
->gen7
.sf
, &sf
);
79 gen7_emit_cb_state(struct anv_pipeline
*pipeline
,
80 const VkPipelineColorBlendStateCreateInfo
*info
,
81 const VkPipelineMultisampleStateCreateInfo
*ms_info
)
83 struct anv_device
*device
= pipeline
->device
;
85 if (info
== NULL
|| info
->attachmentCount
== 0) {
86 pipeline
->blend_state
=
87 anv_state_pool_emit(&device
->dynamic_state_pool
,
88 GENX(BLEND_STATE
), 64,
89 .ColorBufferBlendEnable
= false,
90 .WriteDisableAlpha
= true,
91 .WriteDisableRed
= true,
92 .WriteDisableGreen
= true,
93 .WriteDisableBlue
= true);
95 const VkPipelineColorBlendAttachmentState
*a
= &info
->pAttachments
[0];
96 struct GENX(BLEND_STATE
) blend
= {
97 .AlphaToCoverageEnable
= ms_info
&& ms_info
->alphaToCoverageEnable
,
98 .AlphaToOneEnable
= ms_info
&& ms_info
->alphaToOneEnable
,
100 .LogicOpEnable
= info
->logicOpEnable
,
101 .LogicOpFunction
= vk_to_gen_logic_op
[info
->logicOp
],
102 .ColorBufferBlendEnable
= a
->blendEnable
,
103 .ColorClampRange
= COLORCLAMP_RTFORMAT
,
104 .PreBlendColorClampEnable
= true,
105 .PostBlendColorClampEnable
= true,
106 .SourceBlendFactor
= vk_to_gen_blend
[a
->srcColorBlendFactor
],
107 .DestinationBlendFactor
= vk_to_gen_blend
[a
->dstColorBlendFactor
],
108 .ColorBlendFunction
= vk_to_gen_blend_op
[a
->colorBlendOp
],
109 .SourceAlphaBlendFactor
= vk_to_gen_blend
[a
->srcAlphaBlendFactor
],
110 .DestinationAlphaBlendFactor
= vk_to_gen_blend
[a
->dstAlphaBlendFactor
],
111 .AlphaBlendFunction
= vk_to_gen_blend_op
[a
->alphaBlendOp
],
112 .WriteDisableAlpha
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_A_BIT
),
113 .WriteDisableRed
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_R_BIT
),
114 .WriteDisableGreen
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_G_BIT
),
115 .WriteDisableBlue
= !(a
->colorWriteMask
& VK_COLOR_COMPONENT_B_BIT
),
118 /* Our hardware applies the blend factor prior to the blend function
119 * regardless of what function is used. Technically, this means the
120 * hardware can do MORE than GL or Vulkan specify. However, it also
121 * means that, for MIN and MAX, we have to stomp the blend factor to
122 * ONE to make it a no-op.
124 if (a
->colorBlendOp
== VK_BLEND_OP_MIN
||
125 a
->colorBlendOp
== VK_BLEND_OP_MAX
) {
126 blend
.SourceBlendFactor
= BLENDFACTOR_ONE
;
127 blend
.DestinationBlendFactor
= BLENDFACTOR_ONE
;
129 if (a
->alphaBlendOp
== VK_BLEND_OP_MIN
||
130 a
->alphaBlendOp
== VK_BLEND_OP_MAX
) {
131 blend
.SourceAlphaBlendFactor
= BLENDFACTOR_ONE
;
132 blend
.DestinationAlphaBlendFactor
= BLENDFACTOR_ONE
;
135 pipeline
->blend_state
= anv_state_pool_alloc(&device
->dynamic_state_pool
,
136 GENX(BLEND_STATE_length
) * 4,
138 GENX(BLEND_STATE_pack
)(NULL
, pipeline
->blend_state
.map
, &blend
);
139 if (pipeline
->device
->info
.has_llc
)
140 anv_state_clflush(pipeline
->blend_state
);
143 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_BLEND_STATE_POINTERS
), bsp
) {
144 bsp
.BlendStatePointer
= pipeline
->blend_state
.offset
;
149 genX(graphics_pipeline_create
)(
151 struct anv_pipeline_cache
* cache
,
152 const VkGraphicsPipelineCreateInfo
* pCreateInfo
,
153 const struct anv_graphics_pipeline_create_info
*extra
,
154 const VkAllocationCallbacks
* pAllocator
,
155 VkPipeline
* pPipeline
)
157 ANV_FROM_HANDLE(anv_device
, device
, _device
);
158 ANV_FROM_HANDLE(anv_render_pass
, pass
, pCreateInfo
->renderPass
);
159 struct anv_subpass
*subpass
= &pass
->subpasses
[pCreateInfo
->subpass
];
160 struct anv_pipeline
*pipeline
;
163 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO
);
165 pipeline
= anv_alloc2(&device
->alloc
, pAllocator
, sizeof(*pipeline
), 8,
166 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
167 if (pipeline
== NULL
)
168 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
170 result
= anv_pipeline_init(pipeline
, device
, cache
,
171 pCreateInfo
, extra
, pAllocator
);
172 if (result
!= VK_SUCCESS
) {
173 anv_free2(&device
->alloc
, pAllocator
, pipeline
);
177 assert(pCreateInfo
->pVertexInputState
);
178 emit_vertex_input(pipeline
, pCreateInfo
->pVertexInputState
, extra
);
180 assert(pCreateInfo
->pRasterizationState
);
181 gen7_emit_rs_state(pipeline
, pCreateInfo
->pRasterizationState
, extra
);
183 emit_ds_state(pipeline
, pCreateInfo
->pDepthStencilState
, pass
, subpass
);
185 gen7_emit_cb_state(pipeline
, pCreateInfo
->pColorBlendState
,
186 pCreateInfo
->pMultisampleState
);
188 emit_urb_setup(pipeline
);
190 const VkPipelineRasterizationStateCreateInfo
*rs_info
=
191 pCreateInfo
->pRasterizationState
;
193 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_CLIP
), clip
) {
194 clip
.FrontWinding
= vk_to_gen_front_face
[rs_info
->frontFace
],
195 clip
.CullMode
= vk_to_gen_cullmode
[rs_info
->cullMode
],
196 clip
.ClipEnable
= !(extra
&& extra
->use_rectlist
),
197 clip
.APIMode
= APIMODE_OGL
,
198 clip
.ViewportXYClipTestEnable
= true,
199 clip
.ViewportZClipTestEnable
= !pipeline
->depth_clamp_enable
,
200 clip
.ClipMode
= CLIPMODE_NORMAL
,
202 clip
.TriangleStripListProvokingVertexSelect
= 0,
203 clip
.LineStripListProvokingVertexSelect
= 0,
204 clip
.TriangleFanProvokingVertexSelect
= 1,
206 clip
.MinimumPointWidth
= 0.125,
207 clip
.MaximumPointWidth
= 255.875,
208 clip
.MaximumVPIndex
= pCreateInfo
->pViewportState
->viewportCount
- 1;
211 if (pCreateInfo
->pMultisampleState
&&
212 pCreateInfo
->pMultisampleState
->rasterizationSamples
> 1)
213 anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO");
215 uint32_t samples
= 1;
216 uint32_t log2_samples
= __builtin_ffs(samples
) - 1;
218 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_MULTISAMPLE
), ms
) {
219 ms
.PixelLocation
= PIXLOC_CENTER
;
220 ms
.NumberofMultisamples
= log2_samples
;
223 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_SAMPLE_MASK
), sm
) {
224 sm
.SampleMask
= 0xff;
227 const struct brw_vs_prog_data
*vs_prog_data
= get_vs_prog_data(pipeline
);
230 /* From gen7_vs_state.c */
233 * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
234 * Geometry > Geometry Shader > State:
236 * "Note: Because of corruption in IVB:GT2, software needs to flush the
237 * whole fixed function pipeline when the GS enable changes value in
240 * The hardware architects have clarified that in this context "flush the
241 * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
244 if (!brw
->is_haswell
&& !brw
->is_baytrail
)
245 gen7_emit_vs_workaround_flush(brw
);
248 if (pipeline
->vs_vec4
== NO_KERNEL
|| (extra
&& extra
->disable_vs
))
249 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VS
), vs
);
251 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_VS
), vs
) {
252 vs
.KernelStartPointer
= pipeline
->vs_vec4
;
254 vs
.ScratchSpaceBasePointer
= (struct anv_address
) {
256 .offset
= pipeline
->scratch_start
[MESA_SHADER_VERTEX
],
258 vs
.PerThreadScratchSpace
= scratch_space(&vs_prog_data
->base
.base
);
260 vs
.DispatchGRFStartRegisterforURBData
=
261 vs_prog_data
->base
.base
.dispatch_grf_start_reg
;
263 vs
.VertexURBEntryReadLength
= vs_prog_data
->base
.urb_read_length
;
264 vs
.VertexURBEntryReadOffset
= 0;
265 vs
.MaximumNumberofThreads
= device
->info
.max_vs_threads
- 1;
266 vs
.StatisticsEnable
= true;
267 vs
.VSFunctionEnable
= true;
270 const struct brw_gs_prog_data
*gs_prog_data
= get_gs_prog_data(pipeline
);
272 if (pipeline
->gs_kernel
== NO_KERNEL
|| (extra
&& extra
->disable_vs
)) {
273 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_GS
), gs
);
275 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_GS
), gs
) {
276 gs
.KernelStartPointer
= pipeline
->gs_kernel
;
278 gs
.ScratchSpaceBasePointer
= (struct anv_address
) {
280 .offset
= pipeline
->scratch_start
[MESA_SHADER_GEOMETRY
],
282 gs
.PerThreadScratchSpace
= scratch_space(&gs_prog_data
->base
.base
);
284 gs
.OutputVertexSize
= gs_prog_data
->output_vertex_size_hwords
* 2 - 1;
285 gs
.OutputTopology
= gs_prog_data
->output_topology
;
286 gs
.VertexURBEntryReadLength
= gs_prog_data
->base
.urb_read_length
;
287 gs
.IncludeVertexHandles
= gs_prog_data
->base
.include_vue_handles
;
289 gs
.DispatchGRFStartRegisterforURBData
=
290 gs_prog_data
->base
.base
.dispatch_grf_start_reg
;
292 gs
.MaximumNumberofThreads
= device
->info
.max_gs_threads
- 1;
293 /* This in the next dword on HSW. */
294 gs
.ControlDataFormat
= gs_prog_data
->control_data_format
;
295 gs
.ControlDataHeaderSize
= gs_prog_data
->control_data_header_size_hwords
;
296 gs
.InstanceControl
= MAX2(gs_prog_data
->invocations
, 1) - 1;
297 gs
.DispatchMode
= gs_prog_data
->base
.dispatch_mode
;
298 gs
.GSStatisticsEnable
= true;
299 gs
.IncludePrimitiveID
= gs_prog_data
->include_primitive_id
;
300 # if (GEN_IS_HASWELL)
301 gs
.ReorderMode
= REORDER_TRAILING
;
303 gs
.ReorderEnable
= true;
309 if (pipeline
->ps_ksp0
== NO_KERNEL
) {
310 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_SBE
), sbe
);
312 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_WM
), wm
) {
313 wm
.StatisticsEnable
= true;
314 wm
.ThreadDispatchEnable
= false;
315 wm
.LineEndCapAntialiasingRegionWidth
= 0; /* 0.5 pixels */
316 wm
.LineAntialiasingRegionWidth
= 1; /* 1.0 pixels */
317 wm
.EarlyDepthStencilControl
= EDSC_NORMAL
;
318 wm
.PointRasterizationRule
= RASTRULE_UPPER_RIGHT
;
321 /* Even if no fragments are ever dispatched, the hardware hangs if we
322 * don't at least set the maximum number of threads.
324 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS
), ps
) {
325 ps
.MaximumNumberofThreads
= device
->info
.max_wm_threads
- 1;
328 const struct brw_wm_prog_data
*wm_prog_data
= get_wm_prog_data(pipeline
);
329 if (wm_prog_data
->urb_setup
[VARYING_SLOT_BFC0
] != -1 ||
330 wm_prog_data
->urb_setup
[VARYING_SLOT_BFC1
] != -1)
331 anv_finishme("two-sided color needs sbe swizzling setup");
332 if (wm_prog_data
->urb_setup
[VARYING_SLOT_PRIMITIVE_ID
] != -1)
333 anv_finishme("primitive_id needs sbe swizzling setup");
335 emit_3dstate_sbe(pipeline
);
337 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_PS
), ps
) {
338 ps
.KernelStartPointer0
= pipeline
->ps_ksp0
;
340 ps
.ScratchSpaceBasePointer
= (struct anv_address
) {
342 .offset
= pipeline
->scratch_start
[MESA_SHADER_FRAGMENT
],
344 ps
.PerThreadScratchSpace
= scratch_space(&wm_prog_data
->base
);
345 ps
.MaximumNumberofThreads
= device
->info
.max_wm_threads
- 1;
346 ps
.PushConstantEnable
= wm_prog_data
->base
.nr_params
> 0;
347 ps
.AttributeEnable
= wm_prog_data
->num_varying_inputs
> 0;
348 ps
.oMaskPresenttoRenderTarget
= wm_prog_data
->uses_omask
;
350 ps
.RenderTargetFastClearEnable
= false;
351 ps
.DualSourceBlendEnable
= false;
352 ps
.RenderTargetResolveEnable
= false;
354 ps
.PositionXYOffsetSelect
= wm_prog_data
->uses_pos_offset
?
355 POSOFFSET_SAMPLE
: POSOFFSET_NONE
;
357 ps
._32PixelDispatchEnable
= false;
358 ps
._16PixelDispatchEnable
= wm_prog_data
->dispatch_16
;
359 ps
._8PixelDispatchEnable
= wm_prog_data
->dispatch_8
;
361 ps
.DispatchGRFStartRegisterforConstantSetupData0
=
362 wm_prog_data
->base
.dispatch_grf_start_reg
,
363 ps
.DispatchGRFStartRegisterforConstantSetupData1
= 0,
364 ps
.DispatchGRFStartRegisterforConstantSetupData2
=
365 wm_prog_data
->dispatch_grf_start_reg_2
,
367 /* Haswell requires the sample mask to be set in this packet as well as
368 * in 3DSTATE_SAMPLE_MASK; the values should match. */
369 /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
371 ps
.KernelStartPointer1
= 0;
372 ps
.KernelStartPointer2
= pipeline
->ps_ksp0
+ wm_prog_data
->prog_offset_2
;
375 /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */
376 anv_batch_emit(&pipeline
->batch
, GENX(3DSTATE_WM
), wm
) {
377 wm
.StatisticsEnable
= true;
378 wm
.ThreadDispatchEnable
= true;
379 wm
.LineEndCapAntialiasingRegionWidth
= 0; /* 0.5 pixels */
380 wm
.LineAntialiasingRegionWidth
= 1; /* 1.0 pixels */
381 wm
.PointRasterizationRule
= RASTRULE_UPPER_RIGHT
;
382 wm
.PixelShaderComputedDepthMode
= wm_prog_data
->computed_depth_mode
;
383 wm
.PixelShaderUsesSourceDepth
= wm_prog_data
->uses_src_depth
;
384 wm
.PixelShaderUsesSourceW
= wm_prog_data
->uses_src_w
;
385 wm
.PixelShaderUsesInputCoverageMask
= wm_prog_data
->uses_sample_mask
;
387 if (wm_prog_data
->early_fragment_tests
) {
388 wm
.EarlyDepthStencilControl
= EDSC_PREPS
;
389 } else if (wm_prog_data
->has_side_effects
) {
390 wm
.EarlyDepthStencilControl
= EDSC_PSEXEC
;
392 wm
.EarlyDepthStencilControl
= EDSC_NORMAL
;
395 wm
.BarycentricInterpolationMode
= wm_prog_data
->barycentric_interp_modes
;
399 *pPipeline
= anv_pipeline_to_handle(pipeline
);