anv: Rename clflush_range and state_clflush
[mesa.git] / src / intel / vulkan / gen8_cmd_buffer.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31
32 #include "genxml/gen_macros.h"
33 #include "genxml/genX_pack.h"
34
35 #if GEN_GEN == 8
36 void
37 gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
38 {
39 uint32_t count = cmd_buffer->state.dynamic.viewport.count;
40 const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports;
41 struct anv_state sf_clip_state =
42 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);
43
44 for (uint32_t i = 0; i < count; i++) {
45 const VkViewport *vp = &viewports[i];
46
47 /* The gen7 state struct has just the matrix and guardband fields, the
48 * gen8 struct adds the min/max viewport fields. */
49 struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = {
50 .ViewportMatrixElementm00 = vp->width / 2,
51 .ViewportMatrixElementm11 = vp->height / 2,
52 .ViewportMatrixElementm22 = 1.0,
53 .ViewportMatrixElementm30 = vp->x + vp->width / 2,
54 .ViewportMatrixElementm31 = vp->y + vp->height / 2,
55 .ViewportMatrixElementm32 = 0.0,
56 .XMinClipGuardband = -1.0f,
57 .XMaxClipGuardband = 1.0f,
58 .YMinClipGuardband = -1.0f,
59 .YMaxClipGuardband = 1.0f,
60 .XMinViewPort = vp->x,
61 .XMaxViewPort = vp->x + vp->width - 1,
62 .YMinViewPort = MIN2(vp->y, vp->y + vp->height),
63 .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
64 };
65
66 GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64,
67 &sf_clip_viewport);
68 }
69
70 if (!cmd_buffer->device->info.has_llc)
71 anv_state_flush(sf_clip_state);
72
73 anv_batch_emit(&cmd_buffer->batch,
74 GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
75 clip.SFClipViewportPointer = sf_clip_state.offset;
76 }
77 }
78
79 void
80 gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
81 bool depth_clamp_enable)
82 {
83 uint32_t count = cmd_buffer->state.dynamic.viewport.count;
84 const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports;
85 struct anv_state cc_state =
86 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
87
88 for (uint32_t i = 0; i < count; i++) {
89 const VkViewport *vp = &viewports[i];
90
91 struct GENX(CC_VIEWPORT) cc_viewport = {
92 .MinimumDepth = depth_clamp_enable ? vp->minDepth : 0.0f,
93 .MaximumDepth = depth_clamp_enable ? vp->maxDepth : 1.0f,
94 };
95
96 GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
97 }
98
99 if (!cmd_buffer->device->info.has_llc)
100 anv_state_flush(cc_state);
101
102 anv_batch_emit(&cmd_buffer->batch,
103 GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
104 cc.CCViewportPointer = cc_state.offset;
105 }
106 }
107 #endif
108
109 static void
110 __emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer)
111 {
112 uint32_t sf_dw[GENX(3DSTATE_SF_length)];
113 struct GENX(3DSTATE_SF) sf = {
114 GENX(3DSTATE_SF_header),
115 .LineWidth = cmd_buffer->state.dynamic.line_width,
116 };
117 GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
118 /* FIXME: gen9.fs */
119 anv_batch_emit_merge(&cmd_buffer->batch, sf_dw,
120 cmd_buffer->state.pipeline->gen8.sf);
121 }
122
/* The prototype is visible to every gen so that the GEN_GEN == 8 code in this
 * file can call it; the definition itself is only built when GEN_GEN == 9.
 */
void gen9_emit_sf_state(struct anv_cmd_buffer *cmd_buffer);

#if GEN_GEN == 9

/* Externally visible entry point to the gen9 flavour of the SF emission. */
void gen9_emit_sf_state(struct anv_cmd_buffer *cmd_buffer)
{
   __emit_genx_sf_state(cmd_buffer);
}

#endif /* GEN_GEN == 9 */
135
/* Emit 3DSTATE_SF for the command buffer.
 *
 * When built for gen8, Cherryview devices are routed to the gen9 emitter;
 * every other case uses the emitter generated for the current gen.
 */
static void
__emit_sf_state(struct anv_cmd_buffer *cmd_buffer)
{
#if GEN_GEN == 8
   if (cmd_buffer->device->info.is_cherryview) {
      gen9_emit_sf_state(cmd_buffer);
      return;
   }
#endif

   __emit_genx_sf_state(cmd_buffer);
}
156
157 void
158 genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
159 {
160 if (cmd_buffer->state.pma_fix_enabled == enable)
161 return;
162
163 cmd_buffer->state.pma_fix_enabled = enable;
164
165 /* According to the Broadwell PIPE_CONTROL documentation, software should
166 * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
167 * prior to the LRI. If stencil buffer writes are enabled, then a Render
168 * Cache Flush is also necessary.
169 *
170 * The Skylake docs say to use a depth stall rather than a command
171 * streamer stall. However, the hardware seems to violently disagree.
172 * A full command streamer stall seems to be needed in both cases.
173 */
174 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
175 pc.DepthCacheFlushEnable = true;
176 pc.CommandStreamerStallEnable = true;
177 pc.RenderTargetCacheFlushEnable = true;
178 }
179
180 #if GEN_GEN == 9
181
182 uint32_t cache_mode;
183 anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
184 .STCPMAOptimizationEnable = enable,
185 .STCPMAOptimizationEnableMask = true);
186 anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
187 lri.RegisterOffset = GENX(CACHE_MODE_0_num);
188 lri.DataDWord = cache_mode;
189 }
190
191 #elif GEN_GEN == 8
192
193 uint32_t cache_mode;
194 anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
195 .NPPMAFixEnable = enable,
196 .NPEarlyZFailsDisable = enable,
197 .NPPMAFixEnableMask = true,
198 .NPEarlyZFailsDisableMask = true);
199 anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
200 lri.RegisterOffset = GENX(CACHE_MODE_1_num);
201 lri.DataDWord = cache_mode;
202 }
203
204 #endif /* GEN_GEN == 8 */
205
206 /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
207 * Flush bits is often necessary. We do it regardless because it's easier.
208 * The render cache flush is also necessary if stencil writes are enabled.
209 *
210 * Again, the Skylake docs give a different set of flushes but the BDW
211 * flushes seem to work just as well.
212 */
213 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
214 pc.DepthStallEnable = true;
215 pc.DepthCacheFlushEnable = true;
216 pc.RenderTargetCacheFlushEnable = true;
217 }
218 }
219
220 static inline bool
221 want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer)
222 {
223 assert(GEN_GEN == 8);
224
225 /* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE:
226 *
227 * SW must set this bit in order to enable this fix when following
228 * expression is TRUE.
229 *
230 * 3DSTATE_WM::ForceThreadDispatch != 1 &&
231 * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
232 * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
233 * (3DSTATE_DEPTH_BUFFER::HIZ Enable) &&
234 * !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) &&
235 * (3DSTATE_PS_EXTRA::PixelShaderValid) &&
236 * !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
237 * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
238 * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
239 * 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
240 * (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) &&
241 * (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
242 * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
243 * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
244 * 3DSTATE_PS_BLEND::AlphaTestEnable ||
245 * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
246 * 3DSTATE_WM::ForceKillPix != ForceOff &&
247 * ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
248 * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
249 * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
250 * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
251 * 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
252 * (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
253 */
254
255 /* These are always true:
256 * 3DSTATE_WM::ForceThreadDispatch != 1 &&
257 * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
258 */
259
260 /* We only enable the PMA fix if we know for certain that HiZ is enabled.
261 * If we don't know whether HiZ is enabled or not, we disable the PMA fix
262 * and there is no harm.
263 *
264 * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
265 * 3DSTATE_DEPTH_BUFFER::HIZ Enable
266 */
267 if (!cmd_buffer->state.hiz_enabled)
268 return false;
269
270 /* 3DSTATE_PS_EXTRA::PixelShaderValid */
271 struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
272 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
273 return false;
274
275 /* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */
276 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
277 if (wm_prog_data->early_fragment_tests)
278 return false;
279
280 /* We never use anv_pipeline for HiZ ops so this is trivially true:
281 * !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
282 * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
283 * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
284 * 3DSTATE_WM_HZ_OP::StencilBufferClear)
285 */
286
287 /* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */
288 if (!pipeline->depth_test_enable)
289 return false;
290
291 /* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
292 * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
293 * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
294 * 3DSTATE_PS_BLEND::AlphaTestEnable ||
295 * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
296 * 3DSTATE_WM::ForceKillPix != ForceOff &&
297 * ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
298 * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
299 * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
300 * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
301 * 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
302 * (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
303 */
304 return (pipeline->kill_pixel && (pipeline->writes_depth ||
305 pipeline->writes_stencil)) ||
306 wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
307 }
308
309 static inline bool
310 want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer)
311 {
312 assert(GEN_GEN == 9);
313
314 /* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
315 *
316 * Clearing this bit will force the STC cache to wait for pending
317 * retirement of pixels at the HZ-read stage and do the STC-test for
318 * Non-promoted, R-computed and Computed depth modes instead of
319 * postponing the STC-test to RCPFE.
320 *
321 * STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
322 * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
323 *
324 * STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
325 * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
326 * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
327 *
328 * COMP_STC_EN = STC_TEST_EN &&
329 * 3DSTATE_PS_EXTRA::PixelShaderComputesStencil
330 *
331 * SW parses the pipeline states to generate the following logical
332 * signal indicating if PMA FIX can be enabled.
333 *
334 * STC_PMA_OPT =
335 * 3DSTATE_WM::ForceThreadDispatch != 1 &&
336 * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
337 * 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
338 * 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
339 * !(3DSTATE_WM::EDSC_Mode == 2) &&
340 * 3DSTATE_PS_EXTRA::PixelShaderValid &&
341 * !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
342 * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
343 * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
344 * 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
345 * (COMP_STC_EN || STC_WRITE_EN) &&
346 * ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
347 * 3DSTATE_WM::ForceKillPix == ON ||
348 * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
349 * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
350 * 3DSTATE_PS_BLEND::AlphaTestEnable ||
351 * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
352 * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
353 */
354
355 /* These are always true:
356 * 3DSTATE_WM::ForceThreadDispatch != 1 &&
357 * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
358 */
359
360 /* We only enable the PMA fix if we know for certain that HiZ is enabled.
361 * If we don't know whether HiZ is enabled or not, we disable the PMA fix
362 * and there is no harm.
363 *
364 * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
365 * 3DSTATE_DEPTH_BUFFER::HIZ Enable
366 */
367 if (!cmd_buffer->state.hiz_enabled)
368 return false;
369
370 /* We can't possibly know if HiZ is enabled without the framebuffer */
371 assert(cmd_buffer->state.framebuffer);
372
373 /* HiZ is enabled so we had better have a depth buffer with HiZ */
374 const struct anv_image_view *ds_iview =
375 anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
376 assert(ds_iview && ds_iview->image->aux_usage == ISL_AUX_USAGE_HIZ);
377
378 /* 3DSTATE_PS_EXTRA::PixelShaderValid */
379 struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
380 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
381 return false;
382
383 /* !(3DSTATE_WM::EDSC_Mode == 2) */
384 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
385 if (wm_prog_data->early_fragment_tests)
386 return false;
387
388 /* We never use anv_pipeline for HiZ ops so this is trivially true:
389 * !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
390 * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
391 * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
392 * 3DSTATE_WM_HZ_OP::StencilBufferClear)
393 */
394
395 /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
396 * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
397 */
398 const bool stc_test_en =
399 (ds_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
400 pipeline->stencil_test_enable;
401
402 /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
403 * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
404 * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
405 */
406 const bool stc_write_en =
407 (ds_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
408 pipeline->writes_stencil;
409
410 /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
411 const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;
412
413 /* COMP_STC_EN || STC_WRITE_EN */
414 if (!(comp_stc_en || stc_write_en))
415 return false;
416
417 /* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
418 * 3DSTATE_WM::ForceKillPix == ON ||
419 * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
420 * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
421 * 3DSTATE_PS_BLEND::AlphaTestEnable ||
422 * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
423 * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
424 */
425 return pipeline->kill_pixel ||
426 wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
427 }
428
429 void
430 genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
431 {
432 struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
433
434 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
435 ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) {
436 __emit_sf_state(cmd_buffer);
437 }
438
439 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
440 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){
441 uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
442 struct GENX(3DSTATE_RASTER) raster = {
443 GENX(3DSTATE_RASTER_header),
444 .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias,
445 .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope,
446 .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp
447 };
448 GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
449 anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
450 pipeline->gen8.raster);
451 }
452
453 /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to
454 * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits gets split
455 * across different state packets for gen8 and gen9. We handle that by
456 * using a big old #if switch here.
457 */
458 #if GEN_GEN == 8
459 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
460 ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
461 struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
462 struct anv_state cc_state =
463 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
464 GENX(COLOR_CALC_STATE_length) * 4,
465 64);
466 struct GENX(COLOR_CALC_STATE) cc = {
467 .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
468 .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
469 .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
470 .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
471 .StencilReferenceValue = d->stencil_reference.front & 0xff,
472 .BackFaceStencilReferenceValue = d->stencil_reference.back & 0xff,
473 };
474 GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
475
476 if (!cmd_buffer->device->info.has_llc)
477 anv_state_flush(cc_state);
478
479 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
480 ccp.ColorCalcStatePointer = cc_state.offset;
481 ccp.ColorCalcStatePointerValid = true;
482 }
483 }
484
485 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
486 ANV_CMD_DIRTY_RENDER_TARGETS |
487 ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
488 ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
489 uint32_t wm_depth_stencil_dw[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
490 struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
491
492 struct GENX(3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil) = {
493 GENX(3DSTATE_WM_DEPTH_STENCIL_header),
494
495 .StencilTestMask = d->stencil_compare_mask.front & 0xff,
496 .StencilWriteMask = d->stencil_write_mask.front & 0xff,
497
498 .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
499 .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
500
501 .StencilBufferWriteEnable =
502 (d->stencil_write_mask.front || d->stencil_write_mask.back) &&
503 pipeline->writes_stencil,
504 };
505 GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, wm_depth_stencil_dw,
506 &wm_depth_stencil);
507
508 anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw,
509 pipeline->gen8.wm_depth_stencil);
510
511 genX(cmd_buffer_enable_pma_fix)(cmd_buffer,
512 want_depth_pma_fix(cmd_buffer));
513 }
514 #else
515 if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) {
516 struct anv_state cc_state =
517 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
518 GEN9_COLOR_CALC_STATE_length * 4,
519 64);
520 struct GEN9_COLOR_CALC_STATE cc = {
521 .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
522 .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
523 .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
524 .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
525 };
526 GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
527
528 if (!cmd_buffer->device->info.has_llc)
529 anv_state_flush(cc_state);
530
531 anv_batch_emit(&cmd_buffer->batch, GEN9_3DSTATE_CC_STATE_POINTERS, ccp) {
532 ccp.ColorCalcStatePointer = cc_state.offset;
533 ccp.ColorCalcStatePointerValid = true;
534 }
535 }
536
537 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
538 ANV_CMD_DIRTY_RENDER_TARGETS |
539 ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
540 ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
541 ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
542 uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length];
543 struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
544 struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
545 GEN9_3DSTATE_WM_DEPTH_STENCIL_header,
546
547 .StencilTestMask = d->stencil_compare_mask.front & 0xff,
548 .StencilWriteMask = d->stencil_write_mask.front & 0xff,
549
550 .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
551 .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
552
553 .StencilReferenceValue = d->stencil_reference.front & 0xff,
554 .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff,
555
556 .StencilBufferWriteEnable =
557 (d->stencil_write_mask.front || d->stencil_write_mask.back) &&
558 pipeline->writes_stencil,
559 };
560 GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil);
561
562 anv_batch_emit_merge(&cmd_buffer->batch, dwords,
563 pipeline->gen9.wm_depth_stencil);
564
565 genX(cmd_buffer_enable_pma_fix)(cmd_buffer,
566 want_stencil_pma_fix(cmd_buffer));
567 }
568 #endif
569
570 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
571 ANV_CMD_DIRTY_INDEX_BUFFER)) {
572 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
573 vf.IndexedDrawCutIndexEnable = pipeline->primitive_restart;
574 vf.CutIndex = cmd_buffer->state.restart_index;
575 }
576 }
577
578 cmd_buffer->state.dirty = 0;
579 }
580
581 void genX(CmdBindIndexBuffer)(
582 VkCommandBuffer commandBuffer,
583 VkBuffer _buffer,
584 VkDeviceSize offset,
585 VkIndexType indexType)
586 {
587 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
588 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
589
590 static const uint32_t vk_to_gen_index_type[] = {
591 [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
592 [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
593 };
594
595 static const uint32_t restart_index_for_type[] = {
596 [VK_INDEX_TYPE_UINT16] = UINT16_MAX,
597 [VK_INDEX_TYPE_UINT32] = UINT32_MAX,
598 };
599
600 cmd_buffer->state.restart_index = restart_index_for_type[indexType];
601
602 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
603 ib.IndexFormat = vk_to_gen_index_type[indexType];
604 ib.MemoryObjectControlState = GENX(MOCS);
605 ib.BufferStartingAddress =
606 (struct anv_address) { buffer->bo, buffer->offset + offset };
607 ib.BufferSize = buffer->size - offset;
608 }
609
610 cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
611 }
612
/* Mask of the pipeline stage bits that are pipelined, i.e. whose work gets
 * queued by the command streamer for later execution.
 */
#define ANV_PIPELINE_STAGE_PIPELINED_BITS                   \
   (VK_PIPELINE_STAGE_ALL_COMMANDS_BIT |                    \
    VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT |                    \
    VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT |                  \
    VK_PIPELINE_STAGE_TRANSFER_BIT |                        \
    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |                  \
    VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |         \
    VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |             \
    VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |            \
    VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |                 \
    VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |                 \
    VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |  \
    VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |     \
    VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |                   \
    VK_PIPELINE_STAGE_VERTEX_INPUT_BIT)
631
632 void genX(CmdSetEvent)(
633 VkCommandBuffer commandBuffer,
634 VkEvent _event,
635 VkPipelineStageFlags stageMask)
636 {
637 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
638 ANV_FROM_HANDLE(anv_event, event, _event);
639
640 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
641 if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
642 pc.StallAtPixelScoreboard = true;
643 pc.CommandStreamerStallEnable = true;
644 }
645
646 pc.DestinationAddressType = DAT_PPGTT,
647 pc.PostSyncOperation = WriteImmediateData,
648 pc.Address = (struct anv_address) {
649 &cmd_buffer->device->dynamic_state_block_pool.bo,
650 event->state.offset
651 };
652 pc.ImmediateData = VK_EVENT_SET;
653 }
654 }
655
656 void genX(CmdResetEvent)(
657 VkCommandBuffer commandBuffer,
658 VkEvent _event,
659 VkPipelineStageFlags stageMask)
660 {
661 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
662 ANV_FROM_HANDLE(anv_event, event, _event);
663
664 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
665 if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
666 pc.StallAtPixelScoreboard = true;
667 pc.CommandStreamerStallEnable = true;
668 }
669
670 pc.DestinationAddressType = DAT_PPGTT;
671 pc.PostSyncOperation = WriteImmediateData;
672 pc.Address = (struct anv_address) {
673 &cmd_buffer->device->dynamic_state_block_pool.bo,
674 event->state.offset
675 };
676 pc.ImmediateData = VK_EVENT_RESET;
677 }
678 }
679
680 void genX(CmdWaitEvents)(
681 VkCommandBuffer commandBuffer,
682 uint32_t eventCount,
683 const VkEvent* pEvents,
684 VkPipelineStageFlags srcStageMask,
685 VkPipelineStageFlags destStageMask,
686 uint32_t memoryBarrierCount,
687 const VkMemoryBarrier* pMemoryBarriers,
688 uint32_t bufferMemoryBarrierCount,
689 const VkBufferMemoryBarrier* pBufferMemoryBarriers,
690 uint32_t imageMemoryBarrierCount,
691 const VkImageMemoryBarrier* pImageMemoryBarriers)
692 {
693 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
694 for (uint32_t i = 0; i < eventCount; i++) {
695 ANV_FROM_HANDLE(anv_event, event, pEvents[i]);
696
697 anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), sem) {
698 sem.WaitMode = PollingMode,
699 sem.CompareOperation = COMPARE_SAD_EQUAL_SDD,
700 sem.SemaphoreDataDword = VK_EVENT_SET,
701 sem.SemaphoreAddress = (struct anv_address) {
702 &cmd_buffer->device->dynamic_state_block_pool.bo,
703 event->state.offset
704 };
705 }
706 }
707
708 genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask,
709 false, /* byRegion */
710 memoryBarrierCount, pMemoryBarriers,
711 bufferMemoryBarrierCount, pBufferMemoryBarriers,
712 imageMemoryBarrierCount, pImageMemoryBarriers);
713 }