/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
30 #include "anv_private.h"
34 gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer
*cmd_buffer
)
36 struct anv_device
*device
= cmd_buffer
->device
;
37 struct anv_bo
*scratch_bo
= NULL
;
39 cmd_buffer
->state
.scratch_size
=
40 anv_block_pool_size(&device
->scratch_block_pool
);
41 if (cmd_buffer
->state
.scratch_size
> 0)
42 scratch_bo
= &device
->scratch_block_pool
.bo
;
44 anv_batch_emit(&cmd_buffer
->batch
, GEN7_STATE_BASE_ADDRESS
,
45 .GeneralStateBaseAddress
= { scratch_bo
, 0 },
46 .GeneralStateMemoryObjectControlState
= GEN7_MOCS
,
47 .GeneralStateBaseAddressModifyEnable
= true,
48 .GeneralStateAccessUpperBound
= { scratch_bo
, scratch_bo
->size
},
49 .GeneralStateAccessUpperBoundModifyEnable
= true,
51 .SurfaceStateBaseAddress
= { anv_cmd_buffer_current_surface_bo(cmd_buffer
), 0 },
52 .SurfaceStateMemoryObjectControlState
= GEN7_MOCS
,
53 .SurfaceStateBaseAddressModifyEnable
= true,
55 .DynamicStateBaseAddress
= { &device
->dynamic_state_block_pool
.bo
, 0 },
56 .DynamicStateMemoryObjectControlState
= GEN7_MOCS
,
57 .DynamicStateBaseAddressModifyEnable
= true,
58 .DynamicStateAccessUpperBound
= { &device
->dynamic_state_block_pool
.bo
,
59 device
->dynamic_state_block_pool
.bo
.size
},
60 .DynamicStateAccessUpperBoundModifyEnable
= true,
62 .IndirectObjectBaseAddress
= { NULL
, 0 },
63 .IndirectObjectMemoryObjectControlState
= GEN7_MOCS
,
64 .IndirectObjectBaseAddressModifyEnable
= true,
66 .IndirectObjectAccessUpperBound
= { NULL
, 0xffffffff },
67 .IndirectObjectAccessUpperBoundModifyEnable
= true,
69 .InstructionBaseAddress
= { &device
->instruction_block_pool
.bo
, 0 },
70 .InstructionMemoryObjectControlState
= GEN7_MOCS
,
71 .InstructionBaseAddressModifyEnable
= true,
72 .InstructionAccessUpperBound
= { &device
->instruction_block_pool
.bo
,
73 device
->instruction_block_pool
.bo
.size
},
74 .InstructionAccessUpperBoundModifyEnable
= true);
76 /* After re-setting the surface state base address, we have to do some
77 * cache flusing so that the sampler engine will pick up the new
78 * SURFACE_STATE objects and binding tables. From the Broadwell PRM,
79 * Shared Function > 3D Sampler > State > State Caching (page 96):
81 * Coherency with system memory in the state cache, like the texture
82 * cache is handled partially by software. It is expected that the
83 * command stream or shader will issue Cache Flush operation or
84 * Cache_Flush sampler message to ensure that the L1 cache remains
85 * coherent with system memory.
89 * Whenever the value of the Dynamic_State_Base_Addr,
90 * Surface_State_Base_Addr are altered, the L1 state cache must be
91 * invalidated to ensure the new surface or sampler state is fetched
94 * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
95 * which, according the PIPE_CONTROL instruction documentation in the
98 * Setting this bit is independent of any other bit in this packet.
99 * This bit controls the invalidation of the L1 and L2 state caches
100 * at the top of the pipe i.e. at the parsing time.
102 * Unfortunately, experimentation seems to indicate that state cache
103 * invalidation through a PIPE_CONTROL does nothing whatsoever in
104 * regards to surface state and binding tables. In stead, it seems that
105 * invalidating the texture cache is what is actually needed.
107 * XXX: As far as we have been able to determine through
108 * experimentation, shows that flush the texture cache appears to be
109 * sufficient. The theory here is that all of the sampling/rendering
110 * units cache the binding table in the texture cache. However, we have
111 * yet to be able to actually confirm this.
113 anv_batch_emit(&cmd_buffer
->batch
, GEN7_PIPE_CONTROL
,
114 .TextureCacheInvalidationEnable
= true);
117 static const uint32_t vk_to_gen_index_type
[] = {
118 [VK_INDEX_TYPE_UINT16
] = INDEX_WORD
,
119 [VK_INDEX_TYPE_UINT32
] = INDEX_DWORD
,
122 void gen7_CmdBindIndexBuffer(
123 VkCmdBuffer cmdBuffer
,
126 VkIndexType indexType
)
128 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, cmdBuffer
);
129 ANV_FROM_HANDLE(anv_buffer
, buffer
, _buffer
);
131 cmd_buffer
->state
.dirty
|= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY
;
132 cmd_buffer
->state
.gen7
.index_buffer
= buffer
;
133 cmd_buffer
->state
.gen7
.index_type
= vk_to_gen_index_type
[indexType
];
134 cmd_buffer
->state
.gen7
.index_offset
= offset
;
138 gen7_flush_compute_descriptor_set(struct anv_cmd_buffer
*cmd_buffer
)
140 struct anv_device
*device
= cmd_buffer
->device
;
141 struct anv_pipeline
*pipeline
= cmd_buffer
->state
.compute_pipeline
;
142 struct anv_state surfaces
= { 0, }, samplers
= { 0, };
145 result
= anv_cmd_buffer_emit_samplers(cmd_buffer
,
146 VK_SHADER_STAGE_COMPUTE
, &samplers
);
147 if (result
!= VK_SUCCESS
)
149 result
= anv_cmd_buffer_emit_binding_table(cmd_buffer
,
150 VK_SHADER_STAGE_COMPUTE
, &surfaces
);
151 if (result
!= VK_SUCCESS
)
154 struct GEN7_INTERFACE_DESCRIPTOR_DATA desc
= {
155 .KernelStartPointer
= pipeline
->cs_simd
,
156 .BindingTablePointer
= surfaces
.offset
,
157 .SamplerStatePointer
= samplers
.offset
,
158 .NumberofThreadsinGPGPUThreadGroup
= 0 /* FIXME: Really? */
161 uint32_t size
= GEN7_INTERFACE_DESCRIPTOR_DATA_length
* sizeof(uint32_t);
162 struct anv_state state
=
163 anv_state_pool_alloc(&device
->dynamic_state_pool
, size
, 64);
165 GEN7_INTERFACE_DESCRIPTOR_DATA_pack(NULL
, state
.map
, &desc
);
167 anv_batch_emit(&cmd_buffer
->batch
, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD
,
168 .InterfaceDescriptorTotalLength
= size
,
169 .InterfaceDescriptorDataStartAddress
= state
.offset
);
175 gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer
*cmd_buffer
)
177 struct anv_pipeline
*pipeline
= cmd_buffer
->state
.compute_pipeline
;
180 assert(pipeline
->active_stages
== VK_SHADER_STAGE_COMPUTE_BIT
);
182 if (cmd_buffer
->state
.current_pipeline
!= GPGPU
) {
183 anv_batch_emit(&cmd_buffer
->batch
, GEN7_PIPELINE_SELECT
,
184 .PipelineSelection
= GPGPU
);
185 cmd_buffer
->state
.current_pipeline
= GPGPU
;
188 if (cmd_buffer
->state
.compute_dirty
& ANV_CMD_BUFFER_PIPELINE_DIRTY
)
189 anv_batch_emit_batch(&cmd_buffer
->batch
, &pipeline
->batch
);
191 if ((cmd_buffer
->state
.descriptors_dirty
& VK_SHADER_STAGE_COMPUTE_BIT
) ||
192 (cmd_buffer
->state
.compute_dirty
& ANV_CMD_BUFFER_PIPELINE_DIRTY
)) {
193 /* FIXME: figure out descriptors for gen7 */
194 result
= gen7_flush_compute_descriptor_set(cmd_buffer
);
195 assert(result
== VK_SUCCESS
);
196 cmd_buffer
->state
.descriptors_dirty
&= ~VK_SHADER_STAGE_COMPUTE
;
199 cmd_buffer
->state
.compute_dirty
= 0;
203 gen7_cmd_buffer_flush_state(struct anv_cmd_buffer
*cmd_buffer
)
205 struct anv_pipeline
*pipeline
= cmd_buffer
->state
.pipeline
;
208 uint32_t vb_emit
= cmd_buffer
->state
.vb_dirty
& pipeline
->vb_used
;
210 assert((pipeline
->active_stages
& VK_SHADER_STAGE_COMPUTE_BIT
) == 0);
212 if (cmd_buffer
->state
.current_pipeline
!= _3D
) {
213 anv_batch_emit(&cmd_buffer
->batch
, GEN7_PIPELINE_SELECT
,
214 .PipelineSelection
= _3D
);
215 cmd_buffer
->state
.current_pipeline
= _3D
;
219 const uint32_t num_buffers
= __builtin_popcount(vb_emit
);
220 const uint32_t num_dwords
= 1 + num_buffers
* 4;
222 p
= anv_batch_emitn(&cmd_buffer
->batch
, num_dwords
,
223 GEN7_3DSTATE_VERTEX_BUFFERS
);
225 for_each_bit(vb
, vb_emit
) {
226 struct anv_buffer
*buffer
= cmd_buffer
->state
.vertex_bindings
[vb
].buffer
;
227 uint32_t offset
= cmd_buffer
->state
.vertex_bindings
[vb
].offset
;
229 struct GEN7_VERTEX_BUFFER_STATE state
= {
230 .VertexBufferIndex
= vb
,
231 .BufferAccessType
= pipeline
->instancing_enable
[vb
] ? INSTANCEDATA
: VERTEXDATA
,
232 .VertexBufferMemoryObjectControlState
= GEN7_MOCS
,
233 .AddressModifyEnable
= true,
234 .BufferPitch
= pipeline
->binding_stride
[vb
],
235 .BufferStartingAddress
= { buffer
->bo
, buffer
->offset
+ offset
},
236 .EndAddress
= { buffer
->bo
, buffer
->offset
+ buffer
->size
- 1},
237 .InstanceDataStepRate
= 1
240 GEN7_VERTEX_BUFFER_STATE_pack(&cmd_buffer
->batch
, &p
[1 + i
* 4], &state
);
245 if (cmd_buffer
->state
.dirty
& ANV_CMD_BUFFER_PIPELINE_DIRTY
) {
246 /* If somebody compiled a pipeline after starting a command buffer the
247 * scratch bo may have grown since we started this cmd buffer (and
248 * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
249 * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
250 if (cmd_buffer
->state
.scratch_size
< pipeline
->total_scratch
)
251 gen7_cmd_buffer_emit_state_base_address(cmd_buffer
);
253 anv_batch_emit_batch(&cmd_buffer
->batch
, &pipeline
->batch
);
256 if (cmd_buffer
->state
.descriptors_dirty
)
257 anv_flush_descriptor_sets(cmd_buffer
);
259 if (cmd_buffer
->state
.dirty
& ANV_CMD_BUFFER_VP_DIRTY
) {
260 struct anv_dynamic_vp_state
*vp_state
= cmd_buffer
->state
.vp_state
;
261 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DSTATE_SCISSOR_STATE_POINTERS
,
262 .ScissorRectPointer
= vp_state
->scissor
.offset
);
263 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC
,
264 .CCViewportPointer
= vp_state
->cc_vp
.offset
);
265 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP
,
266 .SFClipViewportPointer
= vp_state
->sf_clip_vp
.offset
);
269 if (cmd_buffer
->state
.dirty
&
270 (ANV_CMD_BUFFER_PIPELINE_DIRTY
| ANV_CMD_BUFFER_RS_DIRTY
)) {
271 anv_batch_emit_merge(&cmd_buffer
->batch
,
272 cmd_buffer
->state
.rs_state
->gen7
.sf
,
276 if (cmd_buffer
->state
.dirty
&
277 (ANV_CMD_BUFFER_PIPELINE_DIRTY
| ANV_CMD_BUFFER_DS_DIRTY
)) {
278 struct anv_state state
;
280 if (cmd_buffer
->state
.ds_state
== NULL
)
281 state
= anv_cmd_buffer_emit_dynamic(cmd_buffer
,
282 pipeline
->gen7
.depth_stencil_state
,
283 GEN7_COLOR_CALC_STATE_length
, 64);
285 state
= anv_cmd_buffer_merge_dynamic(cmd_buffer
,
286 cmd_buffer
->state
.ds_state
->gen7
.depth_stencil_state
,
287 pipeline
->gen7
.depth_stencil_state
,
288 GEN7_DEPTH_STENCIL_STATE_length
, 64);
289 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS
,
290 .PointertoDEPTH_STENCIL_STATE
= state
.offset
);
293 if (cmd_buffer
->state
.dirty
&
294 (ANV_CMD_BUFFER_CB_DIRTY
| ANV_CMD_BUFFER_DS_DIRTY
)) {
295 struct anv_state state
;
296 if (cmd_buffer
->state
.ds_state
== NULL
)
297 state
= anv_cmd_buffer_emit_dynamic(cmd_buffer
,
298 cmd_buffer
->state
.cb_state
->color_calc_state
,
299 GEN7_COLOR_CALC_STATE_length
, 64);
300 else if (cmd_buffer
->state
.cb_state
== NULL
)
301 state
= anv_cmd_buffer_emit_dynamic(cmd_buffer
,
302 cmd_buffer
->state
.ds_state
->gen7
.color_calc_state
,
303 GEN7_COLOR_CALC_STATE_length
, 64);
305 state
= anv_cmd_buffer_merge_dynamic(cmd_buffer
,
306 cmd_buffer
->state
.ds_state
->gen7
.color_calc_state
,
307 cmd_buffer
->state
.cb_state
->color_calc_state
,
308 GEN7_COLOR_CALC_STATE_length
, 64);
310 anv_batch_emit(&cmd_buffer
->batch
,
311 GEN7_3DSTATE_CC_STATE_POINTERS
,
312 .ColorCalcStatePointer
= state
.offset
);
315 if (cmd_buffer
->state
.gen7
.index_buffer
&&
316 cmd_buffer
->state
.dirty
& (ANV_CMD_BUFFER_PIPELINE_DIRTY
|
317 ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY
)) {
318 struct anv_buffer
*buffer
= cmd_buffer
->state
.gen7
.index_buffer
;
319 uint32_t offset
= cmd_buffer
->state
.gen7
.index_offset
;
321 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DSTATE_INDEX_BUFFER
,
322 .CutIndexEnable
= pipeline
->primitive_restart
,
323 .IndexFormat
= cmd_buffer
->state
.gen7
.index_type
,
324 .MemoryObjectControlState
= GEN7_MOCS
,
325 .BufferStartingAddress
= { buffer
->bo
, buffer
->offset
+ offset
},
326 .BufferEndingAddress
= { buffer
->bo
, buffer
->offset
+ buffer
->size
});
329 cmd_buffer
->state
.vb_dirty
&= ~vb_emit
;
330 cmd_buffer
->state
.dirty
= 0;
334 VkCmdBuffer cmdBuffer
,
335 uint32_t firstVertex
,
336 uint32_t vertexCount
,
337 uint32_t firstInstance
,
338 uint32_t instanceCount
)
340 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, cmdBuffer
);
341 struct anv_pipeline
*pipeline
= cmd_buffer
->state
.pipeline
;
343 gen7_cmd_buffer_flush_state(cmd_buffer
);
345 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DPRIMITIVE
,
346 .VertexAccessType
= SEQUENTIAL
,
347 .PrimitiveTopologyType
= pipeline
->topology
,
348 .VertexCountPerInstance
= vertexCount
,
349 .StartVertexLocation
= firstVertex
,
350 .InstanceCount
= instanceCount
,
351 .StartInstanceLocation
= firstInstance
,
352 .BaseVertexLocation
= 0);
355 void gen7_CmdDrawIndexed(
356 VkCmdBuffer cmdBuffer
,
359 int32_t vertexOffset
,
360 uint32_t firstInstance
,
361 uint32_t instanceCount
)
363 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, cmdBuffer
);
364 struct anv_pipeline
*pipeline
= cmd_buffer
->state
.pipeline
;
366 gen7_cmd_buffer_flush_state(cmd_buffer
);
368 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DPRIMITIVE
,
369 .VertexAccessType
= RANDOM
,
370 .PrimitiveTopologyType
= pipeline
->topology
,
371 .VertexCountPerInstance
= indexCount
,
372 .StartVertexLocation
= firstIndex
,
373 .InstanceCount
= instanceCount
,
374 .StartInstanceLocation
= firstInstance
,
375 .BaseVertexLocation
= vertexOffset
);
379 gen7_batch_lrm(struct anv_batch
*batch
,
380 uint32_t reg
, struct anv_bo
*bo
, uint32_t offset
)
382 anv_batch_emit(batch
, GEN7_MI_LOAD_REGISTER_MEM
,
383 .RegisterAddress
= reg
,
384 .MemoryAddress
= { bo
, offset
});
388 gen7_batch_lri(struct anv_batch
*batch
, uint32_t reg
, uint32_t imm
)
390 anv_batch_emit(batch
, GEN7_MI_LOAD_REGISTER_IMM
,
391 .RegisterOffset
= reg
,
395 /* Auto-Draw / Indirect Registers */
396 #define GEN7_3DPRIM_END_OFFSET 0x2420
397 #define GEN7_3DPRIM_START_VERTEX 0x2430
398 #define GEN7_3DPRIM_VERTEX_COUNT 0x2434
399 #define GEN7_3DPRIM_INSTANCE_COUNT 0x2438
400 #define GEN7_3DPRIM_START_INSTANCE 0x243C
401 #define GEN7_3DPRIM_BASE_VERTEX 0x2440
403 void gen7_CmdDrawIndirect(
404 VkCmdBuffer cmdBuffer
,
410 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, cmdBuffer
);
411 ANV_FROM_HANDLE(anv_buffer
, buffer
, _buffer
);
412 struct anv_pipeline
*pipeline
= cmd_buffer
->state
.pipeline
;
413 struct anv_bo
*bo
= buffer
->bo
;
414 uint32_t bo_offset
= buffer
->offset
+ offset
;
416 gen7_cmd_buffer_flush_state(cmd_buffer
);
418 gen7_batch_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_VERTEX_COUNT
, bo
, bo_offset
);
419 gen7_batch_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_INSTANCE_COUNT
, bo
, bo_offset
+ 4);
420 gen7_batch_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_START_VERTEX
, bo
, bo_offset
+ 8);
421 gen7_batch_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_START_INSTANCE
, bo
, bo_offset
+ 12);
422 gen7_batch_lri(&cmd_buffer
->batch
, GEN7_3DPRIM_BASE_VERTEX
, 0);
424 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DPRIMITIVE
,
425 .IndirectParameterEnable
= true,
426 .VertexAccessType
= SEQUENTIAL
,
427 .PrimitiveTopologyType
= pipeline
->topology
);
430 void gen7_CmdDrawIndexedIndirect(
431 VkCmdBuffer cmdBuffer
,
437 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, cmdBuffer
);
438 ANV_FROM_HANDLE(anv_buffer
, buffer
, _buffer
);
439 struct anv_pipeline
*pipeline
= cmd_buffer
->state
.pipeline
;
440 struct anv_bo
*bo
= buffer
->bo
;
441 uint32_t bo_offset
= buffer
->offset
+ offset
;
443 gen7_cmd_buffer_flush_state(cmd_buffer
);
445 gen7_batch_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_VERTEX_COUNT
, bo
, bo_offset
);
446 gen7_batch_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_INSTANCE_COUNT
, bo
, bo_offset
+ 4);
447 gen7_batch_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_START_VERTEX
, bo
, bo_offset
+ 8);
448 gen7_batch_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_BASE_VERTEX
, bo
, bo_offset
+ 12);
449 gen7_batch_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_START_INSTANCE
, bo
, bo_offset
+ 16);
451 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DPRIMITIVE
,
452 .IndirectParameterEnable
= true,
453 .VertexAccessType
= RANDOM
,
454 .PrimitiveTopologyType
= pipeline
->topology
);
457 void gen7_CmdDispatch(
458 VkCmdBuffer cmdBuffer
,
463 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, cmdBuffer
);
464 struct anv_pipeline
*pipeline
= cmd_buffer
->state
.compute_pipeline
;
465 struct brw_cs_prog_data
*prog_data
= &pipeline
->cs_prog_data
;
467 gen7_cmd_buffer_flush_compute_state(cmd_buffer
);
469 anv_batch_emit(&cmd_buffer
->batch
, GEN7_GPGPU_WALKER
,
470 .SIMDSize
= prog_data
->simd_size
/ 16,
471 .ThreadDepthCounterMaximum
= 0,
472 .ThreadHeightCounterMaximum
= 0,
473 .ThreadWidthCounterMaximum
= pipeline
->cs_thread_width_max
,
474 .ThreadGroupIDXDimension
= x
,
475 .ThreadGroupIDYDimension
= y
,
476 .ThreadGroupIDZDimension
= z
,
477 .RightExecutionMask
= pipeline
->cs_right_mask
,
478 .BottomExecutionMask
= 0xffffffff);
480 anv_batch_emit(&cmd_buffer
->batch
, GEN7_MEDIA_STATE_FLUSH
);
483 #define GPGPU_DISPATCHDIMX 0x2500
484 #define GPGPU_DISPATCHDIMY 0x2504
485 #define GPGPU_DISPATCHDIMZ 0x2508
487 void gen7_CmdDispatchIndirect(
488 VkCmdBuffer cmdBuffer
,
492 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, cmdBuffer
);
493 ANV_FROM_HANDLE(anv_buffer
, buffer
, _buffer
);
494 struct anv_pipeline
*pipeline
= cmd_buffer
->state
.compute_pipeline
;
495 struct brw_cs_prog_data
*prog_data
= &pipeline
->cs_prog_data
;
496 struct anv_bo
*bo
= buffer
->bo
;
497 uint32_t bo_offset
= buffer
->offset
+ offset
;
499 gen7_cmd_buffer_flush_compute_state(cmd_buffer
);
501 gen7_batch_lrm(&cmd_buffer
->batch
, GPGPU_DISPATCHDIMX
, bo
, bo_offset
);
502 gen7_batch_lrm(&cmd_buffer
->batch
, GPGPU_DISPATCHDIMY
, bo
, bo_offset
+ 4);
503 gen7_batch_lrm(&cmd_buffer
->batch
, GPGPU_DISPATCHDIMZ
, bo
, bo_offset
+ 8);
505 anv_batch_emit(&cmd_buffer
->batch
, GEN7_GPGPU_WALKER
,
506 .IndirectParameterEnable
= true,
507 .SIMDSize
= prog_data
->simd_size
/ 16,
508 .ThreadDepthCounterMaximum
= 0,
509 .ThreadHeightCounterMaximum
= 0,
510 .ThreadWidthCounterMaximum
= pipeline
->cs_thread_width_max
,
511 .RightExecutionMask
= pipeline
->cs_right_mask
,
512 .BottomExecutionMask
= 0xffffffff);
514 anv_batch_emit(&cmd_buffer
->batch
, GEN7_MEDIA_STATE_FLUSH
);
517 void gen7_CmdPipelineBarrier(
518 VkCmdBuffer cmdBuffer
,
519 VkPipelineStageFlags srcStageMask
,
520 VkPipelineStageFlags destStageMask
,
522 uint32_t memBarrierCount
,
523 const void* const* ppMemBarriers
)
529 gen7_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer
*cmd_buffer
)
531 struct anv_subpass
*subpass
= cmd_buffer
->state
.subpass
;
532 struct anv_framebuffer
*fb
= cmd_buffer
->state
.framebuffer
;
533 const struct anv_depth_stencil_view
*view
= NULL
;
535 if (subpass
->depth_stencil_attachment
!= VK_ATTACHMENT_UNUSED
) {
536 const struct anv_attachment_view
*aview
=
537 fb
->attachments
[subpass
->depth_stencil_attachment
];
538 assert(aview
->attachment_type
== ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL
);
539 view
= (const struct anv_depth_stencil_view
*)aview
;
542 const struct anv_image
*image
= view
? view
->image
: NULL
;
543 const bool has_depth
= view
&& view
->format
->depth_format
;
544 const bool has_stencil
= view
&& view
->format
->has_stencil
;
546 /* Emit 3DSTATE_DEPTH_BUFFER */
548 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DSTATE_DEPTH_BUFFER
,
549 .SurfaceType
= SURFTYPE_2D
,
550 .DepthWriteEnable
= view
->format
->depth_format
,
551 .StencilWriteEnable
= has_stencil
,
552 .HierarchicalDepthBufferEnable
= false,
553 .SurfaceFormat
= view
->format
->depth_format
,
554 .SurfacePitch
= image
->depth_surface
.stride
- 1,
555 .SurfaceBaseAddress
= {
557 .offset
= image
->depth_surface
.offset
,
559 .Height
= fb
->height
- 1,
560 .Width
= fb
->width
- 1,
563 .MinimumArrayElement
= 0,
564 .DepthBufferObjectControlState
= GEN7_MOCS
,
565 .RenderTargetViewExtent
= 1 - 1);
567 /* Even when no depth buffer is present, the hardware requires that
568 * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says:
570 * If a null depth buffer is bound, the driver must instead bind depth as:
571 * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D
572 * 3DSTATE_DEPTH.Width = 1
573 * 3DSTATE_DEPTH.Height = 1
574 * 3DSTATE_DEPTH.SuraceFormat = D16_UNORM
575 * 3DSTATE_DEPTH.SurfaceBaseAddress = 0
576 * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0
577 * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0
578 * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0
580 * The PRM is wrong, though. The width and height must be programmed to
581 * actual framebuffer's width and height, even when neither depth buffer
582 * nor stencil buffer is present.
584 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DSTATE_DEPTH_BUFFER
,
585 .SurfaceType
= SURFTYPE_2D
,
586 .SurfaceFormat
= D16_UNORM
,
587 .Width
= fb
->width
- 1,
588 .Height
= fb
->height
- 1,
589 .StencilWriteEnable
= has_stencil
);
592 /* Emit 3DSTATE_STENCIL_BUFFER */
594 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DSTATE_STENCIL_BUFFER
,
595 .StencilBufferObjectControlState
= GEN7_MOCS
,
596 .SurfacePitch
= image
->stencil_surface
.stride
- 1,
597 .SurfaceBaseAddress
= {
599 .offset
= image
->offset
+ image
->stencil_surface
.offset
,
602 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DSTATE_STENCIL_BUFFER
);
605 /* Disable hierarchial depth buffers. */
606 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DSTATE_HIER_DEPTH_BUFFER
);
608 /* Clear the clear params. */
609 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DSTATE_CLEAR_PARAMS
);
613 gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer
*cmd_buffer
,
614 struct anv_subpass
*subpass
)
616 cmd_buffer
->state
.subpass
= subpass
;
617 cmd_buffer
->state
.descriptors_dirty
|= VK_SHADER_STAGE_FRAGMENT_BIT
;
619 gen7_cmd_buffer_emit_depth_stencil(cmd_buffer
);
623 begin_render_pass(struct anv_cmd_buffer
*cmd_buffer
,
624 const VkRenderPassBeginInfo
* pRenderPassBegin
)
626 ANV_FROM_HANDLE(anv_render_pass
, pass
, pRenderPassBegin
->renderPass
);
627 ANV_FROM_HANDLE(anv_framebuffer
, framebuffer
, pRenderPassBegin
->framebuffer
);
629 cmd_buffer
->state
.framebuffer
= framebuffer
;
630 cmd_buffer
->state
.pass
= pass
;
632 const VkRect2D
*render_area
= &pRenderPassBegin
->renderArea
;
634 anv_batch_emit(&cmd_buffer
->batch
, GEN7_3DSTATE_DRAWING_RECTANGLE
,
635 .ClippedDrawingRectangleYMin
= render_area
->offset
.y
,
636 .ClippedDrawingRectangleXMin
= render_area
->offset
.x
,
637 .ClippedDrawingRectangleYMax
=
638 render_area
->offset
.y
+ render_area
->extent
.height
- 1,
639 .ClippedDrawingRectangleXMax
=
640 render_area
->offset
.x
+ render_area
->extent
.width
- 1,
641 .DrawingRectangleOriginY
= 0,
642 .DrawingRectangleOriginX
= 0);
644 anv_cmd_buffer_clear_attachments(cmd_buffer
, pass
,
645 pRenderPassBegin
->pAttachmentClearValues
);
648 void gen7_CmdBeginRenderPass(
649 VkCmdBuffer cmdBuffer
,
650 const VkRenderPassBeginInfo
* pRenderPassBegin
,
651 VkRenderPassContents contents
)
653 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, cmdBuffer
);
654 ANV_FROM_HANDLE(anv_render_pass
, pass
, pRenderPassBegin
->renderPass
);
656 begin_render_pass(cmd_buffer
, pRenderPassBegin
);
658 gen7_cmd_buffer_begin_subpass(cmd_buffer
, pass
->subpasses
);
661 void gen7_CmdNextSubpass(
662 VkCmdBuffer cmdBuffer
,
663 VkRenderPassContents contents
)
665 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, cmdBuffer
);
667 assert(cmd_buffer
->level
== VK_CMD_BUFFER_LEVEL_PRIMARY
);
669 gen7_cmd_buffer_begin_subpass(cmd_buffer
, cmd_buffer
->state
.subpass
+ 1);
672 void gen7_CmdEndRenderPass(
673 VkCmdBuffer cmdBuffer
)
675 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, cmdBuffer
);
677 /* Emit a flushing pipe control at the end of a pass. This is kind of a
678 * hack but it ensures that render targets always actually get written.
679 * Eventually, we should do flushing based on image format transitions
680 * or something of that nature.
682 anv_batch_emit(&cmd_buffer
->batch
, GEN7_PIPE_CONTROL
,
683 .PostSyncOperation
= NoWrite
,
684 .RenderTargetCacheFlushEnable
= true,
685 .InstructionCacheInvalidateEnable
= true,
686 .DepthCacheFlushEnable
= true,
687 .VFCacheInvalidationEnable
= true,
688 .TextureCacheInvalidationEnable
= true,
689 .CommandStreamerStallEnable
= true);