vk: Don't duplicate anv_depth_stencil_view's surface data
[mesa.git] / src / vulkan / gen7_cmd_buffer.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

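/* Program all of the state base addresses for this command buffer: general
 * state points at the scratch BO (if any), surface state points at the
 * command buffer's current surface state BO (so binding tables and surface
 * state allocated there can be referenced by small offsets), and dynamic and
 * instruction state point at the corresponding device-wide block pools.
 */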
void
gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_bo *scratch_bo = NULL;

   cmd_buffer->state.scratch_size =
      anv_block_pool_size(&device->scratch_block_pool);
   if (cmd_buffer->state.scratch_size > 0)
      scratch_bo = &device->scratch_block_pool.bo;

   anv_batch_emit(&cmd_buffer->batch, GEN7_STATE_BASE_ADDRESS,
                  .GeneralStateBaseAddress = { scratch_bo, 0 },
                  .GeneralStateMemoryObjectControlState = GEN7_MOCS,
                  .GeneralStateBaseAddressModifyEnable = true,
                  .GeneralStateAccessUpperBound = { scratch_bo, scratch_bo->size },
                  .GeneralStateAccessUpperBoundModifyEnable = true,

                  .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 },
                  .SurfaceStateMemoryObjectControlState = GEN7_MOCS,
                  .SurfaceStateBaseAddressModifyEnable = true,

                  .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
                  .DynamicStateMemoryObjectControlState = GEN7_MOCS,
                  .DynamicStateBaseAddressModifyEnable = true,
                  .DynamicStateAccessUpperBound = { &device->dynamic_state_block_pool.bo,
                                                    device->dynamic_state_block_pool.bo.size },
                  .DynamicStateAccessUpperBoundModifyEnable = true,

                  .IndirectObjectBaseAddress = { NULL, 0 },
                  .IndirectObjectMemoryObjectControlState = GEN7_MOCS,
                  .IndirectObjectBaseAddressModifyEnable = true,

                  .IndirectObjectAccessUpperBound = { NULL, 0xffffffff },
                  .IndirectObjectAccessUpperBoundModifyEnable = true,

                  .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
                  .InstructionMemoryObjectControlState = GEN7_MOCS,
                  .InstructionBaseAddressModifyEnable = true,
                  .InstructionAccessUpperBound = { &device->instruction_block_pool.bo,
                                                   device->instruction_block_pool.bo.size },
                  .InstructionAccessUpperBoundModifyEnable = true);

   /* After re-setting the surface state base address, we have to do some
    * cache flushing so that the sampler engine will pick up the new
    * SURFACE_STATE objects and binding tables. From the Broadwell PRM,
    * Shared Function > 3D Sampler > State > State Caching (page 96):
    *
    *    Coherency with system memory in the state cache, like the texture
    *    cache is handled partially by software. It is expected that the
    *    command stream or shader will issue Cache Flush operation or
    *    Cache_Flush sampler message to ensure that the L1 cache remains
    *    coherent with system memory.
    *
    *    [...]
    *
    *    Whenever the value of the Dynamic_State_Base_Addr,
    *    Surface_State_Base_Addr are altered, the L1 state cache must be
    *    invalidated to ensure the new surface or sampler state is fetched
    *    from system memory.
    *
    * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
    * which, according to the PIPE_CONTROL instruction documentation in the
    * Broadwell PRM:
    *
    *    Setting this bit is independent of any other bit in this packet.
    *    This bit controls the invalidation of the L1 and L2 state caches
    *    at the top of the pipe i.e. at the parsing time.
    *
    * Unfortunately, experimentation seems to indicate that state cache
    * invalidation through a PIPE_CONTROL does nothing whatsoever in
    * regards to surface state and binding tables. Instead, it seems that
    * invalidating the texture cache is what is actually needed.
    *
    * XXX: As far as we have been able to determine through experimentation,
    * flushing the texture cache appears to be sufficient. The theory here is
    * that all of the sampling/rendering units cache the binding table in the
    * texture cache. However, we have yet to be able to actually confirm this.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL,
                  .TextureCacheInvalidationEnable = true);
}

static const uint32_t vk_to_gen_index_type[] = {
   [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
   [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
};

void gen7_CmdBindIndexBuffer(
    VkCmdBuffer cmdBuffer,
    VkBuffer _buffer,
    VkDeviceSize offset,
    VkIndexType indexType)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

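   /* Just stash the index buffer state here. The 3DSTATE_INDEX_BUFFER packet
    * itself is emitted from gen7_cmd_buffer_flush_state() at draw time,
    * because on gen7 the same packet carries CutIndexEnable, which depends on
    * the pipeline bound when the draw is issued.
    */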
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY;
   cmd_buffer->state.gen7.index_buffer = buffer;
   cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType];
   cmd_buffer->state.gen7.index_offset = offset;
}

static VkResult
gen7_flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = anv_cmd_buffer_emit_samplers(cmd_buffer,
                                         VK_SHADER_STAGE_COMPUTE, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
                                              VK_SHADER_STAGE_COMPUTE, &surfaces);
   if (result != VK_SUCCESS)
      return result;

   struct GEN7_INTERFACE_DESCRIPTOR_DATA desc = {
      .KernelStartPointer = pipeline->cs_simd,
      .BindingTablePointer = surfaces.offset,
      .SamplerStatePointer = samplers.offset,
      .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
   };

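   /* MEDIA_INTERFACE_DESCRIPTOR_LOAD takes an offset into dynamic state, so
    * pack the interface descriptor into the dynamic state pool and hand the
    * command its offset.
    */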
   uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
   struct anv_state state =
      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);

   GEN7_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);

   anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
                  .InterfaceDescriptorTotalLength = size,
                  .InterfaceDescriptorDataStartAddress = state.offset);

   return VK_SUCCESS;
}

static void
gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   VkResult result;

   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);

   if (cmd_buffer->state.current_pipeline != GPGPU) {
      anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT,
                     .PipelineSelection = GPGPU);
      cmd_buffer->state.current_pipeline = GPGPU;
   }

   if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);

   if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
       (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
      /* FIXME: figure out descriptors for gen7 */
      result = gen7_flush_compute_descriptor_set(cmd_buffer);
      assert(result == VK_SUCCESS);
      cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
   }

   cmd_buffer->state.compute_dirty = 0;
}

static void
gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
   uint32_t *p;

   uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;

   assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);

   if (cmd_buffer->state.current_pipeline != _3D) {
      anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT,
                     .PipelineSelection = _3D);
      cmd_buffer->state.current_pipeline = _3D;
   }

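   /* 3DSTATE_VERTEX_BUFFERS: one header dword plus a 4-dword
    * VERTEX_BUFFER_STATE for each vertex buffer that is both dirty and
    * actually used by the pipeline.
    */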
   if (vb_emit) {
      const uint32_t num_buffers = __builtin_popcount(vb_emit);
      const uint32_t num_dwords = 1 + num_buffers * 4;

      p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
                          GEN7_3DSTATE_VERTEX_BUFFERS);
      uint32_t vb, i = 0;
      for_each_bit(vb, vb_emit) {
         struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
         uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;

         struct GEN7_VERTEX_BUFFER_STATE state = {
            .VertexBufferIndex = vb,
            .BufferAccessType = pipeline->instancing_enable[vb] ? INSTANCEDATA : VERTEXDATA,
            .VertexBufferMemoryObjectControlState = GEN7_MOCS,
            .AddressModifyEnable = true,
            .BufferPitch = pipeline->binding_stride[vb],
            .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
            .EndAddress = { buffer->bo, buffer->offset + buffer->size - 1 },
            .InstanceDataStepRate = 1
         };

         GEN7_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
         i++;
      }
   }

   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
      /* If somebody compiled a pipeline after starting a command buffer, the
       * scratch bo may have grown since we started this cmd buffer (and
       * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo.
       */
      if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
         gen7_cmd_buffer_emit_state_base_address(cmd_buffer);

      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
   }

   if (cmd_buffer->state.descriptors_dirty)
      anv_flush_descriptor_sets(cmd_buffer);

   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) {
      struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state;
      anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS,
                     .ScissorRectPointer = vp_state->scissor.offset);
      anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
                     .CCViewportPointer = vp_state->cc_vp.offset);
      anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
                     .SFClipViewportPointer = vp_state->sf_clip_vp.offset);
   }

   if (cmd_buffer->state.dirty &
       (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.rs_state->gen7.sf,
                           pipeline->gen7.sf);
   }

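   /* The pipeline bakes a partial DEPTH_STENCIL_STATE. If dynamic
    * depth-stencil state is bound, merge the two; otherwise emit the
    * pipeline's copy on its own. COLOR_CALC_STATE below gets the same
    * emit-or-merge treatment, combining the dynamic depth-stencil (stencil
    * reference) and color-blend (blend constants) state.
    */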
   if (cmd_buffer->state.dirty &
       (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) {
      struct anv_state state;

      if (cmd_buffer->state.ds_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             pipeline->gen7.depth_stencil_state,
                                             GEN7_DEPTH_STENCIL_STATE_length, 64);
      else
         state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
                                              cmd_buffer->state.ds_state->gen7.depth_stencil_state,
                                              pipeline->gen7.depth_stencil_state,
                                              GEN7_DEPTH_STENCIL_STATE_length, 64);
      anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS,
                     .PointertoDEPTH_STENCIL_STATE = state.offset);
   }

   if (cmd_buffer->state.dirty &
       (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) {
      struct anv_state state;
      if (cmd_buffer->state.ds_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             cmd_buffer->state.cb_state->color_calc_state,
                                             GEN7_COLOR_CALC_STATE_length, 64);
      else if (cmd_buffer->state.cb_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             cmd_buffer->state.ds_state->gen7.color_calc_state,
                                             GEN7_COLOR_CALC_STATE_length, 64);
      else
         state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
                                              cmd_buffer->state.ds_state->gen7.color_calc_state,
                                              cmd_buffer->state.cb_state->color_calc_state,
                                              GEN7_COLOR_CALC_STATE_length, 64);

      anv_batch_emit(&cmd_buffer->batch,
                     GEN7_3DSTATE_CC_STATE_POINTERS,
                     .ColorCalcStatePointer = state.offset);
   }

   if (cmd_buffer->state.gen7.index_buffer &&
       cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                  ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) {
      struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer;
      uint32_t offset = cmd_buffer->state.gen7.index_offset;

      anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_INDEX_BUFFER,
                     .CutIndexEnable = pipeline->primitive_restart,
                     .IndexFormat = cmd_buffer->state.gen7.index_type,
                     .MemoryObjectControlState = GEN7_MOCS,
                     .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
                     .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size });
   }

   cmd_buffer->state.vb_dirty &= ~vb_emit;
   cmd_buffer->state.dirty = 0;
}

void gen7_CmdDraw(
    VkCmdBuffer cmdBuffer,
    uint32_t firstVertex,
    uint32_t vertexCount,
    uint32_t firstInstance,
    uint32_t instanceCount)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;

   gen7_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE,
                  .VertexAccessType = SEQUENTIAL,
                  .PrimitiveTopologyType = pipeline->topology,
                  .VertexCountPerInstance = vertexCount,
                  .StartVertexLocation = firstVertex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = 0);
}

void gen7_CmdDrawIndexed(
    VkCmdBuffer cmdBuffer,
    uint32_t firstIndex,
    uint32_t indexCount,
    int32_t vertexOffset,
    uint32_t firstInstance,
    uint32_t instanceCount)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;

   gen7_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE,
                  .VertexAccessType = RANDOM,
                  .PrimitiveTopologyType = pipeline->topology,
                  .VertexCountPerInstance = indexCount,
                  .StartVertexLocation = firstIndex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = vertexOffset);
}

static void
gen7_batch_lrm(struct anv_batch *batch,
               uint32_t reg, struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_MEM,
                  .RegisterAddress = reg,
                  .MemoryAddress = { bo, offset });
}

static void
gen7_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
{
   anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_IMM,
                  .RegisterOffset = reg,
                  .DataDWord = imm);
}

/* Auto-Draw / Indirect Registers */
#define GEN7_3DPRIM_END_OFFSET          0x2420
#define GEN7_3DPRIM_START_VERTEX        0x2430
#define GEN7_3DPRIM_VERTEX_COUNT        0x2434
#define GEN7_3DPRIM_INSTANCE_COUNT      0x2438
#define GEN7_3DPRIM_START_INSTANCE      0x243C
#define GEN7_3DPRIM_BASE_VERTEX         0x2440

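/* The indirect draw paths below load the 3DPRIM registers straight from the
 * application's indirect buffer with MI_LOAD_REGISTER_MEM. The dword order
 * matches VkDrawIndirectCommand (vertexCount, instanceCount, firstVertex,
 * firstInstance) and VkDrawIndexedIndirectCommand (indexCount, instanceCount,
 * firstIndex, vertexOffset, firstInstance); for the non-indexed case the
 * base-vertex register is cleared with an immediate write since the struct
 * has no such field.
 */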
void gen7_CmdDrawIndirect(
    VkCmdBuffer cmdBuffer,
    VkBuffer _buffer,
    VkDeviceSize offset,
    uint32_t count,
    uint32_t stride)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   gen7_cmd_buffer_flush_state(cmd_buffer);

   gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
   gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
   gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
   gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
   gen7_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);

   anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE,
                  .IndirectParameterEnable = true,
                  .VertexAccessType = SEQUENTIAL,
                  .PrimitiveTopologyType = pipeline->topology);
}

void gen7_CmdDrawIndexedIndirect(
    VkCmdBuffer cmdBuffer,
    VkBuffer _buffer,
    VkDeviceSize offset,
    uint32_t count,
    uint32_t stride)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   gen7_cmd_buffer_flush_state(cmd_buffer);

   gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
   gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
   gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
   gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
   gen7_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);

   anv_batch_emit(&cmd_buffer->batch, GEN7_3DPRIMITIVE,
                  .IndirectParameterEnable = true,
                  .VertexAccessType = RANDOM,
                  .PrimitiveTopologyType = pipeline->topology);
}

void gen7_CmdDispatch(
    VkCmdBuffer cmdBuffer,
    uint32_t x,
    uint32_t y,
    uint32_t z)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;

   gen7_cmd_buffer_flush_compute_state(cmd_buffer);

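   /* prog_data->simd_size is 8, 16, or 32; dividing by 16 maps it to the
    * walker's SIMD size encoding (0 = SIMD8, 1 = SIMD16, 2 = SIMD32).
    */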
   anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER,
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                  .ThreadGroupIDXDimension = x,
                  .ThreadGroupIDYDimension = y,
                  .ThreadGroupIDZDimension = z,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_STATE_FLUSH);
}

#define GPGPU_DISPATCHDIMX 0x2500
#define GPGPU_DISPATCHDIMY 0x2504
#define GPGPU_DISPATCHDIMZ 0x2508

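/* As with the indirect draws above, the dispatch dimensions are loaded into
 * the GPGPU_DISPATCHDIM registers straight from the indirect buffer, whose
 * layout matches VkDispatchIndirectCommand (x, y, z).
 */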
void gen7_CmdDispatchIndirect(
    VkCmdBuffer cmdBuffer,
    VkBuffer _buffer,
    VkDeviceSize offset)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   gen7_cmd_buffer_flush_compute_state(cmd_buffer);

   gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
   gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
   gen7_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);

   anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER,
                  .IndirectParameterEnable = true,
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_STATE_FLUSH);
}

void gen7_CmdPipelineBarrier(
    VkCmdBuffer cmdBuffer,
    VkPipelineStageFlags srcStageMask,
    VkPipelineStageFlags destStageMask,
    VkBool32 byRegion,
    uint32_t memBarrierCount,
    const void* const* ppMemBarriers)
{
   stub();
}

static void
gen7_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct anv_depth_stencil_view *view = NULL;

   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
      const struct anv_attachment_view *aview =
         fb->attachments[subpass->depth_stencil_attachment];
      assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL);
      view = (const struct anv_depth_stencil_view *)aview;
   }

   const struct anv_image *image = view ? view->image : NULL;
   const bool has_depth = view && view->format->depth_format;
   const bool has_stencil = view && view->format->has_stencil;

   /* Emit 3DSTATE_DEPTH_BUFFER */
   if (has_depth) {
      anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER,
                     .SurfaceType = SURFTYPE_2D,
                     .DepthWriteEnable = view->format->depth_format,
                     .StencilWriteEnable = has_stencil,
                     .HierarchicalDepthBufferEnable = false,
                     .SurfaceFormat = view->format->depth_format,
                     .SurfacePitch = image->depth_surface.stride - 1,
                     .SurfaceBaseAddress = {
                        .bo = image->bo,
                        .offset = image->depth_surface.offset,
                     },
                     .Height = fb->height - 1,
                     .Width = fb->width - 1,
                     .LOD = 0,
                     .Depth = 1 - 1,
                     .MinimumArrayElement = 0,
                     .DepthBufferObjectControlState = GEN7_MOCS,
                     .RenderTargetViewExtent = 1 - 1);
   } else {
      /* Even when no depth buffer is present, the hardware requires that
       * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says:
       *
       *    If a null depth buffer is bound, the driver must instead bind depth as:
       *       3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D
       *       3DSTATE_DEPTH.Width = 1
       *       3DSTATE_DEPTH.Height = 1
       *       3DSTATE_DEPTH.SurfaceFormat = D16_UNORM
       *       3DSTATE_DEPTH.SurfaceBaseAddress = 0
       *       3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0
       *       3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0
       *       3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0
       *
       * The PRM is wrong, though. The width and height must be programmed to
       * the actual framebuffer's width and height, even when neither depth
       * buffer nor stencil buffer is present.
       */
      anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER,
                     .SurfaceType = SURFTYPE_2D,
                     .SurfaceFormat = D16_UNORM,
                     .Width = fb->width - 1,
                     .Height = fb->height - 1,
                     .StencilWriteEnable = has_stencil);
   }

   /* Emit 3DSTATE_STENCIL_BUFFER */
   if (has_stencil) {
      anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER,
                     .StencilBufferObjectControlState = GEN7_MOCS,
                     .SurfacePitch = image->stencil_surface.stride - 1,
                     .SurfaceBaseAddress = {
                        .bo = image->bo,
                        .offset = image->offset + image->stencil_surface.offset,
                     });
   } else {
      anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER);
   }

   /* Disable hierarchical depth buffers. */
   anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER);

   /* Clear the clear params. */
   anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CLEAR_PARAMS);
}

void
gen7_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
                              struct anv_subpass *subpass)
{
   cmd_buffer->state.subpass = subpass;
   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;

   gen7_cmd_buffer_emit_depth_stencil(cmd_buffer);
}

static void
begin_render_pass(struct anv_cmd_buffer *cmd_buffer,
                  const VkRenderPassBeginInfo* pRenderPassBegin)
{
   ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
   ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);

   cmd_buffer->state.framebuffer = framebuffer;
   cmd_buffer->state.pass = pass;

   const VkRect2D *render_area = &pRenderPassBegin->renderArea;

   anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DRAWING_RECTANGLE,
                  .ClippedDrawingRectangleYMin = render_area->offset.y,
                  .ClippedDrawingRectangleXMin = render_area->offset.x,
                  .ClippedDrawingRectangleYMax =
                     render_area->offset.y + render_area->extent.height - 1,
                  .ClippedDrawingRectangleXMax =
                     render_area->offset.x + render_area->extent.width - 1,
                  .DrawingRectangleOriginY = 0,
                  .DrawingRectangleOriginX = 0);

   anv_cmd_buffer_clear_attachments(cmd_buffer, pass,
                                    pRenderPassBegin->pAttachmentClearValues);
}

void gen7_CmdBeginRenderPass(
    VkCmdBuffer cmdBuffer,
    const VkRenderPassBeginInfo* pRenderPassBegin,
    VkRenderPassContents contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);

   begin_render_pass(cmd_buffer, pRenderPassBegin);

   gen7_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses);
}

void gen7_CmdNextSubpass(
    VkCmdBuffer cmdBuffer,
    VkRenderPassContents contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY);

   gen7_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
}

void gen7_CmdEndRenderPass(
    VkCmdBuffer cmdBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   /* Emit a flushing pipe control at the end of a pass. This is kind of a
    * hack but it ensures that render targets always actually get written.
    * Eventually, we should do flushing based on image format transitions
    * or something of that nature.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL,
                  .PostSyncOperation = NoWrite,
                  .RenderTargetCacheFlushEnable = true,
                  .InstructionCacheInvalidateEnable = true,
                  .DepthCacheFlushEnable = true,
                  .VFCacheInvalidationEnable = true,
                  .TextureCacheInvalidationEnable = true,
                  .CommandStreamerStallEnable = true);
}