1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27
28 #include "tu_private.h"
29
30 #include "registers/adreno_pm4.xml.h"
31 #include "registers/adreno_common.xml.h"
32 #include "registers/a6xx.xml.h"
33
34 #include "vk_format.h"
35
36 #include "tu_cs.h"
37 #include "tu_blit.h"
38
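/* CP scratch register used to publish visibility-stream (VSC) overflow state:
 * emit_vsc_overflow_test() sets it after the binning pass, and the per-tile
 * code tests it with CP_REG_TEST/CP_COND_REG_EXEC before consuming binning
 * results.
 */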
39 #define OVERFLOW_FLAG_REG REG_A6XX_CP_SCRATCH_REG(0)
40
41 void
42 tu_bo_list_init(struct tu_bo_list *list)
43 {
44 list->count = list->capacity = 0;
45 list->bo_infos = NULL;
46 }
47
48 void
49 tu_bo_list_destroy(struct tu_bo_list *list)
50 {
51 free(list->bo_infos);
52 }
53
54 void
55 tu_bo_list_reset(struct tu_bo_list *list)
56 {
57 list->count = 0;
58 }
59
60 /**
61 * \a flags consists of MSM_SUBMIT_BO_FLAGS.
62 */
63 static uint32_t
64 tu_bo_list_add_info(struct tu_bo_list *list,
65 const struct drm_msm_gem_submit_bo *bo_info)
66 {
67 assert(bo_info->handle != 0);
68
69 for (uint32_t i = 0; i < list->count; ++i) {
70 if (list->bo_infos[i].handle == bo_info->handle) {
71 assert(list->bo_infos[i].presumed == bo_info->presumed);
72 list->bo_infos[i].flags |= bo_info->flags;
73 return i;
74 }
75 }
76
77 /* grow list->bo_infos if needed */
78 if (list->count == list->capacity) {
79 uint32_t new_capacity = MAX2(2 * list->count, 16);
80 struct drm_msm_gem_submit_bo *new_bo_infos = realloc(
81 list->bo_infos, new_capacity * sizeof(struct drm_msm_gem_submit_bo));
82 if (!new_bo_infos)
83 return TU_BO_LIST_FAILED;
84 list->bo_infos = new_bo_infos;
85 list->capacity = new_capacity;
86 }
87
88 list->bo_infos[list->count] = *bo_info;
89 return list->count++;
90 }
91
92 uint32_t
93 tu_bo_list_add(struct tu_bo_list *list,
94 const struct tu_bo *bo,
95 uint32_t flags)
96 {
97 return tu_bo_list_add_info(list, &(struct drm_msm_gem_submit_bo) {
98 .flags = flags,
99 .handle = bo->gem_handle,
100 .presumed = bo->iova,
101 });
102 }
103
104 VkResult
105 tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other)
106 {
107 for (uint32_t i = 0; i < other->count; i++) {
108 if (tu_bo_list_add_info(list, other->bo_infos + i) == TU_BO_LIST_FAILED)
109 return VK_ERROR_OUT_OF_HOST_MEMORY;
110 }
111
112 return VK_SUCCESS;
113 }
114
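/* Assign each attachment a 16KB-aligned offset in GMEM, sized for one tile
 * (tile0 extent * cpp).  Returns VK_ERROR_OUT_OF_DEVICE_MEMORY when the sum
 * exceeds the device's GMEM size so the caller can shrink the tile and retry.
 */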
115 static VkResult
116 tu_tiling_config_update_gmem_layout(struct tu_tiling_config *tiling,
117 const struct tu_device *dev)
118 {
119 const uint32_t gmem_size = dev->physical_device->gmem_size;
120 uint32_t offset = 0;
121
122 for (uint32_t i = 0; i < tiling->buffer_count; i++) {
123 /* 16KB-aligned */
124 offset = align(offset, 0x4000);
125
126 tiling->gmem_offsets[i] = offset;
127 offset += tiling->tile0.extent.width * tiling->tile0.extent.height *
128 tiling->buffer_cpp[i];
129 }
130
131 return offset <= gmem_size ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
132 }
133
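/* Compute the tile grid for the current render area: start from a single
 * tile covering the aligned render area, split horizontally until the tile
 * width fits the hardware maximum (1024 on a6xx), then keep splitting along
 * the larger axis until the per-tile GMEM layout fits.
 *
 * Worked example (assuming 32x32 tile alignment and ignoring the GMEM-size
 * constraint): a 1920x1080 render area at offset (0,0) starts as one
 * 1920x1088 tile; 1920 > 1024, so the width loop yields a 2x1 grid of
 * 960x1088 tiles.  The GMEM loop below may split further depending on the
 * attachment formats and sample counts.
 */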
134 static void
135 tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
136 const struct tu_device *dev)
137 {
138 const uint32_t tile_align_w = dev->physical_device->tile_align_w;
139 const uint32_t tile_align_h = dev->physical_device->tile_align_h;
140 const uint32_t max_tile_width = 1024; /* A6xx */
141
142 tiling->tile0.offset = (VkOffset2D) {
143 .x = tiling->render_area.offset.x & ~(tile_align_w - 1),
144 .y = tiling->render_area.offset.y & ~(tile_align_h - 1),
145 };
146
147 const uint32_t ra_width =
148 tiling->render_area.extent.width +
149 (tiling->render_area.offset.x - tiling->tile0.offset.x);
150 const uint32_t ra_height =
151 tiling->render_area.extent.height +
152 (tiling->render_area.offset.y - tiling->tile0.offset.y);
153
154 /* start from 1 tile */
155 tiling->tile_count = (VkExtent2D) {
156 .width = 1,
157 .height = 1,
158 };
159 tiling->tile0.extent = (VkExtent2D) {
160 .width = align(ra_width, tile_align_w),
161 .height = align(ra_height, tile_align_h),
162 };
163
164 /* do not exceed max tile width */
165 while (tiling->tile0.extent.width > max_tile_width) {
166 tiling->tile_count.width++;
167 tiling->tile0.extent.width =
168 align(ra_width / tiling->tile_count.width, tile_align_w);
169 }
170
171 /* do not exceed gmem size */
172 while (tu_tiling_config_update_gmem_layout(tiling, dev) != VK_SUCCESS) {
173 if (tiling->tile0.extent.width > tiling->tile0.extent.height) {
174 tiling->tile_count.width++;
175 tiling->tile0.extent.width =
176 align(ra_width / tiling->tile_count.width, tile_align_w);
177 } else {
178 tiling->tile_count.height++;
179 tiling->tile0.extent.height =
180 align(ra_height / tiling->tile_count.height, tile_align_h);
181 }
182 }
183 }
184
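/* Group tiles into visibility-stream pipes: start with one tile per pipe and
 * grow the pipe size (height first, then width) until no more than 32 pipes
 * are used.
 */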
185 static void
186 tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling,
187 const struct tu_device *dev)
188 {
189 const uint32_t max_pipe_count = 32; /* A6xx */
190
191 /* start from 1 tile per pipe */
192 tiling->pipe0 = (VkExtent2D) {
193 .width = 1,
194 .height = 1,
195 };
196 tiling->pipe_count = tiling->tile_count;
197
198 /* do not exceed max pipe count vertically */
199 while (tiling->pipe_count.height > max_pipe_count) {
200 tiling->pipe0.height += 2;
201 tiling->pipe_count.height =
202 (tiling->tile_count.height + tiling->pipe0.height - 1) /
203 tiling->pipe0.height;
204 }
205
206 /* do not exceed max pipe count */
207 while (tiling->pipe_count.width * tiling->pipe_count.height >
208 max_pipe_count) {
209 tiling->pipe0.width += 1;
210 tiling->pipe_count.width =
211 (tiling->tile_count.width + tiling->pipe0.width - 1) /
212 tiling->pipe0.width;
213 }
214 }
215
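/* Fill in the per-pipe VSC_PIPE_CONFIG registers (x/y/w/h in tiles) and the
 * CP_SET_BIN_DATA5 size fields.  Pipes on the right/bottom edge get the
 * remainder of the tile grid; unused entries are zeroed.
 */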
216 static void
217 tu_tiling_config_update_pipes(struct tu_tiling_config *tiling,
218 const struct tu_device *dev)
219 {
220 const uint32_t max_pipe_count = 32; /* A6xx */
221 const uint32_t used_pipe_count =
222 tiling->pipe_count.width * tiling->pipe_count.height;
223 const VkExtent2D last_pipe = {
224 .width = (tiling->tile_count.width - 1) % tiling->pipe0.width + 1,
225 .height = (tiling->tile_count.height - 1) % tiling->pipe0.height + 1,
226 };
227
228 assert(used_pipe_count <= max_pipe_count);
229 assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config));
230
231 for (uint32_t y = 0; y < tiling->pipe_count.height; y++) {
232 for (uint32_t x = 0; x < tiling->pipe_count.width; x++) {
233 const uint32_t pipe_x = tiling->pipe0.width * x;
234 const uint32_t pipe_y = tiling->pipe0.height * y;
235 const uint32_t pipe_w = (x == tiling->pipe_count.width - 1)
236 ? last_pipe.width
237 : tiling->pipe0.width;
238 const uint32_t pipe_h = (y == tiling->pipe_count.height - 1)
239 ? last_pipe.height
240 : tiling->pipe0.height;
241 const uint32_t n = tiling->pipe_count.width * y + x;
242
243 tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
244 A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
245 A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
246 A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
247 tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
248 }
249 }
250
251 memset(tiling->pipe_config + used_pipe_count, 0,
252 sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
253 }
254
255 static void
256 tu_tiling_config_update(struct tu_tiling_config *tiling,
257 const struct tu_device *dev,
258 const uint32_t *buffer_cpp,
259 uint32_t buffer_count,
260 const VkRect2D *render_area)
261 {
262 /* see if there is any real change */
263 const bool ra_changed =
264 render_area &&
265 memcmp(&tiling->render_area, render_area, sizeof(*render_area));
266 const bool buf_changed = tiling->buffer_count != buffer_count ||
267 memcmp(tiling->buffer_cpp, buffer_cpp,
268 sizeof(*buffer_cpp) * buffer_count);
269 if (!ra_changed && !buf_changed)
270 return;
271
272 if (ra_changed)
273 tiling->render_area = *render_area;
274
275 if (buf_changed) {
276 memcpy(tiling->buffer_cpp, buffer_cpp,
277 sizeof(*buffer_cpp) * buffer_count);
278 tiling->buffer_count = buffer_count;
279 }
280
281 tu_tiling_config_update_tile_layout(tiling, dev);
282 tu_tiling_config_update_pipe_layout(tiling, dev);
283 tu_tiling_config_update_pipes(tiling, dev);
284 }
285
286 static void
287 tu_tiling_config_get_tile(const struct tu_tiling_config *tiling,
288 const struct tu_device *dev,
289 uint32_t tx,
290 uint32_t ty,
291 struct tu_tile *tile)
292 {
293 /* find the pipe and the slot for tile (tx, ty) */
294 const uint32_t px = tx / tiling->pipe0.width;
295 const uint32_t py = ty / tiling->pipe0.height;
296 const uint32_t sx = tx - tiling->pipe0.width * px;
297 const uint32_t sy = ty - tiling->pipe0.height * py;
298
299 assert(tx < tiling->tile_count.width && ty < tiling->tile_count.height);
300 assert(px < tiling->pipe_count.width && py < tiling->pipe_count.height);
301 assert(sx < tiling->pipe0.width && sy < tiling->pipe0.height);
302
303 /* convert to 1D indices */
304 tile->pipe = tiling->pipe_count.width * py + px;
305 tile->slot = tiling->pipe0.width * sy + sx;
306
307 /* get the blit area for the tile */
308 tile->begin = (VkOffset2D) {
309 .x = tiling->tile0.offset.x + tiling->tile0.extent.width * tx,
310 .y = tiling->tile0.offset.y + tiling->tile0.extent.height * ty,
311 };
312 tile->end.x =
313 (tx == tiling->tile_count.width - 1)
314 ? tiling->render_area.offset.x + tiling->render_area.extent.width
315 : tile->begin.x + tiling->tile0.extent.width;
316 tile->end.y =
317 (ty == tiling->tile_count.height - 1)
318 ? tiling->render_area.offset.y + tiling->render_area.extent.height
319 : tile->begin.y + tiling->tile0.extent.height;
320 }
321
322 enum a3xx_msaa_samples
323 tu_msaa_samples(uint32_t samples)
324 {
325 switch (samples) {
326 case 1:
327 return MSAA_ONE;
328 case 2:
329 return MSAA_TWO;
330 case 4:
331 return MSAA_FOUR;
332 case 8:
333 return MSAA_EIGHT;
334 default:
335 assert(!"invalid sample count");
336 return MSAA_ONE;
337 }
338 }
339
340 static enum a4xx_index_size
341 tu6_index_size(VkIndexType type)
342 {
343 switch (type) {
344 case VK_INDEX_TYPE_UINT16:
345 return INDEX4_SIZE_16_BIT;
346 case VK_INDEX_TYPE_UINT32:
347 return INDEX4_SIZE_32_BIT;
348 default:
349 unreachable("invalid VkIndexType");
350 return INDEX4_SIZE_8_BIT;
351 }
352 }
353
354 static void
355 tu6_emit_marker(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
356 {
357 tu_cs_emit_write_reg(cs, cmd->marker_reg, ++cmd->marker_seqno);
358 }
359
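/* Emit a CP_EVENT_WRITE for \a event.  When \a need_seqno is set, the event
 * also writes an incrementing sequence number to the command buffer's scratch
 * BO; the returned value lets callers wait for completion (see the
 * CP_WAIT_REG_MEM in tu6_cache_flush()).
 */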
360 unsigned
361 tu6_emit_event_write(struct tu_cmd_buffer *cmd,
362 struct tu_cs *cs,
363 enum vgt_event_type event,
364 bool need_seqno)
365 {
366 unsigned seqno = 0;
367
368 tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, need_seqno ? 4 : 1);
369 tu_cs_emit(cs, CP_EVENT_WRITE_0_EVENT(event));
370 if (need_seqno) {
371 tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
372 seqno = ++cmd->scratch_seqno;
373 tu_cs_emit(cs, seqno);
374 }
375
376 return seqno;
377 }
378
379 static void
380 tu6_emit_cache_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
381 {
382 tu6_emit_event_write(cmd, cs, 0x31, false); /* cache event, raw value (no named enum used here) */
383 }
384
385 static void
386 tu6_emit_lrz_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
387 {
388 tu6_emit_event_write(cmd, cs, LRZ_FLUSH, false);
389 }
390
391 static void
392 tu6_emit_wfi(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
393 {
394 if (cmd->wait_for_idle) {
395 tu_cs_emit_wfi(cs);
396 cmd->wait_for_idle = false;
397 }
398 }
399
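/* Emit the UBWC flag-buffer address and pitch for an image view, or zeros
 * when the image has no UBWC metadata.
 */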
400 static void
401 tu6_emit_flag_buffer(struct tu_cs *cs, const struct tu_image_view *iview)
402 {
403 uint64_t va = tu_image_ubwc_base(iview->image, iview->base_mip, iview->base_layer);
404 uint32_t pitch = tu_image_ubwc_pitch(iview->image, iview->base_mip);
405 uint32_t size = tu_image_ubwc_size(iview->image, iview->base_mip);
406 if (iview->image->layout.ubwc_size) {
407 tu_cs_emit_qw(cs, va);
408 tu_cs_emit(cs, A6XX_RB_DEPTH_FLAG_BUFFER_PITCH_PITCH(pitch) |
409 A6XX_RB_DEPTH_FLAG_BUFFER_PITCH_ARRAY_PITCH(size >> 2));
410 } else {
411 tu_cs_emit_qw(cs, 0);
412 tu_cs_emit(cs, 0);
413 }
414 }
415
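/* Program the depth/stencil buffer state for the current subpass, including
 * its GMEM offset, or DEPTH6_NONE when there is no depth/stencil attachment.
 */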
416 static void
417 tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
418 {
419 const struct tu_framebuffer *fb = cmd->state.framebuffer;
420 const struct tu_subpass *subpass = cmd->state.subpass;
421 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
422
423 const uint32_t a = subpass->depth_stencil_attachment.attachment;
424 if (a == VK_ATTACHMENT_UNUSED) {
425 tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
426 tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
427 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
428 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
429 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
430 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
431 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */
432
433 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
434 tu_cs_emit(cs,
435 A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
436
437 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5);
438 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
439 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
440 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
441 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
442 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */
443
444 tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_INFO, 1);
445 tu_cs_emit(cs, 0x00000000); /* RB_STENCIL_INFO */
446
447 return;
448 }
449
450 const struct tu_image_view *iview = fb->attachments[a].attachment;
451 enum a6xx_depth_format fmt = tu6_pipe2depth(iview->vk_format);
452
453 tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
454 tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
455 tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(tu_image_stride(iview->image, iview->base_mip)));
456 tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(iview->image->layout.layer_size));
457 tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
458 tu_cs_emit(cs, tiling->gmem_offsets[subpass->color_count]);
459
460 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
461 tu_cs_emit(cs, A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
462
463 tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
464 tu6_emit_flag_buffer(cs, iview);
465
466 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5);
467 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
468 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
469 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
470 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
471 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */
472
473 tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_INFO, 1);
474 tu_cs_emit(cs, 0x00000000); /* RB_STENCIL_INFO */
475
476 /* enable zs? */
477 }
478
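/* Program the color attachments (MRTs) for the current subpass: format,
 * tiling, base/GMEM addresses, sRGB enables and per-RT component masks.
 */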
479 static void
480 tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
481 {
482 const struct tu_framebuffer *fb = cmd->state.framebuffer;
483 const struct tu_subpass *subpass = cmd->state.subpass;
484 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
485 unsigned char mrt_comp[MAX_RTS] = { 0 };
486 unsigned srgb_cntl = 0;
487
488 for (uint32_t i = 0; i < subpass->color_count; ++i) {
489 uint32_t a = subpass->color_attachments[i].attachment;
490 if (a == VK_ATTACHMENT_UNUSED)
491 continue;
492
493 const struct tu_image_view *iview = fb->attachments[a].attachment;
494 const enum a6xx_tile_mode tile_mode =
495 tu6_get_image_tile_mode(iview->image, iview->base_mip);
496
497 mrt_comp[i] = 0xf;
498
499 if (vk_format_is_srgb(iview->vk_format))
500 srgb_cntl |= (1 << i);
501
502 const struct tu_native_format *format =
503 tu6_get_native_format(iview->vk_format);
504 assert(format && format->rb >= 0);
505
506 tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6);
507 tu_cs_emit(cs, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format->rb) |
508 A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
509 A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(format->swap));
510 tu_cs_emit(cs, A6XX_RB_MRT_PITCH(tu_image_stride(iview->image, iview->base_mip)));
511 tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(iview->image->layout.layer_size));
512 tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
513 tu_cs_emit(
514 cs, tiling->gmem_offsets[i]); /* RB_MRT[i].BASE_GMEM */
515
516 tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_MRT_REG(i), 1);
517 tu_cs_emit(cs, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format->rb) |
518 COND(vk_format_is_sint(iview->vk_format), A6XX_SP_FS_MRT_REG_COLOR_SINT) |
519 COND(vk_format_is_uint(iview->vk_format), A6XX_SP_FS_MRT_REG_COLOR_UINT));
520
521 tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);
522 tu6_emit_flag_buffer(cs, iview);
523 }
524
525 tu_cs_emit_pkt4(cs, REG_A6XX_RB_SRGB_CNTL, 1);
526 tu_cs_emit(cs, srgb_cntl);
527
528 tu_cs_emit_pkt4(cs, REG_A6XX_SP_SRGB_CNTL, 1);
529 tu_cs_emit(cs, srgb_cntl);
530
531 tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_COMPONENTS, 1);
532 tu_cs_emit(cs, A6XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
533 A6XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
534 A6XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
535 A6XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
536 A6XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
537 A6XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
538 A6XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
539 A6XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
540
541 tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_RENDER_COMPONENTS, 1);
542 tu_cs_emit(cs, A6XX_SP_FS_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
543 A6XX_SP_FS_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
544 A6XX_SP_FS_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
545 A6XX_SP_FS_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
546 A6XX_SP_FS_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
547 A6XX_SP_FS_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
548 A6XX_SP_FS_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
549 A6XX_SP_FS_RENDER_COMPONENTS_RT7(mrt_comp[7]));
550 }
551
552 static void
553 tu6_emit_msaa(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
554 {
555 const struct tu_subpass *subpass = cmd->state.subpass;
556 const enum a3xx_msaa_samples samples =
557 tu_msaa_samples(subpass->max_sample_count);
558
559 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
560 tu_cs_emit(cs, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
561 tu_cs_emit(cs, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
562 COND(samples == MSAA_ONE, A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));
563
564 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
565 tu_cs_emit(cs, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
566 tu_cs_emit(cs, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
567 COND(samples == MSAA_ONE, A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE));
568
569 tu_cs_emit_pkt4(cs, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
570 tu_cs_emit(cs, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
571 tu_cs_emit(cs, A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
572 COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));
573
574 tu_cs_emit_pkt4(cs, REG_A6XX_RB_MSAA_CNTL, 1);
575 tu_cs_emit(cs, A6XX_RB_MSAA_CNTL_SAMPLES(samples));
576 }
577
578 static void
579 tu6_emit_bin_size(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t flags)
580 {
581 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
582 const uint32_t bin_w = tiling->tile0.extent.width;
583 const uint32_t bin_h = tiling->tile0.extent.height;
584
585 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_BIN_CONTROL, 1);
586 tu_cs_emit(cs, A6XX_GRAS_BIN_CONTROL_BINW(bin_w) |
587 A6XX_GRAS_BIN_CONTROL_BINH(bin_h) | flags);
588
589 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BIN_CONTROL, 1);
590 tu_cs_emit(cs, A6XX_RB_BIN_CONTROL_BINW(bin_w) |
591 A6XX_RB_BIN_CONTROL_BINH(bin_h) | flags);
592
593 /* no flag for RB_BIN_CONTROL2... */
594 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BIN_CONTROL2, 1);
595 tu_cs_emit(cs, A6XX_RB_BIN_CONTROL2_BINW(bin_w) |
596 A6XX_RB_BIN_CONTROL2_BINH(bin_h));
597 }
598
599 static void
600 tu6_emit_render_cntl(struct tu_cmd_buffer *cmd,
601 struct tu_cs *cs,
602 bool binning)
603 {
604 uint32_t cntl = 0;
605 cntl |= A6XX_RB_RENDER_CNTL_UNK4;
606 if (binning)
607 cntl |= A6XX_RB_RENDER_CNTL_BINNING;
608
609 tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
610 tu_cs_emit(cs, 0x2);
611 tu_cs_emit(cs, REG_A6XX_RB_RENDER_CNTL);
612 tu_cs_emit(cs, cntl);
613 }
614
615 static void
616 tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
617 {
618 const VkRect2D *render_area = &cmd->state.tiling_config.render_area;
619 const uint32_t x1 = render_area->offset.x;
620 const uint32_t y1 = render_area->offset.y;
621 const uint32_t x2 = x1 + render_area->extent.width - 1;
622 const uint32_t y2 = y1 + render_area->extent.height - 1;
623
624 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
625 tu_cs_emit(cs,
626 A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
627 tu_cs_emit(cs,
628 A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
629 }
630
631 static void
632 tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
633 struct tu_cs *cs,
634 const struct tu_image_view *iview,
635 uint32_t gmem_offset,
636 uint32_t blit_info)
637 {
638 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
639 tu_cs_emit(cs, blit_info);
640
641 const struct tu_native_format *format =
642 tu6_get_native_format(iview->vk_format);
643 assert(format && format->rb >= 0);
644
645 enum a6xx_tile_mode tile_mode =
646 tu6_get_image_tile_mode(iview->image, iview->base_mip);
647 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 5);
648 tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) |
649 A6XX_RB_BLIT_DST_INFO_SAMPLES(tu_msaa_samples(iview->image->samples)) |
650 A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) |
651 A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap) |
652 COND(iview->image->layout.ubwc_size,
653 A6XX_RB_BLIT_DST_INFO_FLAGS));
654 tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
655 tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip)));
656 tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layout.layer_size));
657
658 if (iview->image->layout.ubwc_size) {
659 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
660 tu6_emit_flag_buffer(cs, iview);
661 }
662
663 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
664 tu_cs_emit(cs, gmem_offset);
665 }
666
667 static void
668 tu6_emit_blit_clear(struct tu_cmd_buffer *cmd,
669 struct tu_cs *cs,
670 const struct tu_image_view *iview,
671 uint32_t gmem_offset,
672 const VkClearValue *clear_value)
673 {
674 const struct tu_native_format *format =
675 tu6_get_native_format(iview->vk_format);
676 assert(format && format->rb >= 0);
677
678 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
679 tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb));
680
681 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
682 tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));
683
684 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
685 tu_cs_emit(cs, gmem_offset);
686
687 tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
688 tu_cs_emit(cs, 0);
689
690 uint32_t clear_vals[4] = { 0 };
691 tu_pack_clear_value(clear_value, iview->vk_format, clear_vals);
692
693 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
694 tu_cs_emit(cs, clear_vals[0]);
695 tu_cs_emit(cs, clear_vals[1]);
696 tu_cs_emit(cs, clear_vals[2]);
697 tu_cs_emit(cs, clear_vals[3]);
698 }
699
700 static void
701 tu6_emit_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
702 {
703 tu6_emit_marker(cmd, cs);
704 tu6_emit_event_write(cmd, cs, BLIT, false);
705 tu6_emit_marker(cmd, cs);
706 }
707
708 static void
709 tu6_emit_window_scissor(struct tu_cmd_buffer *cmd,
710 struct tu_cs *cs,
711 uint32_t x1,
712 uint32_t y1,
713 uint32_t x2,
714 uint32_t y2)
715 {
716 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
717 tu_cs_emit(cs, A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
718 A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
719 tu_cs_emit(cs, A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
720 A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));
721
722 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RESOLVE_CNTL_1, 2);
723 tu_cs_emit(
724 cs, A6XX_GRAS_RESOLVE_CNTL_1_X(x1) | A6XX_GRAS_RESOLVE_CNTL_1_Y(y1));
725 tu_cs_emit(
726 cs, A6XX_GRAS_RESOLVE_CNTL_2_X(x2) | A6XX_GRAS_RESOLVE_CNTL_2_Y(y2));
727 }
728
729 static void
730 tu6_emit_window_offset(struct tu_cmd_buffer *cmd,
731 struct tu_cs *cs,
732 uint32_t x1,
733 uint32_t y1)
734 {
735 tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET, 1);
736 tu_cs_emit(cs, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));
737
738 tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET2, 1);
739 tu_cs_emit(cs,
740 A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));
741
742 tu_cs_emit_pkt4(cs, REG_A6XX_SP_WINDOW_OFFSET, 1);
743 tu_cs_emit(cs, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1));
744
745 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
746 tu_cs_emit(
747 cs, A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
748 }
749
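/* Hardware binning is skipped when TU_DEBUG_NOBIN is set and for renders of
 * one or two tiles, where a separate binning pass is unlikely to pay off.
 */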
750 static bool
751 use_hw_binning(struct tu_cmd_buffer *cmd)
752 {
753 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
754
755 if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_NOBIN))
756 return false;
757
758 return (tiling->tile_count.width * tiling->tile_count.height) > 2;
759 }
760
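/* Select a tile for rendering: set the window scissor/offset for its screen
 * region and, when hardware binning is used, conditionally (CP_REG_TEST on
 * OVERFLOW_FLAG_REG) point CP_SET_BIN_DATA5 at the tile's slice of the
 * visibility stream.  On overflow, or without binning, visibility is
 * overridden so everything is drawn.
 */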
761 static void
762 tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
763 struct tu_cs *cs,
764 const struct tu_tile *tile)
765 {
766 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
767 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(0x7));
768
769 tu6_emit_marker(cmd, cs);
770 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
771 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10);
772 tu6_emit_marker(cmd, cs);
773
774 const uint32_t x1 = tile->begin.x;
775 const uint32_t y1 = tile->begin.y;
776 const uint32_t x2 = tile->end.x - 1;
777 const uint32_t y2 = tile->end.y - 1;
778 tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
779 tu6_emit_window_offset(cmd, cs, x1, y1);
780
781 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_OVERRIDE, 1);
782 tu_cs_emit(cs, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
783
784 if (use_hw_binning(cmd)) {
785 tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
786
787 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
788 tu_cs_emit(cs, 0x0);
789
790 tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
791 tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
792 A6XX_CP_REG_TEST_0_BIT(0) |
793 A6XX_CP_REG_TEST_0_UNK25);
794
795 tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
796 tu_cs_emit(cs, 0x10000000);
797 tu_cs_emit(cs, 11); /* conditionally execute next 11 dwords */
798
799 /* if (no overflow) */ {
800 tu_cs_emit_pkt7(cs, CP_SET_BIN_DATA5, 7);
801 tu_cs_emit(cs, cmd->state.tiling_config.pipe_sizes[tile->pipe] |
802 CP_SET_BIN_DATA5_0_VSC_N(tile->slot));
803 tu_cs_emit_qw(cs, cmd->vsc_data.iova + tile->pipe * cmd->vsc_data_pitch);
804 tu_cs_emit_qw(cs, cmd->vsc_data.iova + (tile->pipe * 4) + (32 * cmd->vsc_data_pitch));
805 tu_cs_emit_qw(cs, cmd->vsc_data2.iova + (tile->pipe * cmd->vsc_data2_pitch));
806
807 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
808 tu_cs_emit(cs, 0x0);
809
810 /* use a NOP packet to skip over the 'else' side: */
811 tu_cs_emit_pkt7(cs, CP_NOP, 2);
812 } /* else */ {
813 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
814 tu_cs_emit(cs, 0x1);
815 }
816
817 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
818 tu_cs_emit(cs, 0x0);
819
820 tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8804, 1);
821 tu_cs_emit(cs, 0x0);
822
823 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_UNKNOWN_B304, 1);
824 tu_cs_emit(cs, 0x0);
825
826 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_80A4, 1);
827 tu_cs_emit(cs, 0x0);
828 } else {
829 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
830 tu_cs_emit(cs, 0x1);
831
832 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
833 tu_cs_emit(cs, 0x0);
834 }
835 }
836
837 static void
838 tu6_emit_tile_load_attachment(struct tu_cmd_buffer *cmd,
839 struct tu_cs *cs,
840 uint32_t a,
841 uint32_t gmem_index)
842 {
843 const struct tu_framebuffer *fb = cmd->state.framebuffer;
844 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
845 const struct tu_attachment_state *attachments = cmd->state.attachments;
846
847 const struct tu_image_view *iview = fb->attachments[a].attachment;
848 const struct tu_attachment_state *att = attachments + a;
849 if (att->pending_clear_aspects) {
850 tu6_emit_blit_clear(cmd, cs, iview,
851 tiling->gmem_offsets[gmem_index],
852 &att->clear_value);
853 } else {
854 tu6_emit_blit_info(cmd, cs, iview,
855 tiling->gmem_offsets[gmem_index],
856 A6XX_RB_BLIT_INFO_UNK0 | A6XX_RB_BLIT_INFO_GMEM);
857 }
858
859 tu6_emit_blit(cmd, cs);
860 }
861
862 static void
863 tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
864 {
865 const struct tu_subpass *subpass = cmd->state.subpass;
866
867 tu6_emit_blit_scissor(cmd, cs);
868
869 for (uint32_t i = 0; i < subpass->color_count; ++i) {
870 const uint32_t a = subpass->color_attachments[i].attachment;
871 if (a != VK_ATTACHMENT_UNUSED)
872 tu6_emit_tile_load_attachment(cmd, cs, a, i);
873 }
874
875 const uint32_t a = subpass->depth_stencil_attachment.attachment;
876 if (a != VK_ATTACHMENT_UNUSED)
877 tu6_emit_tile_load_attachment(cmd, cs, a, subpass->color_count);
878 }
879
880 static void
881 tu6_emit_store_attachment(struct tu_cmd_buffer *cmd,
882 struct tu_cs *cs,
883 uint32_t a,
884 uint32_t gmem_index)
885 {
886 const struct tu_framebuffer *fb = cmd->state.framebuffer;
887 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
888
889 if (a == VK_ATTACHMENT_UNUSED)
890 return;
891
892 tu6_emit_blit_info(cmd, cs, fb->attachments[a].attachment,
893 tiling->gmem_offsets[gmem_index], 0);
894 tu6_emit_blit(cmd, cs);
895 }
896
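/* Resolve the current tile from GMEM back to system memory for every color,
 * resolve and depth/stencil attachment of the subpass.
 */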
897 static void
898 tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
899 {
900 const struct tu_subpass *subpass = cmd->state.subpass;
901
902 tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
903 tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
904 CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
905 CP_SET_DRAW_STATE__0_GROUP_ID(0));
906 tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
907 tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
908
909 tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
910 tu_cs_emit(cs, 0x0);
911
912 tu6_emit_marker(cmd, cs);
913 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
914 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10);
915 tu6_emit_marker(cmd, cs);
916
917 tu6_emit_blit_scissor(cmd, cs);
918
919 for (uint32_t i = 0; i < subpass->color_count; ++i) {
920 tu6_emit_store_attachment(cmd, cs,
921 subpass->color_attachments[i].attachment,
922 i);
923 if (subpass->resolve_attachments) {
924 tu6_emit_store_attachment(cmd, cs,
925 subpass->resolve_attachments[i].attachment,
926 i);
927 }
928 }
929
930 tu6_emit_store_attachment(cmd, cs,
931 subpass->depth_stencil_attachment.attachment,
932 subpass->color_count);
933 }
934
935 static void
936 tu6_emit_restart_index(struct tu_cs *cs, uint32_t restart_index)
937 {
938 tu_cs_emit_pkt4(cs, REG_A6XX_PC_RESTART_INDEX, 1);
939 tu_cs_emit(cs, restart_index);
940 }
941
942 static void
943 tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
944 {
945 VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
946 if (result != VK_SUCCESS) {
947 cmd->record_result = result;
948 return;
949 }
950
951 tu6_emit_cache_flush(cmd, cs);
952
953 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 0xfffff);
954
955 tu_cs_emit_write_reg(cs, REG_A6XX_RB_CCU_CNTL, 0x7c400004);
956 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E04, 0x00100000);
957 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE04, 0x8);
958 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE00, 0);
959 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE0F, 0x3f);
960 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B605, 0x44);
961 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B600, 0x100000);
962 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80);
963 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE01, 0);
964
965 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9600, 0);
966 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8600, 0x880);
967 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE04, 0);
968 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE03, 0x00000410);
969 tu_cs_emit_write_reg(cs, REG_A6XX_SP_IBO_COUNT, 0);
970 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B182, 0);
971 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BB11, 0);
972 tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
973 tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_CLIENT_PF, 4);
974 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E01, 0x0);
975 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AB00, 0x5);
976 tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A009, 0x00000001);
977 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8811, 0x00000010);
978 tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x1f);
979
980 tu_cs_emit_write_reg(cs, REG_A6XX_RB_SRGB_CNTL, 0);
981
982 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8101, 0);
983 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SAMPLE_CNTL, 0);
984 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8110, 0);
985
986 tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL0, 0x401);
987 tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL1, 0);
988 tu_cs_emit_write_reg(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 0);
989 tu_cs_emit_write_reg(cs, REG_A6XX_RB_SAMPLE_CNTL, 0);
990 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8818, 0);
991 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8819, 0);
992 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881A, 0);
993 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881B, 0);
994 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881C, 0);
995 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881D, 0);
996 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881E, 0);
997 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_88F0, 0);
998
999 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9101, 0xffff00);
1000 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9107, 0);
1001
1002 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9236, 1);
1003 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9300, 0);
1004
1005 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_SO_OVERRIDE,
1006 A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
1007
1008 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9801, 0);
1009 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0);
1010 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9980, 0);
1011
1012 tu_cs_emit_write_reg(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 0);
1013 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9B07, 0);
1014
1015 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A81B, 0);
1016
1017 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0);
1018
1019 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8099, 0);
1020 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_809B, 0);
1021 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A0, 2);
1022 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80AF, 0);
1023 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0);
1024 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9211, 0);
1025 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9602, 0);
1026 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9981, 0x3);
1027 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9E72, 0);
1028 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9108, 0x3);
1029 tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B304, 0);
1030 tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B309, 0x000000a2);
1031 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8804, 0);
1032 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A4, 0);
1033 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A5, 0);
1034 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A6, 0);
1035 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8805, 0);
1036 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8806, 0);
1037 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8878, 0);
1038 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8879, 0);
1039 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc);
1040
1041 tu6_emit_marker(cmd, cs);
1042
1043 tu_cs_emit_write_reg(cs, REG_A6XX_VFD_MODE_CNTL, 0x00000000);
1044
1045 tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0);
1046
1047 tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x0000001f);
1048
1049 /* we don't use this yet.. probably best to disable.. */
1050 tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
1051 tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
1052 CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
1053 CP_SET_DRAW_STATE__0_GROUP_ID(0));
1054 tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
1055 tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
1056
1057 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_BASE_LO(0), 3);
1058 tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_BASE_LO_0 */
1059 tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_BASE_HI_0 */
1060 tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_SIZE_0 */
1061
1062 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_FLUSH_BASE_LO(0), 2);
1063 tu_cs_emit(cs, 0x00000000); /* VPC_SO_FLUSH_BASE_LO_0 */
1064 tu_cs_emit(cs, 0x00000000); /* VPC_SO_FLUSH_BASE_HI_0 */
1065
1066 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUF_CNTL, 1);
1067 tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUF_CNTL */
1068
1069 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(0), 1);
1070 tu_cs_emit(cs, 0x00000000); /* UNKNOWN_E2AB */
1071
1072 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_BASE_LO(1), 3);
1073 tu_cs_emit(cs, 0x00000000);
1074 tu_cs_emit(cs, 0x00000000);
1075 tu_cs_emit(cs, 0x00000000);
1076
1077 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(1), 6);
1078 tu_cs_emit(cs, 0x00000000);
1079 tu_cs_emit(cs, 0x00000000);
1080 tu_cs_emit(cs, 0x00000000);
1081 tu_cs_emit(cs, 0x00000000);
1082 tu_cs_emit(cs, 0x00000000);
1083 tu_cs_emit(cs, 0x00000000);
1084
1085 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(2), 6);
1086 tu_cs_emit(cs, 0x00000000);
1087 tu_cs_emit(cs, 0x00000000);
1088 tu_cs_emit(cs, 0x00000000);
1089 tu_cs_emit(cs, 0x00000000);
1090 tu_cs_emit(cs, 0x00000000);
1091 tu_cs_emit(cs, 0x00000000);
1092
1093 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(3), 3);
1094 tu_cs_emit(cs, 0x00000000);
1095 tu_cs_emit(cs, 0x00000000);
1096 tu_cs_emit(cs, 0x00000000);
1097
1098 tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_CTRL_REG0, 1);
1099 tu_cs_emit(cs, 0x00000000);
1100
1101 tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_CTRL_REG0, 1);
1102 tu_cs_emit(cs, 0x00000000);
1103
1104 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1);
1105 tu_cs_emit(cs, 0x00000000);
1106
1107 tu_cs_emit_pkt4(cs, REG_A6XX_RB_LRZ_CNTL, 1);
1108 tu_cs_emit(cs, 0x00000000);
1109
1110 tu_cs_sanity_check(cs);
1111 }
1112
1113 static void
1114 tu6_cache_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1115 {
1116 unsigned seqno;
1117
1118 seqno = tu6_emit_event_write(cmd, cs, CACHE_FLUSH_AND_INV_EVENT, true);
1119
1120 tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
1121 tu_cs_emit(cs, 0x00000013);
1122 tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
1123 tu_cs_emit(cs, seqno);
1124 tu_cs_emit(cs, 0xffffffff);
1125 tu_cs_emit(cs, 0x00000010);
1126
1127 seqno = tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS, true);
1128
1129 tu_cs_emit_pkt7(cs, CP_UNK_A6XX_14, 4);
1130 tu_cs_emit(cs, 0x00000000);
1131 tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
1132 tu_cs_emit(cs, seqno);
1133 }
1134
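/* Program the visibility stream compressor: bin size and count, the 32 pipe
 * configs computed in tu_tiling_config_update_pipes(), and the addresses and
 * pitches of the two VSC data buffers.
 */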
1135 static void
1136 update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1137 {
1138 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1139
1140 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_BIN_SIZE, 3);
1141 tu_cs_emit(cs, A6XX_VSC_BIN_SIZE_WIDTH(tiling->tile0.extent.width) |
1142 A6XX_VSC_BIN_SIZE_HEIGHT(tiling->tile0.extent.height));
1143 tu_cs_emit_qw(cs, cmd->vsc_data.iova + 32 * cmd->vsc_data_pitch); /* VSC_SIZE_ADDRESS_LO/HI */
1144
1145 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_BIN_COUNT, 1);
1146 tu_cs_emit(cs, A6XX_VSC_BIN_COUNT_NX(tiling->tile_count.width) |
1147 A6XX_VSC_BIN_COUNT_NY(tiling->tile_count.height));
1148
1149 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
1150 for (unsigned i = 0; i < 32; i++)
1151 tu_cs_emit(cs, tiling->pipe_config[i]);
1152
1153 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_DATA2_ADDRESS_LO, 4);
1154 tu_cs_emit_qw(cs, cmd->vsc_data2.iova);
1155 tu_cs_emit(cs, cmd->vsc_data2_pitch);
1156 tu_cs_emit(cs, cmd->vsc_data2.size);
1157
1158 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_DATA_ADDRESS_LO, 4);
1159 tu_cs_emit_qw(cs, cmd->vsc_data.iova);
1160 tu_cs_emit(cs, cmd->vsc_data_pitch);
1161 tu_cs_emit(cs, cmd->vsc_data.size);
1162 }
1163
1164 static void
1165 emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1166 {
1167 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1168 const uint32_t used_pipe_count =
1169 tiling->pipe_count.width * tiling->pipe_count.height;
1170
1171 /* Clear vsc_scratch: */
1172 tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 3);
1173 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
1174 tu_cs_emit(cs, 0x0);
1175
1176 /* Check for overflow, write vsc_scratch if detected: */
1177 for (int i = 0; i < used_pipe_count; i++) {
1178 tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 8);
1179 tu_cs_emit(cs, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
1180 CP_COND_WRITE5_0_WRITE_MEMORY);
1181 tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_SIZE_REG(i)));
1182 tu_cs_emit(cs, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
1183 tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_data_pitch));
1184 tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0));
1185 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
1186 tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(1 + cmd->vsc_data_pitch));
1187
1188 tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 8);
1189 tu_cs_emit(cs, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
1190 CP_COND_WRITE5_0_WRITE_MEMORY);
1191 tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_SIZE2_REG(i)));
1192 tu_cs_emit(cs, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
1193 tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_data2_pitch));
1194 tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0));
1195 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
1196 tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(3 + cmd->vsc_data2_pitch));
1197 }
1198
1199 tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
1200
1201 tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
1202
1203 tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3);
1204 tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(OVERFLOW_FLAG_REG) |
1205 CP_MEM_TO_REG_0_CNT(1 - 1));
1206 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
1207
1208 /*
1209 * This is a bit awkward, we really want a way to invert the
1210 * CP_REG_TEST/CP_COND_REG_EXEC logic, so that we can conditionally
1211 * execute cmds to use hwbinning when a bit is *not* set. This
1212 * dance is to invert OVERFLOW_FLAG_REG
1213 *
1214 * A CP_NOP packet is used to skip executing the 'else' clause
1215 * if (b0 set)..
1216 */
1217
1218 /* b0 will be set if VSC_DATA or VSC_DATA2 overflow: */
1219 tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
1220 tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
1221 A6XX_CP_REG_TEST_0_BIT(0) |
1222 A6XX_CP_REG_TEST_0_UNK25);
1223
1224 tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
1225 tu_cs_emit(cs, 0x10000000);
1226 tu_cs_emit(cs, 7); /* conditionally execute next 7 dwords */
1227
1228 /* if (b0 set) */ {
1229 /*
1230 * On overflow, mirror the value to control->vsc_overflow
1231 * which CPU is checking to detect overflow (see
1232 * check_vsc_overflow())
1233 */
1234 tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
1235 tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(OVERFLOW_FLAG_REG) |
1236 CP_REG_TO_MEM_0_CNT(1 - 1));
1237 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_OVERFLOW);
1238
1239 tu_cs_emit_pkt4(cs, OVERFLOW_FLAG_REG, 1);
1240 tu_cs_emit(cs, 0x0);
1241
1242 tu_cs_emit_pkt7(cs, CP_NOP, 2); /* skip 'else' when 'if' is taken */
1243 } /* else */ {
1244 tu_cs_emit_pkt4(cs, OVERFLOW_FLAG_REG, 1);
1245 tu_cs_emit(cs, 0x1);
1246 }
1247 }
1248
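/* Run the recorded draw commands once in binning mode to fill the visibility
 * streams, then flush caches and check the per-pipe stream sizes for overflow
 * (emit_vsc_overflow_test) before the per-tile rendering passes.
 */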
1249 static void
1250 tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1251 {
1252 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1253
1254 uint32_t x1 = tiling->tile0.offset.x;
1255 uint32_t y1 = tiling->tile0.offset.y;
1256 uint32_t x2 = tiling->render_area.offset.x + tiling->render_area.extent.width - 1;
1257 uint32_t y2 = tiling->render_area.offset.y + tiling->render_area.extent.height - 1;
1258
1259 tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
1260
1261 tu6_emit_marker(cmd, cs);
1262 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
1263 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
1264 tu6_emit_marker(cmd, cs);
1265
1266 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
1267 tu_cs_emit(cs, 0x1);
1268
1269 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
1270 tu_cs_emit(cs, 0x1);
1271
1272 tu_cs_emit_wfi(cs);
1273
1274 tu_cs_emit_pkt4(cs, REG_A6XX_VFD_MODE_CNTL, 1);
1275 tu_cs_emit(cs, A6XX_VFD_MODE_CNTL_BINNING_PASS);
1276
1277 update_vsc_pipe(cmd, cs);
1278
1279 tu_cs_emit_pkt4(cs, REG_A6XX_PC_UNKNOWN_9805, 1);
1280 tu_cs_emit(cs, 0x1);
1281
1282 tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A0F8, 1);
1283 tu_cs_emit(cs, 0x1);
1284
1285 tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
1286 tu_cs_emit(cs, UNK_2C);
1287
1288 tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET, 1);
1289 tu_cs_emit(cs, A6XX_RB_WINDOW_OFFSET_X(0) |
1290 A6XX_RB_WINDOW_OFFSET_Y(0));
1291
1292 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
1293 tu_cs_emit(cs, A6XX_SP_TP_WINDOW_OFFSET_X(0) |
1294 A6XX_SP_TP_WINDOW_OFFSET_Y(0));
1295
1296 /* emit IB to binning drawcmds: */
1297 tu_cs_emit_call(cs, &cmd->draw_cs);
1298
1299 tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
1300 tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
1301 CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
1302 CP_SET_DRAW_STATE__0_GROUP_ID(0));
1303 tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
1304 tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
1305
1306 tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
1307 tu_cs_emit(cs, UNK_2D);
1308
1309 tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
1310 tu6_cache_flush(cmd, cs);
1311
1312 tu_cs_emit_wfi(cs);
1313
1314 tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
1315
1316 emit_vsc_overflow_test(cmd, cs);
1317
1318 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
1319 tu_cs_emit(cs, 0x0);
1320
1321 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
1322 tu_cs_emit(cs, 0x0);
1323
1324 tu_cs_emit_wfi(cs);
1325
1326 tu_cs_emit_pkt4(cs, REG_A6XX_RB_CCU_CNTL, 1);
1327 tu_cs_emit(cs, 0x7c400004);
1328
1329 cmd->wait_for_idle = false;
1330 }
1331
1332 static void
1333 tu6_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1334 {
1335 VkResult result = tu_cs_reserve_space(cmd->device, cs, 1024);
1336 if (result != VK_SUCCESS) {
1337 cmd->record_result = result;
1338 return;
1339 }
1340
1341 tu6_emit_lrz_flush(cmd, cs);
1342
1343 /* lrz clear? */
1344
1345 tu6_emit_cache_flush(cmd, cs);
1346
1347 tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
1348 tu_cs_emit(cs, 0x0);
1349
1350 /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
1351 tu6_emit_wfi(cmd, cs);
1352 tu_cs_emit_pkt4(cs, REG_A6XX_RB_CCU_CNTL, 1);
1353 tu_cs_emit(cs, 0x7c400004); /* RB_CCU_CNTL */
1354
1355 tu6_emit_zs(cmd, cs);
1356 tu6_emit_mrt(cmd, cs);
1357 tu6_emit_msaa(cmd, cs);
1358
1359 if (use_hw_binning(cmd)) {
1360 tu6_emit_bin_size(cmd, cs, A6XX_RB_BIN_CONTROL_BINNING_PASS | 0x6000000);
1361
1362 tu6_emit_render_cntl(cmd, cs, true);
1363
1364 tu6_emit_binning_pass(cmd, cs);
1365
1366 tu6_emit_bin_size(cmd, cs, A6XX_RB_BIN_CONTROL_USE_VIZ | 0x6000000);
1367
1368 tu_cs_emit_pkt4(cs, REG_A6XX_VFD_MODE_CNTL, 1);
1369 tu_cs_emit(cs, 0x0);
1370
1371 tu_cs_emit_pkt4(cs, REG_A6XX_PC_UNKNOWN_9805, 1);
1372 tu_cs_emit(cs, 0x1);
1373
1374 tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A0F8, 1);
1375 tu_cs_emit(cs, 0x1);
1376
1377 tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
1378 tu_cs_emit(cs, 0x1);
1379 } else {
1380 tu6_emit_bin_size(cmd, cs, 0x6000000);
1381 }
1382
1383 tu6_emit_render_cntl(cmd, cs, false);
1384
1385 tu_cs_sanity_check(cs);
1386 }
1387
1388 static void
1389 tu6_render_tile(struct tu_cmd_buffer *cmd,
1390 struct tu_cs *cs,
1391 const struct tu_tile *tile)
1392 {
1393 const uint32_t render_tile_space = 256 + tu_cs_get_call_size(&cmd->draw_cs);
1394 VkResult result = tu_cs_reserve_space(cmd->device, cs, render_tile_space);
1395 if (result != VK_SUCCESS) {
1396 cmd->record_result = result;
1397 return;
1398 }
1399
1400 tu6_emit_tile_select(cmd, cs, tile);
1401 tu_cs_emit_ib(cs, &cmd->state.tile_load_ib);
1402
1403 tu_cs_emit_call(cs, &cmd->draw_cs);
1404 cmd->wait_for_idle = true;
1405
1406 if (use_hw_binning(cmd)) {
1407 tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
1408 tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
1409 A6XX_CP_REG_TEST_0_BIT(0) |
1410 A6XX_CP_REG_TEST_0_UNK25);
1411
1412 tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
1413 tu_cs_emit(cs, 0x10000000);
1414 tu_cs_emit(cs, 2); /* conditionally execute next 2 dwords */
1415
1416 /* if (no overflow) */ {
1417 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
1418 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(0x5) | 0x10);
1419 }
1420 }
1421
1422 tu_cs_emit_ib(cs, &cmd->state.tile_store_ib);
1423
1424 tu_cs_sanity_check(cs);
1425 }
1426
1427 static void
1428 tu6_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1429 {
1430 VkResult result = tu_cs_reserve_space(cmd->device, cs, 16);
1431 if (result != VK_SUCCESS) {
1432 cmd->record_result = result;
1433 return;
1434 }
1435
1436 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1);
1437 tu_cs_emit(cs, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_UNK3);
1438
1439 tu6_emit_lrz_flush(cmd, cs);
1440
1441 tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS, true);
1442
1443 tu_cs_sanity_check(cs);
1444 }
1445
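/* Top-level GMEM rendering loop: emit the common setup (and optional binning
 * pass), then for every tile select it, run the load IB, replay the draw
 * commands and run the store IB, and finally emit the end-of-render state.
 */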
1446 static void
1447 tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
1448 {
1449 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1450
1451 tu6_render_begin(cmd, &cmd->cs);
1452
1453 for (uint32_t y = 0; y < tiling->tile_count.height; y++) {
1454 for (uint32_t x = 0; x < tiling->tile_count.width; x++) {
1455 struct tu_tile tile;
1456 tu_tiling_config_get_tile(tiling, cmd->device, x, y, &tile);
1457 tu6_render_tile(cmd, &cmd->cs, &tile);
1458 }
1459 }
1460
1461 tu6_render_end(cmd, &cmd->cs);
1462 }
1463
1464 static void
1465 tu_cmd_prepare_tile_load_ib(struct tu_cmd_buffer *cmd)
1466 {
1467 const uint32_t tile_load_space = 16 + 32 * MAX_RTS;
1468 const struct tu_subpass *subpass = cmd->state.subpass;
1469 struct tu_attachment_state *attachments = cmd->state.attachments;
1470 struct tu_cs sub_cs;
1471
1472 VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs,
1473 tile_load_space, &sub_cs);
1474 if (result != VK_SUCCESS) {
1475 cmd->record_result = result;
1476 return;
1477 }
1478
1479 /* emit to tile-load sub_cs */
1480 tu6_emit_tile_load(cmd, &sub_cs);
1481
1482 cmd->state.tile_load_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs);
1483
1484 for (uint32_t i = 0; i < subpass->color_count; ++i) {
1485 const uint32_t a = subpass->color_attachments[i].attachment;
1486 if (a != VK_ATTACHMENT_UNUSED)
1487 attachments[a].pending_clear_aspects = 0;
1488 }
1489 }
1490
1491 static void
1492 tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd)
1493 {
1494 const uint32_t tile_store_space = 32 + 32 * MAX_RTS;
1495 struct tu_cs sub_cs;
1496
1497 VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs,
1498 tile_store_space, &sub_cs);
1499 if (result != VK_SUCCESS) {
1500 cmd->record_result = result;
1501 return;
1502 }
1503
1504 /* emit to tile-store sub_cs */
1505 tu6_emit_tile_store(cmd, &sub_cs);
1506
1507 cmd->state.tile_store_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs);
1508 }
1509
1510 static void
1511 tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd,
1512 const VkRect2D *render_area)
1513 {
1514 const struct tu_device *dev = cmd->device;
1515 const struct tu_render_pass *pass = cmd->state.pass;
1516 const struct tu_subpass *subpass = cmd->state.subpass;
1517 struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1518
1519 uint32_t buffer_cpp[MAX_RTS + 2];
1520 uint32_t buffer_count = 0;
1521
1522 for (uint32_t i = 0; i < subpass->color_count; ++i) {
1523 const uint32_t a = subpass->color_attachments[i].attachment;
1524 if (a == VK_ATTACHMENT_UNUSED) {
1525 buffer_cpp[buffer_count++] = 0;
1526 continue;
1527 }
1528
1529 const struct tu_render_pass_attachment *att = &pass->attachments[a];
1530 buffer_cpp[buffer_count++] =
1531 vk_format_get_blocksize(att->format) * att->samples;
1532 }
1533
1534 if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
1535 const uint32_t a = subpass->depth_stencil_attachment.attachment;
1536 const struct tu_render_pass_attachment *att = &pass->attachments[a];
1537
1538 /* TODO */
1539 assert(att->format != VK_FORMAT_D32_SFLOAT_S8_UINT);
1540
1541 buffer_cpp[buffer_count++] =
1542 vk_format_get_blocksize(att->format) * att->samples;
1543 }
1544
1545 tu_tiling_config_update(tiling, dev, buffer_cpp, buffer_count,
1546 render_area);
1547 }
1548
1549 const struct tu_dynamic_state default_dynamic_state = {
1550 .viewport =
1551 {
1552 .count = 0,
1553 },
1554 .scissor =
1555 {
1556 .count = 0,
1557 },
1558 .line_width = 1.0f,
1559 .depth_bias =
1560 {
1561 .bias = 0.0f,
1562 .clamp = 0.0f,
1563 .slope = 0.0f,
1564 },
1565 .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
1566 .depth_bounds =
1567 {
1568 .min = 0.0f,
1569 .max = 1.0f,
1570 },
1571 .stencil_compare_mask =
1572 {
1573 .front = ~0u,
1574 .back = ~0u,
1575 },
1576 .stencil_write_mask =
1577 {
1578 .front = ~0u,
1579 .back = ~0u,
1580 },
1581 .stencil_reference =
1582 {
1583 .front = 0u,
1584 .back = 0u,
1585 },
1586 };
1587
1588 static void UNUSED /* FINISHME */
1589 tu_bind_dynamic_state(struct tu_cmd_buffer *cmd_buffer,
1590 const struct tu_dynamic_state *src)
1591 {
1592 struct tu_dynamic_state *dest = &cmd_buffer->state.dynamic;
1593 uint32_t copy_mask = src->mask;
1594 uint32_t dest_mask = 0;
1595
1596 tu_use_args(cmd_buffer); /* FINISHME */
1597
1598 /* Make sure to copy the number of viewports/scissors because they can
1599 * only be specified at pipeline creation time.
1600 */
1601 dest->viewport.count = src->viewport.count;
1602 dest->scissor.count = src->scissor.count;
1603 dest->discard_rectangle.count = src->discard_rectangle.count;
1604
1605 if (copy_mask & TU_DYNAMIC_VIEWPORT) {
1606 if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
1607 src->viewport.count * sizeof(VkViewport))) {
1608 typed_memcpy(dest->viewport.viewports, src->viewport.viewports,
1609 src->viewport.count);
1610 dest_mask |= TU_DYNAMIC_VIEWPORT;
1611 }
1612 }
1613
1614 if (copy_mask & TU_DYNAMIC_SCISSOR) {
1615 if (memcmp(&dest->scissor.scissors, &src->scissor.scissors,
1616 src->scissor.count * sizeof(VkRect2D))) {
1617 typed_memcpy(dest->scissor.scissors, src->scissor.scissors,
1618 src->scissor.count);
1619 dest_mask |= TU_DYNAMIC_SCISSOR;
1620 }
1621 }
1622
1623 if (copy_mask & TU_DYNAMIC_LINE_WIDTH) {
1624 if (dest->line_width != src->line_width) {
1625 dest->line_width = src->line_width;
1626 dest_mask |= TU_DYNAMIC_LINE_WIDTH;
1627 }
1628 }
1629
1630 if (copy_mask & TU_DYNAMIC_DEPTH_BIAS) {
1631 if (memcmp(&dest->depth_bias, &src->depth_bias,
1632 sizeof(src->depth_bias))) {
1633 dest->depth_bias = src->depth_bias;
1634 dest_mask |= TU_DYNAMIC_DEPTH_BIAS;
1635 }
1636 }
1637
1638 if (copy_mask & TU_DYNAMIC_BLEND_CONSTANTS) {
1639 if (memcmp(&dest->blend_constants, &src->blend_constants,
1640 sizeof(src->blend_constants))) {
1641 typed_memcpy(dest->blend_constants, src->blend_constants, 4);
1642 dest_mask |= TU_DYNAMIC_BLEND_CONSTANTS;
1643 }
1644 }
1645
1646 if (copy_mask & TU_DYNAMIC_DEPTH_BOUNDS) {
1647 if (memcmp(&dest->depth_bounds, &src->depth_bounds,
1648 sizeof(src->depth_bounds))) {
1649 dest->depth_bounds = src->depth_bounds;
1650 dest_mask |= TU_DYNAMIC_DEPTH_BOUNDS;
1651 }
1652 }
1653
1654 if (copy_mask & TU_DYNAMIC_STENCIL_COMPARE_MASK) {
1655 if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask,
1656 sizeof(src->stencil_compare_mask))) {
1657 dest->stencil_compare_mask = src->stencil_compare_mask;
1658 dest_mask |= TU_DYNAMIC_STENCIL_COMPARE_MASK;
1659 }
1660 }
1661
1662 if (copy_mask & TU_DYNAMIC_STENCIL_WRITE_MASK) {
1663 if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask,
1664 sizeof(src->stencil_write_mask))) {
1665 dest->stencil_write_mask = src->stencil_write_mask;
1666 dest_mask |= TU_DYNAMIC_STENCIL_WRITE_MASK;
1667 }
1668 }
1669
1670 if (copy_mask & TU_DYNAMIC_STENCIL_REFERENCE) {
1671 if (memcmp(&dest->stencil_reference, &src->stencil_reference,
1672 sizeof(src->stencil_reference))) {
1673 dest->stencil_reference = src->stencil_reference;
1674 dest_mask |= TU_DYNAMIC_STENCIL_REFERENCE;
1675 }
1676 }
1677
1678 if (copy_mask & TU_DYNAMIC_DISCARD_RECTANGLE) {
1679 if (memcmp(&dest->discard_rectangle.rectangles,
1680 &src->discard_rectangle.rectangles,
1681 src->discard_rectangle.count * sizeof(VkRect2D))) {
1682 typed_memcpy(dest->discard_rectangle.rectangles,
1683 src->discard_rectangle.rectangles,
1684 src->discard_rectangle.count);
1685 dest_mask |= TU_DYNAMIC_DISCARD_RECTANGLE;
1686 }
1687 }
1688 }
1689
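/* Allocate a command buffer and its backing storage: the main/draw/tile
 * command streams, a small scratch BO, and the VSC buffers used when
 * hardware binning is enabled.
 */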
1690 static VkResult
1691 tu_create_cmd_buffer(struct tu_device *device,
1692 struct tu_cmd_pool *pool,
1693 VkCommandBufferLevel level,
1694 VkCommandBuffer *pCommandBuffer)
1695 {
1696 struct tu_cmd_buffer *cmd_buffer;
1697 cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8,
1698 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1699 if (cmd_buffer == NULL)
1700 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1701
1702 cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1703 cmd_buffer->device = device;
1704 cmd_buffer->pool = pool;
1705 cmd_buffer->level = level;
1706
1707 if (pool) {
1708 list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
1709 cmd_buffer->queue_family_index = pool->queue_family_index;
1710
1711 } else {
1712 /* Init the pool_link so we can safely call list_del when we destroy
1713 * the command buffer
1714 */
1715 list_inithead(&cmd_buffer->pool_link);
1716 cmd_buffer->queue_family_index = TU_QUEUE_GENERAL;
1717 }
1718
1719 tu_bo_list_init(&cmd_buffer->bo_list);
1720 tu_cs_init(&cmd_buffer->cs, TU_CS_MODE_GROW, 4096);
1721 tu_cs_init(&cmd_buffer->draw_cs, TU_CS_MODE_GROW, 4096);
1722 tu_cs_init(&cmd_buffer->draw_state, TU_CS_MODE_SUB_STREAM, 2048);
1723 tu_cs_init(&cmd_buffer->tile_cs, TU_CS_MODE_SUB_STREAM, 1024);
1724
1725 *pCommandBuffer = tu_cmd_buffer_to_handle(cmd_buffer);
1726
1727 list_inithead(&cmd_buffer->upload.list);
1728
1729 cmd_buffer->marker_reg = REG_A6XX_CP_SCRATCH_REG(
1730 cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY ? 7 : 6);
1731
1732 VkResult result = tu_bo_init_new(device, &cmd_buffer->scratch_bo, 0x1000);
1733 if (result != VK_SUCCESS)
1734 return result;
1735
1736 #define VSC_DATA_SIZE(pitch) ((pitch) * 32 + 0x100) /* extra size to store VSC_SIZE */
1737 #define VSC_DATA2_SIZE(pitch) ((pitch) * 32)
1738
1739 /* TODO: resize on overflow or compute a max size from # of vertices in renderpass?? */
1740 cmd_buffer->vsc_data_pitch = 0x440 * 4;
1741 cmd_buffer->vsc_data2_pitch = 0x1040 * 4;
1742
1743 result = tu_bo_init_new(device, &cmd_buffer->vsc_data, VSC_DATA_SIZE(cmd_buffer->vsc_data_pitch));
1744 if (result != VK_SUCCESS)
1745 goto fail_vsc_data;
1746
1747 result = tu_bo_init_new(device, &cmd_buffer->vsc_data2, VSC_DATA2_SIZE(cmd_buffer->vsc_data2_pitch));
1748 if (result != VK_SUCCESS)
1749 goto fail_vsc_data2;
1750
1751 return VK_SUCCESS;
1752
1753 fail_vsc_data2:
1754 tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data);
1755 fail_vsc_data:
1756 tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo);
1757 return result;
1758 }
1759
1760 static void
1761 tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
1762 {
1763 tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo);
1764 tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data);
1765 tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data2);
1766
1767 list_del(&cmd_buffer->pool_link);
1768
1769 for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++)
1770 free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr);
1771
1772 tu_cs_finish(cmd_buffer->device, &cmd_buffer->cs);
1773 tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_cs);
1774 tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_state);
1775 tu_cs_finish(cmd_buffer->device, &cmd_buffer->tile_cs);
1776
1777 tu_bo_list_destroy(&cmd_buffer->bo_list);
1778 vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
1779 }
1780
1781 static VkResult
1782 tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer)
1783 {
1784 cmd_buffer->wait_for_idle = true;
1785
1786 cmd_buffer->record_result = VK_SUCCESS;
1787
1788 tu_bo_list_reset(&cmd_buffer->bo_list);
1789 tu_cs_reset(cmd_buffer->device, &cmd_buffer->cs);
1790 tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_cs);
1791 tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_state);
1792 tu_cs_reset(cmd_buffer->device, &cmd_buffer->tile_cs);
1793
1794 for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) {
1795 cmd_buffer->descriptors[i].dirty = 0;
1796 cmd_buffer->descriptors[i].valid = 0;
1797 cmd_buffer->descriptors[i].push_dirty = false;
1798 }
1799
1800 cmd_buffer->status = TU_CMD_BUFFER_STATUS_INITIAL;
1801
1802 return cmd_buffer->record_result;
1803 }
1804
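/* Add the framebuffer attachment BOs to the submit list and compute which
 * aspects still need clearing from the render pass load ops.
 */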
1805 static VkResult
1806 tu_cmd_state_setup_attachments(struct tu_cmd_buffer *cmd_buffer,
1807 const VkRenderPassBeginInfo *info)
1808 {
1809 struct tu_cmd_state *state = &cmd_buffer->state;
1810 const struct tu_framebuffer *fb = state->framebuffer;
1811 const struct tu_render_pass *pass = state->pass;
1812
1813 for (uint32_t i = 0; i < fb->attachment_count; ++i) {
1814 const struct tu_image_view *iview = fb->attachments[i].attachment;
1815 tu_bo_list_add(&cmd_buffer->bo_list, iview->image->bo,
1816 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
1817 }
1818
1819 if (pass->attachment_count == 0) {
1820 state->attachments = NULL;
1821 return VK_SUCCESS;
1822 }
1823
1824 state->attachments =
1825 vk_alloc(&cmd_buffer->pool->alloc,
1826 pass->attachment_count * sizeof(state->attachments[0]), 8,
1827 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1828 if (state->attachments == NULL) {
1829 cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1830 return cmd_buffer->record_result;
1831 }
1832
1833 for (uint32_t i = 0; i < pass->attachment_count; ++i) {
1834 const struct tu_render_pass_attachment *att = &pass->attachments[i];
1835 VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
1836 VkImageAspectFlags clear_aspects = 0;
1837
1838 if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
1839 /* color attachment */
1840 if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1841 clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
1842 }
1843 } else {
1844 /* depthstencil attachment */
1845 if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
1846 att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1847 clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
1848 if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
1849 att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
1850 clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
1851 }
1852 if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
1853 att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1854 clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
1855 }
1856 }
1857
1858 state->attachments[i].pending_clear_aspects = clear_aspects;
1859 state->attachments[i].cleared_views = 0;
1860 if (clear_aspects && info) {
1861 assert(info->clearValueCount > i);
1862 state->attachments[i].clear_value = info->pClearValues[i];
1863 }
1864
1865 state->attachments[i].current_layout = att->initial_layout;
1866 }
1867
1868 return VK_SUCCESS;
1869 }
1870
1871 VkResult
1872 tu_AllocateCommandBuffers(VkDevice _device,
1873 const VkCommandBufferAllocateInfo *pAllocateInfo,
1874 VkCommandBuffer *pCommandBuffers)
1875 {
1876 TU_FROM_HANDLE(tu_device, device, _device);
1877 TU_FROM_HANDLE(tu_cmd_pool, pool, pAllocateInfo->commandPool);
1878
1879 VkResult result = VK_SUCCESS;
1880 uint32_t i;
1881
1882 for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
1883
1884 if (!list_is_empty(&pool->free_cmd_buffers)) {
1885 struct tu_cmd_buffer *cmd_buffer = list_first_entry(
1886 &pool->free_cmd_buffers, struct tu_cmd_buffer, pool_link);
1887
1888 list_del(&cmd_buffer->pool_link);
1889 list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
1890
1891 result = tu_reset_cmd_buffer(cmd_buffer);
1892 cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1893 cmd_buffer->level = pAllocateInfo->level;
1894
1895 pCommandBuffers[i] = tu_cmd_buffer_to_handle(cmd_buffer);
1896 } else {
1897 result = tu_create_cmd_buffer(device, pool, pAllocateInfo->level,
1898 &pCommandBuffers[i]);
1899 }
1900 if (result != VK_SUCCESS)
1901 break;
1902 }
1903
1904 if (result != VK_SUCCESS) {
1905 tu_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i,
1906 pCommandBuffers);
1907
1908 /* From the Vulkan 1.0.66 spec:
1909 *
1910 * "vkAllocateCommandBuffers can be used to create multiple
1911 * command buffers. If the creation of any of those command
1912 * buffers fails, the implementation must destroy all
1913 * successfully created command buffer objects from this
1914 * command, set all entries of the pCommandBuffers array to
1915 * NULL and return the error."
1916 */
1917 memset(pCommandBuffers, 0,
1918 sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount);
1919 }
1920
1921 return result;
1922 }
1923
1924 void
1925 tu_FreeCommandBuffers(VkDevice device,
1926 VkCommandPool commandPool,
1927 uint32_t commandBufferCount,
1928 const VkCommandBuffer *pCommandBuffers)
1929 {
1930 for (uint32_t i = 0; i < commandBufferCount; i++) {
1931 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
1932
1933 if (cmd_buffer) {
1934 if (cmd_buffer->pool) {
1935 list_del(&cmd_buffer->pool_link);
1936 list_addtail(&cmd_buffer->pool_link,
1937 &cmd_buffer->pool->free_cmd_buffers);
1938 } else
1939 tu_cmd_buffer_destroy(cmd_buffer);
1940 }
1941 }
1942 }
1943
1944 VkResult
1945 tu_ResetCommandBuffer(VkCommandBuffer commandBuffer,
1946 VkCommandBufferResetFlags flags)
1947 {
1948 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
1949 return tu_reset_cmd_buffer(cmd_buffer);
1950 }
1951
1952 VkResult
1953 tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
1954 const VkCommandBufferBeginInfo *pBeginInfo)
1955 {
1956 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
1957 VkResult result = VK_SUCCESS;
1958
1959 if (cmd_buffer->status != TU_CMD_BUFFER_STATUS_INITIAL) {
1960 /* If the command buffer has already been reset with
1961 * vkResetCommandBuffer, no need to do it again.
1962 */
1963 result = tu_reset_cmd_buffer(cmd_buffer);
1964 if (result != VK_SUCCESS)
1965 return result;
1966 }
1967
1968 memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
1969 cmd_buffer->usage_flags = pBeginInfo->flags;
1970
1971 tu_cs_begin(&cmd_buffer->cs);
1972 tu_cs_begin(&cmd_buffer->draw_cs);
1973
1974 cmd_buffer->marker_seqno = 0;
1975 cmd_buffer->scratch_seqno = 0;
1976
1977 /* set up the initial configuration in the command buffer */
1978 if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
1979 switch (cmd_buffer->queue_family_index) {
1980 case TU_QUEUE_GENERAL:
1981 tu6_init_hw(cmd_buffer, &cmd_buffer->cs);
1982 break;
1983 default:
1984 break;
1985 }
1986 }
1987
1988 cmd_buffer->status = TU_CMD_BUFFER_STATUS_RECORDING;
1989
1990 return VK_SUCCESS;
1991 }
1992
1993 void
1994 tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
1995 uint32_t firstBinding,
1996 uint32_t bindingCount,
1997 const VkBuffer *pBuffers,
1998 const VkDeviceSize *pOffsets)
1999 {
2000 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2001
2002 assert(firstBinding + bindingCount <= MAX_VBS);
2003
2004 for (uint32_t i = 0; i < bindingCount; i++) {
2005 cmd->state.vb.buffers[firstBinding + i] =
2006 tu_buffer_from_handle(pBuffers[i]);
2007 cmd->state.vb.offsets[firstBinding + i] = pOffsets[i];
2008 }
2009
2010 /* VB states depend on VkPipelineVertexInputStateCreateInfo */
2011 cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
2012 }
2013
2014 void
2015 tu_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
2016 VkBuffer buffer,
2017 VkDeviceSize offset,
2018 VkIndexType indexType)
2019 {
2020 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2021 TU_FROM_HANDLE(tu_buffer, buf, buffer);
2022
2023 /* initialize/update the restart index */
2024 if (!cmd->state.index_buffer || cmd->state.index_type != indexType) {
2025 struct tu_cs *draw_cs = &cmd->draw_cs;
2026 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 2);
2027 if (result != VK_SUCCESS) {
2028 cmd->record_result = result;
2029 return;
2030 }
2031
2032 tu6_emit_restart_index(
2033 draw_cs, indexType == VK_INDEX_TYPE_UINT32 ? 0xffffffff : 0xffff);
2034
2035 tu_cs_sanity_check(draw_cs);
2036 }
2037
2038 /* track the BO */
2039 if (cmd->state.index_buffer != buf)
2040 tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
2041
2042 cmd->state.index_buffer = buf;
2043 cmd->state.index_offset = offset;
2044 cmd->state.index_type = indexType;
2045 }
2046
2047 void
2048 tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
2049 VkPipelineBindPoint pipelineBindPoint,
2050 VkPipelineLayout _layout,
2051 uint32_t firstSet,
2052 uint32_t descriptorSetCount,
2053 const VkDescriptorSet *pDescriptorSets,
2054 uint32_t dynamicOffsetCount,
2055 const uint32_t *pDynamicOffsets)
2056 {
2057 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2058 TU_FROM_HANDLE(tu_pipeline_layout, layout, _layout);
2059 unsigned dyn_idx = 0;
2060
2061 struct tu_descriptor_state *descriptors_state =
2062 tu_get_descriptors_state(cmd_buffer, pipelineBindPoint);
2063
2064 for (unsigned i = 0; i < descriptorSetCount; ++i) {
2065 unsigned idx = i + firstSet;
2066 TU_FROM_HANDLE(tu_descriptor_set, set, pDescriptorSets[i]);
2067
2068 descriptors_state->sets[idx] = set;
2069 descriptors_state->valid |= (1u << idx);
2070
2071 for (unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) {
2072 unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
2073 assert(dyn_idx < dynamicOffsetCount);
2074
2075 descriptors_state->dynamic_buffers[idx] =
2076 set->dynamic_descriptors[j].va + pDynamicOffsets[dyn_idx];
2077 }
2078 }
2079
2080 cmd_buffer->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS;
2081 }
2082
2083 void
2084 tu_CmdPushConstants(VkCommandBuffer commandBuffer,
2085 VkPipelineLayout layout,
2086 VkShaderStageFlags stageFlags,
2087 uint32_t offset,
2088 uint32_t size,
2089 const void *pValues)
2090 {
2091 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2092 memcpy((void*) cmd_buffer->push_constants + offset, pValues, size);
2093 }
2094
2095 VkResult
2096 tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
2097 {
2098 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2099
2100 if (cmd_buffer->scratch_seqno) {
2101 tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->scratch_bo,
2102 MSM_SUBMIT_BO_WRITE);
2103 }
2104
2105 if (cmd_buffer->use_vsc_data) {
2106 tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->vsc_data,
2107 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
2108 tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->vsc_data2,
2109 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
2110 }
2111
2112 for (uint32_t i = 0; i < cmd_buffer->draw_cs.bo_count; i++) {
2113 tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_cs.bos[i],
2114 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2115 }
2116
2117 for (uint32_t i = 0; i < cmd_buffer->draw_state.bo_count; i++) {
2118 tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_state.bos[i],
2119 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2120 }
2121
2122 for (uint32_t i = 0; i < cmd_buffer->tile_cs.bo_count; i++) {
2123 tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->tile_cs.bos[i],
2124 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2125 }
2126
2127 tu_cs_end(&cmd_buffer->cs);
2128 tu_cs_end(&cmd_buffer->draw_cs);
2129
2130 assert(!cmd_buffer->state.attachments);
2131
2132 cmd_buffer->status = TU_CMD_BUFFER_STATUS_EXECUTABLE;
2133
2134 return cmd_buffer->record_result;
2135 }
2136
2137 void
2138 tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
2139 VkPipelineBindPoint pipelineBindPoint,
2140 VkPipeline _pipeline)
2141 {
2142 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2143 TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline);
2144
2145 switch (pipelineBindPoint) {
2146 case VK_PIPELINE_BIND_POINT_GRAPHICS:
2147 cmd->state.pipeline = pipeline;
2148 cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE;
2149 break;
2150 case VK_PIPELINE_BIND_POINT_COMPUTE:
2151 cmd->state.compute_pipeline = pipeline;
2152 cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_PIPELINE;
2153 break;
2154 default:
2155 unreachable("unrecognized pipeline bind point");
2156 break;
2157 }
2158
2159 tu_bo_list_add(&cmd->bo_list, &pipeline->program.binary_bo,
2160 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2161 for (uint32_t i = 0; i < pipeline->cs.bo_count; i++) {
2162 tu_bo_list_add(&cmd->bo_list, pipeline->cs.bos[i],
2163 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2164 }
2165 }
2166
2167 void
2168 tu_CmdSetViewport(VkCommandBuffer commandBuffer,
2169 uint32_t firstViewport,
2170 uint32_t viewportCount,
2171 const VkViewport *pViewports)
2172 {
2173 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2174 struct tu_cs *draw_cs = &cmd->draw_cs;
2175
2176 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 12);
2177 if (result != VK_SUCCESS) {
2178 cmd->record_result = result;
2179 return;
2180 }
2181
2182 assert(firstViewport == 0 && viewportCount == 1);
2183 tu6_emit_viewport(draw_cs, pViewports);
2184
2185 tu_cs_sanity_check(draw_cs);
2186 }
2187
2188 void
2189 tu_CmdSetScissor(VkCommandBuffer commandBuffer,
2190 uint32_t firstScissor,
2191 uint32_t scissorCount,
2192 const VkRect2D *pScissors)
2193 {
2194 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2195 struct tu_cs *draw_cs = &cmd->draw_cs;
2196
2197 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 3);
2198 if (result != VK_SUCCESS) {
2199 cmd->record_result = result;
2200 return;
2201 }
2202
2203 assert(firstScissor == 0 && scissorCount == 1);
2204 tu6_emit_scissor(draw_cs, pScissors);
2205
2206 tu_cs_sanity_check(draw_cs);
2207 }
2208
2209 void
2210 tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
2211 {
2212 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2213
2214 cmd->state.dynamic.line_width = lineWidth;
2215
2216 /* line width depends on VkPipelineRasterizationStateCreateInfo */
2217 cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
2218 }
2219
2220 void
2221 tu_CmdSetDepthBias(VkCommandBuffer commandBuffer,
2222 float depthBiasConstantFactor,
2223 float depthBiasClamp,
2224 float depthBiasSlopeFactor)
2225 {
2226 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2227 struct tu_cs *draw_cs = &cmd->draw_cs;
2228
2229 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 4);
2230 if (result != VK_SUCCESS) {
2231 cmd->record_result = result;
2232 return;
2233 }
2234
2235 tu6_emit_depth_bias(draw_cs, depthBiasConstantFactor, depthBiasClamp,
2236 depthBiasSlopeFactor);
2237
2238 tu_cs_sanity_check(draw_cs);
2239 }
2240
2241 void
2242 tu_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
2243 const float blendConstants[4])
2244 {
2245 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2246 struct tu_cs *draw_cs = &cmd->draw_cs;
2247
2248 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 5);
2249 if (result != VK_SUCCESS) {
2250 cmd->record_result = result;
2251 return;
2252 }
2253
2254 tu6_emit_blend_constants(draw_cs, blendConstants);
2255
2256 tu_cs_sanity_check(draw_cs);
2257 }
2258
2259 void
2260 tu_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
2261 float minDepthBounds,
2262 float maxDepthBounds)
2263 {
2264 }
2265
2266 void
2267 tu_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
2268 VkStencilFaceFlags faceMask,
2269 uint32_t compareMask)
2270 {
2271 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2272
2273 if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
2274 cmd->state.dynamic.stencil_compare_mask.front = compareMask;
2275 if (faceMask & VK_STENCIL_FACE_BACK_BIT)
2276 cmd->state.dynamic.stencil_compare_mask.back = compareMask;
2277
2278 /* the front/back compare masks must be updated together */
2279 cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
2280 }
2281
2282 void
2283 tu_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
2284 VkStencilFaceFlags faceMask,
2285 uint32_t writeMask)
2286 {
2287 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2288
2289 if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
2290 cmd->state.dynamic.stencil_write_mask.front = writeMask;
2291 if (faceMask & VK_STENCIL_FACE_BACK_BIT)
2292 cmd->state.dynamic.stencil_write_mask.back = writeMask;
2293
2294 /* the front/back write masks must be updated together */
2295 cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
2296 }
2297
2298 void
2299 tu_CmdSetStencilReference(VkCommandBuffer commandBuffer,
2300 VkStencilFaceFlags faceMask,
2301 uint32_t reference)
2302 {
2303 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2304
2305 if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
2306 cmd->state.dynamic.stencil_reference.front = reference;
2307 if (faceMask & VK_STENCIL_FACE_BACK_BIT)
2308 cmd->state.dynamic.stencil_reference.back = reference;
2309
2310 /* the front/back references must be updated together */
2311 cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
2312 }
2313
2314 void
2315 tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
2316 uint32_t commandBufferCount,
2317 const VkCommandBuffer *pCmdBuffers)
2318 {
2319 }
2320
2321 VkResult
2322 tu_CreateCommandPool(VkDevice _device,
2323 const VkCommandPoolCreateInfo *pCreateInfo,
2324 const VkAllocationCallbacks *pAllocator,
2325 VkCommandPool *pCmdPool)
2326 {
2327 TU_FROM_HANDLE(tu_device, device, _device);
2328 struct tu_cmd_pool *pool;
2329
2330 pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
2331 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2332 if (pool == NULL)
2333 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2334
2335 if (pAllocator)
2336 pool->alloc = *pAllocator;
2337 else
2338 pool->alloc = device->alloc;
2339
2340 list_inithead(&pool->cmd_buffers);
2341 list_inithead(&pool->free_cmd_buffers);
2342
2343 pool->queue_family_index = pCreateInfo->queueFamilyIndex;
2344
2345 *pCmdPool = tu_cmd_pool_to_handle(pool);
2346
2347 return VK_SUCCESS;
2348 }
2349
2350 void
2351 tu_DestroyCommandPool(VkDevice _device,
2352 VkCommandPool commandPool,
2353 const VkAllocationCallbacks *pAllocator)
2354 {
2355 TU_FROM_HANDLE(tu_device, device, _device);
2356 TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);
2357
2358 if (!pool)
2359 return;
2360
2361 list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
2362 &pool->cmd_buffers, pool_link)
2363 {
2364 tu_cmd_buffer_destroy(cmd_buffer);
2365 }
2366
2367 list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
2368 &pool->free_cmd_buffers, pool_link)
2369 {
2370 tu_cmd_buffer_destroy(cmd_buffer);
2371 }
2372
2373 vk_free2(&device->alloc, pAllocator, pool);
2374 }
2375
2376 VkResult
2377 tu_ResetCommandPool(VkDevice device,
2378 VkCommandPool commandPool,
2379 VkCommandPoolResetFlags flags)
2380 {
2381 TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);
2382 VkResult result;
2383
2384 list_for_each_entry(struct tu_cmd_buffer, cmd_buffer, &pool->cmd_buffers,
2385 pool_link)
2386 {
2387 result = tu_reset_cmd_buffer(cmd_buffer);
2388 if (result != VK_SUCCESS)
2389 return result;
2390 }
2391
2392 return VK_SUCCESS;
2393 }
2394
2395 void
2396 tu_TrimCommandPool(VkDevice device,
2397 VkCommandPool commandPool,
2398 VkCommandPoolTrimFlags flags)
2399 {
2400 TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);
2401
2402 if (!pool)
2403 return;
2404
2405 list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
2406 &pool->free_cmd_buffers, pool_link)
2407 {
2408 tu_cmd_buffer_destroy(cmd_buffer);
2409 }
2410 }
2411
2412 void
2413 tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
2414 const VkRenderPassBeginInfo *pRenderPassBegin,
2415 VkSubpassContents contents)
2416 {
2417 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2418 TU_FROM_HANDLE(tu_render_pass, pass, pRenderPassBegin->renderPass);
2419 TU_FROM_HANDLE(tu_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
2420 VkResult result;
2421
2422 cmd_buffer->state.pass = pass;
2423 cmd_buffer->state.subpass = pass->subpasses;
2424 cmd_buffer->state.framebuffer = framebuffer;
2425
2426 result = tu_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin);
2427 if (result != VK_SUCCESS)
2428 return;
2429
2430 tu_cmd_update_tiling_config(cmd_buffer, &pRenderPassBegin->renderArea);
2431 tu_cmd_prepare_tile_load_ib(cmd_buffer);
2432 tu_cmd_prepare_tile_store_ib(cmd_buffer);
2433
2434 /* note: use_hw_binning only checks tiling config */
2435 if (use_hw_binning(cmd_buffer))
2436 cmd_buffer->use_vsc_data = true;
2437 }
2438
2439 void
2440 tu_CmdBeginRenderPass2KHR(VkCommandBuffer commandBuffer,
2441 const VkRenderPassBeginInfo *pRenderPassBeginInfo,
2442 const VkSubpassBeginInfoKHR *pSubpassBeginInfo)
2443 {
2444 tu_CmdBeginRenderPass(commandBuffer, pRenderPassBeginInfo,
2445 pSubpassBeginInfo->contents);
2446 }
2447
2448 void
2449 tu_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents)
2450 {
2451 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2452
2453 tu_cmd_render_tiles(cmd);
2454
2455 cmd->state.subpass++;
2456
2457 tu_cmd_update_tiling_config(cmd, NULL);
2458 tu_cmd_prepare_tile_load_ib(cmd);
2459 tu_cmd_prepare_tile_store_ib(cmd);
2460 }
2461
2462 void
2463 tu_CmdNextSubpass2KHR(VkCommandBuffer commandBuffer,
2464 const VkSubpassBeginInfoKHR *pSubpassBeginInfo,
2465 const VkSubpassEndInfoKHR *pSubpassEndInfo)
2466 {
2467 tu_CmdNextSubpass(commandBuffer, pSubpassBeginInfo->contents);
2468 }
2469
2470 struct tu_draw_info
2471 {
2472 /**
2473 * Number of vertices.
2474 */
2475 uint32_t count;
2476
2477 /**
2478 * Index of the first vertex.
2479 */
2480 int32_t vertex_offset;
2481
2482 /**
2483 * First instance id.
2484 */
2485 uint32_t first_instance;
2486
2487 /**
2488 * Number of instances.
2489 */
2490 uint32_t instance_count;
2491
2492 /**
2493 * First index (indexed draws only).
2494 */
2495 uint32_t first_index;
2496
2497 /**
2498 * Whether it's an indexed draw.
2499 */
2500 bool indexed;
2501
2502 /**
2503 * Indirect draw parameters resource.
2504 */
2505 struct tu_buffer *indirect;
2506 uint64_t indirect_offset;
2507 uint32_t stride;
2508
2509 /**
2510 * Draw count parameters resource.
2511 */
2512 struct tu_buffer *count_buffer;
2513 uint64_t count_buffer_offset;
2514 };
2515
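/* Group IDs for the state groups emitted through CP_SET_DRAW_STATE; each
 * group references an IB containing the corresponding state.
 */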
2516 enum tu_draw_state_group_id
2517 {
2518 TU_DRAW_STATE_PROGRAM,
2519 TU_DRAW_STATE_PROGRAM_BINNING,
2520 TU_DRAW_STATE_VI,
2521 TU_DRAW_STATE_VI_BINNING,
2522 TU_DRAW_STATE_VP,
2523 TU_DRAW_STATE_RAST,
2524 TU_DRAW_STATE_DS,
2525 TU_DRAW_STATE_BLEND,
2526 TU_DRAW_STATE_VS_CONST,
2527 TU_DRAW_STATE_FS_CONST,
2528 TU_DRAW_STATE_VS_TEX,
2529 TU_DRAW_STATE_FS_TEX,
2530 TU_DRAW_STATE_FS_IBO,
2531
2532 TU_DRAW_STATE_COUNT,
2533 };
2534
2535 struct tu_draw_state_group
2536 {
2537 enum tu_draw_state_group_id id;
2538 uint32_t enable_mask;
2539 struct tu_cs_entry ib;
2540 };
2541
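/* Helpers that resolve a descriptor map entry against the bound descriptor
 * sets, returning the sampler/texture descriptor words or the buffer's GPU
 * address.
 */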
2542 static struct tu_sampler*
2543 sampler_ptr(struct tu_descriptor_state *descriptors_state,
2544 const struct tu_descriptor_map *map, unsigned i)
2545 {
2546 assert(descriptors_state->valid & (1 << map->set[i]));
2547
2548 struct tu_descriptor_set *set = descriptors_state->sets[map->set[i]];
2549 assert(map->binding[i] < set->layout->binding_count);
2550
2551 const struct tu_descriptor_set_binding_layout *layout =
2552 &set->layout->binding[map->binding[i]];
2553
2554 switch (layout->type) {
2555 case VK_DESCRIPTOR_TYPE_SAMPLER:
2556 return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4];
2557 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2558 return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4 + A6XX_TEX_CONST_DWORDS];
2559 default:
2560 unreachable("unimplemented descriptor type");
2561 break;
2562 }
2563 }
2564
2565 static uint32_t*
2566 texture_ptr(struct tu_descriptor_state *descriptors_state,
2567 const struct tu_descriptor_map *map, unsigned i)
2568 {
2569 assert(descriptors_state->valid & (1 << map->set[i]));
2570
2571 struct tu_descriptor_set *set = descriptors_state->sets[map->set[i]];
2572 assert(map->binding[i] < set->layout->binding_count);
2573
2574 const struct tu_descriptor_set_binding_layout *layout =
2575 &set->layout->binding[map->binding[i]];
2576
2577 switch (layout->type) {
2578 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
2579 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2580 return &set->mapped_ptr[layout->offset / 4];
2581 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2582 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2583 return &set->mapped_ptr[layout->offset / 4];
2584 default:
2585 unreachable("unimplemented descriptor type");
2586 break;
2587 }
2588 }
2589
2590 static uint64_t
2591 buffer_ptr(struct tu_descriptor_state *descriptors_state,
2592 const struct tu_descriptor_map *map,
2593 unsigned i)
2594 {
2595 assert(descriptors_state->valid & (1 << map->set[i]));
2596
2597 struct tu_descriptor_set *set = descriptors_state->sets[map->set[i]];
2598 assert(map->binding[i] < set->layout->binding_count);
2599
2600 const struct tu_descriptor_set_binding_layout *layout =
2601 &set->layout->binding[map->binding[i]];
2602
2603 switch (layout->type) {
2604 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2605 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
2606 return descriptors_state->dynamic_buffers[layout->dynamic_offset_offset];
2607 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2608 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
2609 return (uint64_t) set->mapped_ptr[layout->offset / 4 + 1] << 32 |
2610 set->mapped_ptr[layout->offset / 4];
2611 default:
2612 unreachable("unimplemented descriptor type");
2613 break;
2614 }
2615 }
2616
2617 static inline uint32_t
2618 tu6_stage2opcode(gl_shader_stage type)
2619 {
2620 switch (type) {
2621 case MESA_SHADER_VERTEX:
2622 case MESA_SHADER_TESS_CTRL:
2623 case MESA_SHADER_TESS_EVAL:
2624 case MESA_SHADER_GEOMETRY:
2625 return CP_LOAD_STATE6_GEOM;
2626 case MESA_SHADER_FRAGMENT:
2627 case MESA_SHADER_COMPUTE:
2628 case MESA_SHADER_KERNEL:
2629 return CP_LOAD_STATE6_FRAG;
2630 default:
2631 unreachable("bad shader type");
2632 }
2633 }
2634
2635 static inline enum a6xx_state_block
2636 tu6_stage2shadersb(gl_shader_stage type)
2637 {
2638 switch (type) {
2639 case MESA_SHADER_VERTEX:
2640 return SB6_VS_SHADER;
2641 case MESA_SHADER_FRAGMENT:
2642 return SB6_FS_SHADER;
2643 case MESA_SHADER_COMPUTE:
2644 case MESA_SHADER_KERNEL:
2645 return SB6_CS_SHADER;
2646 default:
2647 unreachable("bad shader type");
2648 return ~0;
2649 }
2650 }
2651
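/* Emit the UBO ranges that ir3 promoted to constant registers.  Range 0
 * holds the push constants and is emitted inline; the other ranges are
 * loaded indirectly from the UBO's GPU address.
 */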
2652 static void
2653 tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
2654 struct tu_descriptor_state *descriptors_state,
2655 gl_shader_stage type,
2656 uint32_t *push_constants)
2657 {
2658 const struct tu_program_descriptor_linkage *link =
2659 &pipeline->program.link[type];
2660 const struct ir3_ubo_analysis_state *state = &link->ubo_state;
2661
2662 for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
2663 if (state->range[i].start < state->range[i].end) {
2664 uint32_t size = state->range[i].end - state->range[i].start;
2665 uint32_t offset = state->range[i].start;
2666
2667 /* and even if the start of the const buffer is before
2668 * first_immediate, the end may not be:
2669 */
2670 size = MIN2(size, (16 * link->constlen) - state->range[i].offset);
2671
2672 if (size == 0)
2673 continue;
2674
2675 /* things should be aligned to vec4: */
2676 debug_assert((state->range[i].offset % 16) == 0);
2677 debug_assert((size % 16) == 0);
2678 debug_assert((offset % 16) == 0);
2679
2680 if (i == 0) {
2681 /* push constants */
2682 tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + (size / 4));
2683 tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
2684 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
2685 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
2686 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
2687 CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
2688 tu_cs_emit(cs, 0);
2689 tu_cs_emit(cs, 0);
2690 for (unsigned i = 0; i < size / 4; i++)
2691 tu_cs_emit(cs, push_constants[i + offset / 4]);
2692 continue;
2693 }
2694
2695 uint64_t va = buffer_ptr(descriptors_state, &link->ubo_map, i - 1);
2696
2697 tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3);
2698 tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
2699 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
2700 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
2701 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
2702 CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
2703 tu_cs_emit_qw(cs, va + offset);
2704 }
2705 }
2706 }
2707
2708 static void
2709 tu6_emit_ubos(struct tu_cs *cs, const struct tu_pipeline *pipeline,
2710 struct tu_descriptor_state *descriptors_state,
2711 gl_shader_stage type)
2712 {
2713 const struct tu_program_descriptor_linkage *link =
2714 &pipeline->program.link[type];
2715
2716 uint32_t num = MIN2(link->ubo_map.num, link->const_state.num_ubos);
2717 uint32_t anum = align(num, 2);
2718 uint32_t i;
2719
2720 if (!num)
2721 return;
2722
2723 tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + (2 * anum));
2724 tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(link->const_state.offsets.ubo) |
2725 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
2726 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
2727 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
2728 CP_LOAD_STATE6_0_NUM_UNIT(anum/2));
2729 tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
2730 tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
2731
2732 for (i = 0; i < num; i++)
2733 tu_cs_emit_qw(cs, buffer_ptr(descriptors_state, &link->ubo_map, i));
2734
2735 for (; i < anum; i++) {
2736 tu_cs_emit(cs, 0xffffffff);
2737 tu_cs_emit(cs, 0xffffffff);
2738 }
2739 }
2740
2741 static struct tu_cs_entry
2742 tu6_emit_consts(struct tu_cmd_buffer *cmd,
2743 const struct tu_pipeline *pipeline,
2744 struct tu_descriptor_state *descriptors_state,
2745 gl_shader_stage type)
2746 {
2747 struct tu_cs cs;
2748 tu_cs_begin_sub_stream(cmd->device, &cmd->draw_state, 512, &cs); /* TODO: maximum size? */
2749
2750 tu6_emit_user_consts(&cs, pipeline, descriptors_state, type, cmd->push_constants);
2751 tu6_emit_ubos(&cs, pipeline, descriptors_state, type);
2752
2753 return tu_cs_end_sub_stream(&cmd->draw_state, &cs);
2754 }
2755
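/* Build the texture and sampler descriptors for a stage in the draw state
 * sub-stream and emit the CP_LOAD_STATE6 packets and pointer registers
 * that reference them.
 */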
2756 static VkResult
2757 tu6_emit_textures(struct tu_cmd_buffer *cmd,
2758 gl_shader_stage type,
2759 struct tu_cs_entry *entry,
2760 bool *needs_border)
2761 {
2762 struct tu_device *device = cmd->device;
2763 struct tu_cs *draw_state = &cmd->draw_state;
2764 struct tu_descriptor_state *descriptors_state =
2765 &cmd->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS];
2766 const struct tu_program_descriptor_linkage *link =
2767 &cmd->state.pipeline->program.link[type];
2768 VkResult result;
2769
2770 if (link->texture_map.num == 0 && link->sampler_map.num == 0) {
2771 *entry = (struct tu_cs_entry) {};
2772 return VK_SUCCESS;
2773 }
2774
2775 /* allocate and fill texture state */
2776 struct ts_cs_memory tex_const;
2777 result = tu_cs_alloc(device, draw_state, link->texture_map.num, A6XX_TEX_CONST_DWORDS, &tex_const);
2778 if (result != VK_SUCCESS)
2779 return result;
2780
2781 for (unsigned i = 0; i < link->texture_map.num; i++) {
2782 memcpy(&tex_const.map[A6XX_TEX_CONST_DWORDS*i],
2783 texture_ptr(descriptors_state, &link->texture_map, i),
2784 A6XX_TEX_CONST_DWORDS*4);
2785 }
2786
2787 /* allocate and fill sampler state */
2788 struct ts_cs_memory tex_samp;
2789 result = tu_cs_alloc(device, draw_state, link->sampler_map.num, A6XX_TEX_SAMP_DWORDS, &tex_samp);
2790 if (result != VK_SUCCESS)
2791 return result;
2792
2793 for (unsigned i = 0; i < link->sampler_map.num; i++) {
2794 struct tu_sampler *sampler = sampler_ptr(descriptors_state, &link->sampler_map, i);
2795 memcpy(&tex_samp.map[A6XX_TEX_SAMP_DWORDS*i], sampler->state, sizeof(sampler->state));
2796 *needs_border |= sampler->needs_border;
2797 }
2798
2799 unsigned tex_samp_reg, tex_const_reg, tex_count_reg;
2800 enum a6xx_state_block sb;
2801
2802 switch (type) {
2803 case MESA_SHADER_VERTEX:
2804 sb = SB6_VS_TEX;
2805 tex_samp_reg = REG_A6XX_SP_VS_TEX_SAMP_LO;
2806 tex_const_reg = REG_A6XX_SP_VS_TEX_CONST_LO;
2807 tex_count_reg = REG_A6XX_SP_VS_TEX_COUNT;
2808 break;
2809 case MESA_SHADER_FRAGMENT:
2810 sb = SB6_FS_TEX;
2811 tex_samp_reg = REG_A6XX_SP_FS_TEX_SAMP_LO;
2812 tex_const_reg = REG_A6XX_SP_FS_TEX_CONST_LO;
2813 tex_count_reg = REG_A6XX_SP_FS_TEX_COUNT;
2814 break;
2815 case MESA_SHADER_COMPUTE:
2816 sb = SB6_CS_TEX;
2817 tex_samp_reg = REG_A6XX_SP_CS_TEX_SAMP_LO;
2818 tex_const_reg = REG_A6XX_SP_CS_TEX_CONST_LO;
2819 tex_count_reg = REG_A6XX_SP_CS_TEX_COUNT;
2820 break;
2821 default:
2822 unreachable("bad state block");
2823 }
2824
2825 struct tu_cs cs;
2826 result = tu_cs_begin_sub_stream(device, draw_state, 16, &cs);
2827 if (result != VK_SUCCESS)
2828 return result;
2829
2830 /* output sampler state: */
2831 tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
2832 tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
2833 CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
2834 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
2835 CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
2836 CP_LOAD_STATE6_0_NUM_UNIT(link->sampler_map.num));
2837 tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
2838
2839 tu_cs_emit_pkt4(&cs, tex_samp_reg, 2);
2840 tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
2841
2842 /* emit texture state: */
2843 tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
2844 tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
2845 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
2846 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
2847 CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
2848 CP_LOAD_STATE6_0_NUM_UNIT(link->texture_map.num));
2849 tu_cs_emit_qw(&cs, tex_const.iova); /* SRC_ADDR_LO/HI */
2850
2851 tu_cs_emit_pkt4(&cs, tex_const_reg, 2);
2852 tu_cs_emit_qw(&cs, tex_const.iova); /* SRC_ADDR_LO/HI */
2853
2854 tu_cs_emit_pkt4(&cs, tex_count_reg, 1);
2855 tu_cs_emit(&cs, link->texture_map.num);
2856
2857 *entry = tu_cs_end_sub_stream(draw_state, &cs);
2858 return VK_SUCCESS;
2859 }
2860
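/* Emit the IBO (image/SSBO) descriptors for a stage.  Only SSBOs are
 * handled so far; image descriptors remain a finishme.
 */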
2861 static struct tu_cs_entry
2862 tu6_emit_ibo(struct tu_device *device, struct tu_cs *draw_state,
2863 const struct tu_pipeline *pipeline,
2864 struct tu_descriptor_state *descriptors_state,
2865 gl_shader_stage type)
2866 {
2867 const struct tu_program_descriptor_linkage *link =
2868 &pipeline->program.link[type];
2869
2870 uint32_t size = link->image_mapping.num_ibo * A6XX_TEX_CONST_DWORDS;
2871 if (!size)
2872 return (struct tu_cs_entry) {};
2873
2874 struct tu_cs cs;
2875 tu_cs_begin_sub_stream(device, draw_state, size, &cs);
2876
2877 for (unsigned i = 0; i < link->image_mapping.num_ibo; i++) {
2878 unsigned idx = link->image_mapping.ibo_to_image[i];
2879
2880 if (idx & IBO_SSBO) {
2881 idx &= ~IBO_SSBO;
2882
2883 uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx);
2884 /* We don't expose robustBufferAccess, so leave the size unlimited. */
2885 uint32_t sz = MAX_STORAGE_BUFFER_RANGE / 4;
2886
2887 tu_cs_emit(&cs, A6XX_IBO_0_FMT(TFMT6_32_UINT));
2888 tu_cs_emit(&cs,
2889 A6XX_IBO_1_WIDTH(sz & MASK(15)) |
2890 A6XX_IBO_1_HEIGHT(sz >> 15));
2891 tu_cs_emit(&cs,
2892 A6XX_IBO_2_UNK4 |
2893 A6XX_IBO_2_UNK31 |
2894 A6XX_IBO_2_TYPE(A6XX_TEX_1D));
2895 tu_cs_emit(&cs, 0);
2896 tu_cs_emit_qw(&cs, va);
2897 for (int i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
2898 tu_cs_emit(&cs, 0);
2899 } else {
2900 tu_finishme("Emit images");
2901 }
2902 }
2903
2904 struct tu_cs_entry entry = tu_cs_end_sub_stream(draw_state, &cs);
2905
2906 uint64_t ibo_addr = entry.bo->iova + entry.offset;
2907
2908 tu_cs_begin_sub_stream(device, draw_state, 64, &cs);
2909
2910 /* emit texture state: */
2911 tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6, 3);
2912 tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
2913 CP_LOAD_STATE6_0_STATE_TYPE(type == MESA_SHADER_COMPUTE ?
2914 ST6_IBO : ST6_SHADER) |
2915 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
2916 CP_LOAD_STATE6_0_STATE_BLOCK(type == MESA_SHADER_COMPUTE ?
2917 SB6_CS_SHADER : SB6_IBO) |
2918 CP_LOAD_STATE6_0_NUM_UNIT(link->image_mapping.num_ibo));
2919 tu_cs_emit_qw(&cs, ibo_addr); /* SRC_ADDR_LO/HI */
2920
2921 tu_cs_emit_pkt4(&cs,
2922 type == MESA_SHADER_COMPUTE ?
2923 REG_A6XX_SP_CS_IBO_LO : REG_A6XX_SP_IBO_LO, 2);
2924 tu_cs_emit_qw(&cs, ibo_addr); /* SRC_ADDR_LO/HI */
2925
2926 return tu_cs_end_sub_stream(draw_state, &cs);
2927 }
2928
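/* Border color table, one 128-byte entry per VkBorderColor, in the layout
 * the sampler hardware appears to expect.
 */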
2929 struct PACKED bcolor_entry {
2930 uint32_t fp32[4];
2931 uint16_t ui16[4];
2932 int16_t si16[4];
2933 uint16_t fp16[4];
2934 uint16_t rgb565;
2935 uint16_t rgb5a1;
2936 uint16_t rgba4;
2937 uint8_t __pad0[2];
2938 uint8_t ui8[4];
2939 int8_t si8[4];
2940 uint32_t rgb10a2;
2941 uint32_t z24; /* also s8? */
2942 uint16_t srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */
2943 uint8_t __pad1[56];
2944 } border_color[] = {
2945 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = {},
2946 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = {},
2947 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = {
2948 .fp32[3] = 0x3f800000,
2949 .ui16[3] = 0xffff,
2950 .si16[3] = 0x7fff,
2951 .fp16[3] = 0x3c00,
2952 .rgb5a1 = 0x8000,
2953 .rgba4 = 0xf000,
2954 .ui8[3] = 0xff,
2955 .si8[3] = 0x7f,
2956 .rgb10a2 = 0xc0000000,
2957 .srgb[3] = 0x3c00,
2958 },
2959 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = {
2960 .fp32[3] = 1,
2961 .fp16[3] = 1,
2962 },
2963 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = {
2964 .fp32[0 ... 3] = 0x3f800000,
2965 .ui16[0 ... 3] = 0xffff,
2966 .si16[0 ... 3] = 0x7fff,
2967 .fp16[0 ... 3] = 0x3c00,
2968 .rgb565 = 0xffff,
2969 .rgb5a1 = 0xffff,
2970 .rgba4 = 0xffff,
2971 .ui8[0 ... 3] = 0xff,
2972 .si8[0 ... 3] = 0x7f,
2973 .rgb10a2 = 0xffffffff,
2974 .z24 = 0xffffff,
2975 .srgb[0 ... 3] = 0x3c00,
2976 },
2977 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = {
2978 .fp32[0 ... 3] = 1,
2979 .fp16[0 ... 3] = 1,
2980 },
2981 };
2982
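/* Copy the border colors referenced by the bound VS/FS samplers into the
 * draw state stream and point SP_TP_BORDER_COLOR_BASE_ADDR at them.
 */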
2983 static VkResult
2984 tu6_emit_border_color(struct tu_cmd_buffer *cmd,
2985 struct tu_cs *cs)
2986 {
2987 STATIC_ASSERT(sizeof(struct bcolor_entry) == 128);
2988
2989 const struct tu_pipeline *pipeline = cmd->state.pipeline;
2990 struct tu_descriptor_state *descriptors_state =
2991 &cmd->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS];
2992 const struct tu_descriptor_map *vs_sampler =
2993 &pipeline->program.link[MESA_SHADER_VERTEX].sampler_map;
2994 const struct tu_descriptor_map *fs_sampler =
2995 &pipeline->program.link[MESA_SHADER_FRAGMENT].sampler_map;
2996 struct ts_cs_memory ptr;
2997
2998 VkResult result = tu_cs_alloc(cmd->device, &cmd->draw_state,
2999 vs_sampler->num + fs_sampler->num, 128 / 4,
3000 &ptr);
3001 if (result != VK_SUCCESS)
3002 return result;
3003
3004 for (unsigned i = 0; i < vs_sampler->num; i++) {
3005 struct tu_sampler *sampler = sampler_ptr(descriptors_state, vs_sampler, i);
3006 memcpy(ptr.map, &border_color[sampler->border], 128);
3007 ptr.map += 128 / 4;
3008 }
3009
3010 for (unsigned i = 0; i < fs_sampler->num; i++) {
3011 struct tu_sampler *sampler = sampler_ptr(descriptors_state, fs_sampler, i);
3012 memcpy(ptr.map, &border_color[sampler->border], 128);
3013 ptr.map += 128 / 4;
3014 }
3015
3016 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO, 2);
3017 tu_cs_emit_qw(cs, ptr.iova);
3018 return VK_SUCCESS;
3019 }
3020
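/* Emit all state needed before a draw: directly written registers plus the
 * CP_SET_DRAW_STATE groups for the pipeline, constants, textures and IBOs.
 */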
3021 static VkResult
3022 tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
3023 struct tu_cs *cs,
3024 const struct tu_draw_info *draw)
3025 {
3026 const struct tu_pipeline *pipeline = cmd->state.pipeline;
3027 const struct tu_dynamic_state *dynamic = &cmd->state.dynamic;
3028 struct tu_draw_state_group draw_state_groups[TU_DRAW_STATE_COUNT];
3029 uint32_t draw_state_group_count = 0;
3030
3031 struct tu_descriptor_state *descriptors_state =
3032 &cmd->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS];
3033
3034 VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
3035 if (result != VK_SUCCESS)
3036 return result;
3037
3038 /* TODO lrz */
3039
3040 uint32_t pc_primitive_cntl = 0;
3041 if (pipeline->ia.primitive_restart && draw->indexed)
3042 pc_primitive_cntl |= A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART;
3043
3044 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0);
3045 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9990, 0);
3046 tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0);
3047
3048 tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_0, 1);
3049 tu_cs_emit(cs, pc_primitive_cntl);
3050
3051 if (cmd->state.dirty &
3052 (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH) &&
3053 (pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) {
3054 tu6_emit_gras_su_cntl(cs, pipeline->rast.gras_su_cntl,
3055 dynamic->line_width);
3056 }
3057
3058 if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) &&
3059 (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) {
3060 tu6_emit_stencil_compare_mask(cs, dynamic->stencil_compare_mask.front,
3061 dynamic->stencil_compare_mask.back);
3062 }
3063
3064 if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) &&
3065 (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) {
3066 tu6_emit_stencil_write_mask(cs, dynamic->stencil_write_mask.front,
3067 dynamic->stencil_write_mask.back);
3068 }
3069
3070 if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) &&
3071 (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) {
3072 tu6_emit_stencil_reference(cs, dynamic->stencil_reference.front,
3073 dynamic->stencil_reference.back);
3074 }
3075
3076 if (cmd->state.dirty &
3077 (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_VERTEX_BUFFERS)) {
3078 for (uint32_t i = 0; i < pipeline->vi.count; i++) {
3079 const uint32_t binding = pipeline->vi.bindings[i];
3080 const uint32_t stride = pipeline->vi.strides[i];
3081 const struct tu_buffer *buf = cmd->state.vb.buffers[binding];
3082 const VkDeviceSize offset = buf->bo_offset +
3083 cmd->state.vb.offsets[binding] +
3084 pipeline->vi.offsets[i];
3085 const VkDeviceSize size =
3086 offset < buf->bo->size ? buf->bo->size - offset : 0;
3087
3088 tu_cs_emit_pkt4(cs, REG_A6XX_VFD_FETCH(i), 4);
3089 tu_cs_emit_qw(cs, buf->bo->iova + offset);
3090 tu_cs_emit(cs, size);
3091 tu_cs_emit(cs, stride);
3092 }
3093 }
3094
3095 if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) {
3096 draw_state_groups[draw_state_group_count++] =
3097 (struct tu_draw_state_group) {
3098 .id = TU_DRAW_STATE_PROGRAM,
3099 .enable_mask = 0x6,
3100 .ib = pipeline->program.state_ib,
3101 };
3102 draw_state_groups[draw_state_group_count++] =
3103 (struct tu_draw_state_group) {
3104 .id = TU_DRAW_STATE_PROGRAM_BINNING,
3105 .enable_mask = 0x1,
3106 .ib = pipeline->program.binning_state_ib,
3107 };
3108 draw_state_groups[draw_state_group_count++] =
3109 (struct tu_draw_state_group) {
3110 .id = TU_DRAW_STATE_VI,
3111 .enable_mask = 0x6,
3112 .ib = pipeline->vi.state_ib,
3113 };
3114 draw_state_groups[draw_state_group_count++] =
3115 (struct tu_draw_state_group) {
3116 .id = TU_DRAW_STATE_VI_BINNING,
3117 .enable_mask = 0x1,
3118 .ib = pipeline->vi.binning_state_ib,
3119 };
3120 draw_state_groups[draw_state_group_count++] =
3121 (struct tu_draw_state_group) {
3122 .id = TU_DRAW_STATE_VP,
3123 .enable_mask = 0x7,
3124 .ib = pipeline->vp.state_ib,
3125 };
3126 draw_state_groups[draw_state_group_count++] =
3127 (struct tu_draw_state_group) {
3128 .id = TU_DRAW_STATE_RAST,
3129 .enable_mask = 0x7,
3130 .ib = pipeline->rast.state_ib,
3131 };
3132 draw_state_groups[draw_state_group_count++] =
3133 (struct tu_draw_state_group) {
3134 .id = TU_DRAW_STATE_DS,
3135 .enable_mask = 0x7,
3136 .ib = pipeline->ds.state_ib,
3137 };
3138 draw_state_groups[draw_state_group_count++] =
3139 (struct tu_draw_state_group) {
3140 .id = TU_DRAW_STATE_BLEND,
3141 .enable_mask = 0x7,
3142 .ib = pipeline->blend.state_ib,
3143 };
3144 }
3145
3146 if (cmd->state.dirty &
3147 (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DESCRIPTOR_SETS)) {
3148 bool needs_border = false;
3149 struct tu_cs_entry vs_tex, fs_tex;
3150
3151 result = tu6_emit_textures(cmd, MESA_SHADER_VERTEX, &vs_tex, &needs_border);
3152 if (result != VK_SUCCESS)
3153 return result;
3154
3155 result = tu6_emit_textures(cmd, MESA_SHADER_FRAGMENT, &fs_tex, &needs_border);
3156 if (result != VK_SUCCESS)
3157 return result;
3158
3159 draw_state_groups[draw_state_group_count++] =
3160 (struct tu_draw_state_group) {
3161 .id = TU_DRAW_STATE_VS_CONST,
3162 .enable_mask = 0x7,
3163 .ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_VERTEX)
3164 };
3165 draw_state_groups[draw_state_group_count++] =
3166 (struct tu_draw_state_group) {
3167 .id = TU_DRAW_STATE_FS_CONST,
3168 .enable_mask = 0x6,
3169 .ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_FRAGMENT)
3170 };
3171 draw_state_groups[draw_state_group_count++] =
3172 (struct tu_draw_state_group) {
3173 .id = TU_DRAW_STATE_VS_TEX,
3174 .enable_mask = 0x7,
3175 .ib = vs_tex,
3176 };
3177 draw_state_groups[draw_state_group_count++] =
3178 (struct tu_draw_state_group) {
3179 .id = TU_DRAW_STATE_FS_TEX,
3180 .enable_mask = 0x6,
3181 .ib = fs_tex,
3182 };
3183 draw_state_groups[draw_state_group_count++] =
3184 (struct tu_draw_state_group) {
3185 .id = TU_DRAW_STATE_FS_IBO,
3186 .enable_mask = 0x6,
3187 .ib = tu6_emit_ibo(cmd->device, &cmd->draw_state, pipeline,
3188 descriptors_state, MESA_SHADER_FRAGMENT)
3189 };
3190
3191 if (needs_border) {
3192 result = tu6_emit_border_color(cmd, cs);
3193 if (result != VK_SUCCESS)
3194 return result;
3195 }
3196 }
3197
3198 tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_group_count);
3199 for (uint32_t i = 0; i < draw_state_group_count; i++) {
3200 const struct tu_draw_state_group *group = &draw_state_groups[i];
3201
3202 uint32_t cp_set_draw_state =
3203 CP_SET_DRAW_STATE__0_COUNT(group->ib.size / 4) |
3204 CP_SET_DRAW_STATE__0_ENABLE_MASK(group->enable_mask) |
3205 CP_SET_DRAW_STATE__0_GROUP_ID(group->id);
3206 uint64_t iova;
3207 if (group->ib.size) {
3208 iova = group->ib.bo->iova + group->ib.offset;
3209 } else {
3210 cp_set_draw_state |= CP_SET_DRAW_STATE__0_DISABLE;
3211 iova = 0;
3212 }
3213
3214 tu_cs_emit(cs, cp_set_draw_state);
3215 tu_cs_emit_qw(cs, iova);
3216 }
3217
3218 tu_cs_sanity_check(cs);
3219
3220 /* track BOs */
3221 if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) {
3222 for (uint32_t i = 0; i < MAX_VBS; i++) {
3223 const struct tu_buffer *buf = cmd->state.vb.buffers[i];
3224 if (buf)
3225 tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
3226 }
3227 }
3228 if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) {
3229 unsigned i;
3230 for_each_bit(i, descriptors_state->valid) {
3231 struct tu_descriptor_set *set = descriptors_state->sets[i];
3232 for (unsigned j = 0; j < set->layout->buffer_count; ++j)
3233 if (set->descriptors[j]) {
3234 tu_bo_list_add(&cmd->bo_list, set->descriptors[j],
3235 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
3236 }
3237 }
3238 }
3239
3240 /* Fragment shader state overwrites compute shader state, so flag the
3241 * compute pipeline for re-emit.
3242 */
3243 cmd->state.dirty = TU_CMD_DIRTY_COMPUTE_PIPELINE;
3244 return VK_SUCCESS;
3245 }
3246
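/* Emit a direct (non-indirect) draw using CP_DRAW_INDX_OFFSET, either
 * sourcing indices from the bound index buffer or auto-generating them.
 */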
3247 static void
3248 tu6_emit_draw_direct(struct tu_cmd_buffer *cmd,
3249 struct tu_cs *cs,
3250 const struct tu_draw_info *draw)
3251 {
3252
3253 const enum pc_di_primtype primtype = cmd->state.pipeline->ia.primtype;
3254
3255 tu_cs_emit_pkt4(cs, REG_A6XX_VFD_INDEX_OFFSET, 2);
3256 tu_cs_emit(cs, draw->vertex_offset);
3257 tu_cs_emit(cs, draw->first_instance);
3258
3259 /* TODO hw binning */
3260 if (draw->indexed) {
3261 const enum a4xx_index_size index_size =
3262 tu6_index_size(cmd->state.index_type);
3263 const uint32_t index_bytes =
3264 (cmd->state.index_type == VK_INDEX_TYPE_UINT32) ? 4 : 2;
3265 const struct tu_buffer *buf = cmd->state.index_buffer;
3266 const VkDeviceSize offset = buf->bo_offset + cmd->state.index_offset +
3267 index_bytes * draw->first_index;
3268 const uint32_t size = index_bytes * draw->count;
3269
3270 const uint32_t cp_draw_indx =
3271 CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
3272 CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_DMA) |
3273 CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(index_size) |
3274 CP_DRAW_INDX_OFFSET_0_VIS_CULL(USE_VISIBILITY) | 0x2000;
3275
3276 tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 7);
3277 tu_cs_emit(cs, cp_draw_indx);
3278 tu_cs_emit(cs, draw->instance_count);
3279 tu_cs_emit(cs, draw->count);
3280 tu_cs_emit(cs, 0x0); /* XXX */
3281 tu_cs_emit_qw(cs, buf->bo->iova + offset);
3282 tu_cs_emit(cs, size);
3283 } else {
3284 const uint32_t cp_draw_indx =
3285 CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
3286 CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
3287 CP_DRAW_INDX_OFFSET_0_VIS_CULL(USE_VISIBILITY) | 0x2000;
3288
3289 tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
3290 tu_cs_emit(cs, cp_draw_indx);
3291 tu_cs_emit(cs, draw->instance_count);
3292 tu_cs_emit(cs, draw->count);
3293 }
3294 }
3295
3296 static void
3297 tu_draw(struct tu_cmd_buffer *cmd, const struct tu_draw_info *draw)
3298 {
3299 struct tu_cs *cs = &cmd->draw_cs;
3300 VkResult result;
3301
3302 result = tu6_bind_draw_states(cmd, cs, draw);
3303 if (result != VK_SUCCESS) {
3304 cmd->record_result = result;
3305 return;
3306 }
3307
3308 result = tu_cs_reserve_space(cmd->device, cs, 32);
3309 if (result != VK_SUCCESS) {
3310 cmd->record_result = result;
3311 return;
3312 }
3313
3314 if (draw->indirect) {
3315 tu_finishme("indirect draw");
3316 return;
3317 }
3318
3319 /* TODO tu6_emit_marker should pick different regs depending on cs */
3320
3321 tu6_emit_marker(cmd, cs);
3322 tu6_emit_draw_direct(cmd, cs, draw);
3323 tu6_emit_marker(cmd, cs);
3324
3325 cmd->wait_for_idle = true;
3326
3327 tu_cs_sanity_check(cs);
3328 }
3329
3330 void
3331 tu_CmdDraw(VkCommandBuffer commandBuffer,
3332 uint32_t vertexCount,
3333 uint32_t instanceCount,
3334 uint32_t firstVertex,
3335 uint32_t firstInstance)
3336 {
3337 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3338 struct tu_draw_info info = {};
3339
3340 info.count = vertexCount;
3341 info.instance_count = instanceCount;
3342 info.first_instance = firstInstance;
3343 info.vertex_offset = firstVertex;
3344
3345 tu_draw(cmd_buffer, &info);
3346 }
3347
3348 void
3349 tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
3350 uint32_t indexCount,
3351 uint32_t instanceCount,
3352 uint32_t firstIndex,
3353 int32_t vertexOffset,
3354 uint32_t firstInstance)
3355 {
3356 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3357 struct tu_draw_info info = {};
3358
3359 info.indexed = true;
3360 info.count = indexCount;
3361 info.instance_count = instanceCount;
3362 info.first_index = firstIndex;
3363 info.vertex_offset = vertexOffset;
3364 info.first_instance = firstInstance;
3365
3366 tu_draw(cmd_buffer, &info);
3367 }
3368
3369 void
3370 tu_CmdDrawIndirect(VkCommandBuffer commandBuffer,
3371 VkBuffer _buffer,
3372 VkDeviceSize offset,
3373 uint32_t drawCount,
3374 uint32_t stride)
3375 {
3376 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3377 TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
3378 struct tu_draw_info info = {};
3379
3380 info.count = drawCount;
3381 info.indirect = buffer;
3382 info.indirect_offset = offset;
3383 info.stride = stride;
3384
3385 tu_draw(cmd_buffer, &info);
3386 }
3387
3388 void
3389 tu_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
3390 VkBuffer _buffer,
3391 VkDeviceSize offset,
3392 uint32_t drawCount,
3393 uint32_t stride)
3394 {
3395 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3396 TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
3397 struct tu_draw_info info = {};
3398
3399 info.indexed = true;
3400 info.count = drawCount;
3401 info.indirect = buffer;
3402 info.indirect_offset = offset;
3403 info.stride = stride;
3404
3405 tu_draw(cmd_buffer, &info);
3406 }
3407
3408 struct tu_dispatch_info
3409 {
3410 /**
3411 * Determine the layout of the grid (in block units) to be used.
3412 */
3413 uint32_t blocks[3];
3414
3415 /**
3416 * A starting offset for the grid. If unaligned is set, the offset
3417 * must still be aligned.
3418 */
3419 uint32_t offsets[3];
3420 /**
3421 * Whether it's an unaligned compute dispatch.
3422 */
3423 bool unaligned;
3424
3425 /**
3426 * Indirect compute parameters resource.
3427 */
3428 struct tu_buffer *indirect;
3429 uint64_t indirect_offset;
3430 };
3431
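/* Emit the compute driver params (workgroup counts and local size) as
 * constants, when the shader's constlen leaves room for them.
 */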
3432 static void
3433 tu_emit_compute_driver_params(struct tu_cs *cs, struct tu_pipeline *pipeline,
3434 const struct tu_dispatch_info *info)
3435 {
3436 gl_shader_stage type = MESA_SHADER_COMPUTE;
3437 const struct tu_program_descriptor_linkage *link =
3438 &pipeline->program.link[type];
3439 const struct ir3_const_state *const_state = &link->const_state;
3440 uint32_t offset_dwords = const_state->offsets.driver_param;
3441
3442 if (link->constlen <= offset_dwords)
3443 return;
3444
3445 if (!info->indirect) {
3446 uint32_t driver_params[] = {
3447 info->blocks[0],
3448 info->blocks[1],
3449 info->blocks[2],
3450 pipeline->compute.local_size[0],
3451 pipeline->compute.local_size[1],
3452 pipeline->compute.local_size[2],
3453 };
3454 uint32_t num_consts = MIN2(const_state->num_driver_params,
3455 link->constlen - offset_dwords);
3456 uint32_t align_size = align(num_consts, 4);
3457
3458 /* Push the driver params as constants; DST_OFF and NUM_UNIT are in vec4 units, hence the /4 and padding the dword count to a multiple of four. */
3459 tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + align_size);
3460 tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset_dwords / 4) |
3461 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
3462 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
3463 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
3464 CP_LOAD_STATE6_0_NUM_UNIT(align_size / 4));
3465 tu_cs_emit(cs, 0);
3466 tu_cs_emit(cs, 0);
3467 uint32_t i;
3468 for (i = 0; i < num_consts; i++)
3469 tu_cs_emit(cs, driver_params[i]);
3470 for (; i < align_size; i++)
3471 tu_cs_emit(cs, 0);
3472 } else {
3473 tu_finishme("Indirect driver params");
3474 }
3475 }
3476
3477 static void
3478 tu_dispatch(struct tu_cmd_buffer *cmd,
3479 const struct tu_dispatch_info *info)
3480 {
3481 struct tu_cs *cs = &cmd->cs;
3482 struct tu_pipeline *pipeline = cmd->state.compute_pipeline;
3483 struct tu_descriptor_state *descriptors_state =
3484 &cmd->descriptors[VK_PIPELINE_BIND_POINT_COMPUTE];
3485
3486 VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
3487 if (result != VK_SUCCESS) {
3488 cmd->record_result = result;
3489 return;
3490 }
3491
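/* The compute program state group only needs to be re-emitted when the
 * compute pipeline itself has changed.
 */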
3492 if (cmd->state.dirty & TU_CMD_DIRTY_COMPUTE_PIPELINE)
3493 tu_cs_emit_ib(cs, &pipeline->program.state_ib);
3494
3495 struct tu_cs_entry ib;
3496
3497 ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_COMPUTE);
3498 if (ib.size)
3499 tu_cs_emit_ib(cs, &ib);
3500
3501 tu_emit_compute_driver_params(cs, pipeline, info);
3502
3503 bool needs_border;
3504 result = tu6_emit_textures(cmd, MESA_SHADER_COMPUTE, &ib, &needs_border);
3505 if (result != VK_SUCCESS) {
3506 cmd->record_result = result;
3507 return;
3508 }
3509
3510 if (ib.size)
3511 tu_cs_emit_ib(cs, &ib);
3512
3513 if (needs_border)
3514 tu6_emit_border_color(cmd, cs);
3515
3516 ib = tu6_emit_ibo(cmd->device, &cmd->draw_state, pipeline,
3517 descriptors_state, MESA_SHADER_COMPUTE);
3518 if (ib.size)
3519 tu_cs_emit_ib(cs, &ib);
3520
3521 /* track BOs */
3522 if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) {
3523 unsigned i;
3524 for_each_bit(i, descriptors_state->valid) {
3525 struct tu_descriptor_set *set = descriptors_state->sets[i];
3526 for (unsigned j = 0; j < set->layout->buffer_count; ++j)
3527 if (set->descriptors[j]) {
3528 tu_bo_list_add(&cmd->bo_list, set->descriptors[j],
3529 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
3530 }
3531 }
3532 }
3533
3534 /* Compute shader state overwrites fragment shader state, so we flag the
3535 * graphics pipeline for re-emit.
3536 */
3537 cmd->state.dirty = TU_CMD_DIRTY_PIPELINE;
3538
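/* Mode 0x8 appears to mark the start of compute work for the CP; the
 * graphics path emits its own markers around draws.
 */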
3539 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
3540 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(0x8));
3541
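/* Program the compute NDRange: the local size is encoded minus one, the
 * global size is the local size times the workgroup count, and the global
 * offsets are left at zero (info->offsets is not applied here yet).
 */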
3542 const uint32_t *local_size = pipeline->compute.local_size;
3543 const uint32_t *num_groups = info->blocks;
3544 tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_NDRANGE_0, 7);
3545 tu_cs_emit(cs,
3546 A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(3) |
3547 A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) |
3548 A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) |
3549 A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1));
3550 tu_cs_emit(cs, A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0]));
3551 tu_cs_emit(cs, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */
3552 tu_cs_emit(cs, A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1]));
3553 tu_cs_emit(cs, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */
3554 tu_cs_emit(cs, A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2]));
3555 tu_cs_emit(cs, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */
3556
3557 tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_KERNEL_GROUP_X, 3);
3558 tu_cs_emit(cs, 1); /* HLSQ_CS_KERNEL_GROUP_X */
3559 tu_cs_emit(cs, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
3560 tu_cs_emit(cs, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
3561
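/* Kick off the grid. For indirect dispatches CP_EXEC_CS_INDIRECT reads the
 * workgroup counts from the indirect buffer while the local size is still
 * baked into the packet; direct dispatches pass the counts inline.
 */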
3562 if (info->indirect) {
3563 uint64_t iova = tu_buffer_iova(info->indirect) + info->indirect_offset;
3564
3565 tu_bo_list_add(&cmd->bo_list, info->indirect->bo,
3566 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
3567
3568 tu_cs_emit_pkt7(cs, CP_EXEC_CS_INDIRECT, 4);
3569 tu_cs_emit(cs, 0x00000000);
3570 tu_cs_emit_qw(cs, iova);
3571 tu_cs_emit(cs,
3572 A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
3573 A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
3574 A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
3575 } else {
3576 tu_cs_emit_pkt7(cs, CP_EXEC_CS, 4);
3577 tu_cs_emit(cs, 0x00000000);
3578 tu_cs_emit(cs, CP_EXEC_CS_1_NGROUPS_X(info->blocks[0]));
3579 tu_cs_emit(cs, CP_EXEC_CS_2_NGROUPS_Y(info->blocks[1]));
3580 tu_cs_emit(cs, CP_EXEC_CS_3_NGROUPS_Z(info->blocks[2]));
3581 }
3582
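/* Stall until the GPU is idle and flush caches so that subsequent commands
 * observe the results of the dispatch.
 */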
3583 tu_cs_emit_wfi(cs);
3584
3585 tu6_emit_cache_flush(cmd, cs);
3586 }
3587
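/* As with the draw entry points, the vkCmdDispatch* functions below only fill
 * in a tu_dispatch_info. Note that the base workgroup offsets are recorded
 * here but, as noted above, not yet programmed by tu_dispatch().
 */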
3588 void
3589 tu_CmdDispatchBase(VkCommandBuffer commandBuffer,
3590 uint32_t base_x,
3591 uint32_t base_y,
3592 uint32_t base_z,
3593 uint32_t x,
3594 uint32_t y,
3595 uint32_t z)
3596 {
3597 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3598 struct tu_dispatch_info info = {};
3599
3600 info.blocks[0] = x;
3601 info.blocks[1] = y;
3602 info.blocks[2] = z;
3603
3604 info.offsets[0] = base_x;
3605 info.offsets[1] = base_y;
3606 info.offsets[2] = base_z;
3607 tu_dispatch(cmd_buffer, &info);
3608 }
3609
3610 void
3611 tu_CmdDispatch(VkCommandBuffer commandBuffer,
3612 uint32_t x,
3613 uint32_t y,
3614 uint32_t z)
3615 {
3616 tu_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z);
3617 }
3618
3619 void
3620 tu_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
3621 VkBuffer _buffer,
3622 VkDeviceSize offset)
3623 {
3624 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3625 TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
3626 struct tu_dispatch_info info = {};
3627
3628 info.indirect = buffer;
3629 info.indirect_offset = offset;
3630
3631 tu_dispatch(cmd_buffer, &info);
3632 }
3633
3634 void
3635 tu_CmdEndRenderPass(VkCommandBuffer commandBuffer)
3636 {
3637 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3638
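/* Ending the render pass is what actually executes it: close the draw
 * command stream and replay it for each tile.
 */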
3639 tu_cs_end(&cmd_buffer->draw_cs);
3640
3641 tu_cmd_render_tiles(cmd_buffer);
3642
3643 /* discard draw_cs entries now that the tiles are rendered */
3644 tu_cs_discard_entries(&cmd_buffer->draw_cs);
3645 tu_cs_begin(&cmd_buffer->draw_cs);
3646
3647 vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
3648 cmd_buffer->state.attachments = NULL;
3649
3650 cmd_buffer->state.pass = NULL;
3651 cmd_buffer->state.subpass = NULL;
3652 cmd_buffer->state.framebuffer = NULL;
3653 }
3654
3655 void
3656 tu_CmdEndRenderPass2KHR(VkCommandBuffer commandBuffer,
3657 const VkSubpassEndInfoKHR *pSubpassEndInfo)
3658 {
3659 tu_CmdEndRenderPass(commandBuffer);
3660 }
3661
3662 struct tu_barrier_info
3663 {
3664 uint32_t eventCount;
3665 const VkEvent *pEvents;
3666 VkPipelineStageFlags srcStageMask;
3667 };
3668
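/* Common implementation behind vkCmdPipelineBarrier and vkCmdWaitEvents. */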
3669 static void
3670 tu_barrier(struct tu_cmd_buffer *cmd_buffer,
3671 uint32_t memoryBarrierCount,
3672 const VkMemoryBarrier *pMemoryBarriers,
3673 uint32_t bufferMemoryBarrierCount,
3674 const VkBufferMemoryBarrier *pBufferMemoryBarriers,
3675 uint32_t imageMemoryBarrierCount,
3676 const VkImageMemoryBarrier *pImageMemoryBarriers,
3677 const struct tu_barrier_info *info)
3678 {
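/* Not implemented yet: no cache flushes or stalls are emitted for barriers. */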
3679 }
3680
3681 void
3682 tu_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
3683 VkPipelineStageFlags srcStageMask,
3684 VkPipelineStageFlags destStageMask,
3685 VkBool32 byRegion,
3686 uint32_t memoryBarrierCount,
3687 const VkMemoryBarrier *pMemoryBarriers,
3688 uint32_t bufferMemoryBarrierCount,
3689 const VkBufferMemoryBarrier *pBufferMemoryBarriers,
3690 uint32_t imageMemoryBarrierCount,
3691 const VkImageMemoryBarrier *pImageMemoryBarriers)
3692 {
3693 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3694 struct tu_barrier_info info;
3695
3696 info.eventCount = 0;
3697 info.pEvents = NULL;
3698 info.srcStageMask = srcStageMask;
3699
3700 tu_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
3701 bufferMemoryBarrierCount, pBufferMemoryBarriers,
3702 imageMemoryBarrierCount, pImageMemoryBarriers, &info);
3703 }
3704
3705 static void
3706 write_event(struct tu_cmd_buffer *cmd_buffer,
3707 struct tu_event *event,
3708 VkPipelineStageFlags stageMask,
3709 unsigned value)
3710 {
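/* Not implemented yet: the event value is never written to memory. */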
3711 }
3712
3713 void
3714 tu_CmdSetEvent(VkCommandBuffer commandBuffer,
3715 VkEvent _event,
3716 VkPipelineStageFlags stageMask)
3717 {
3718 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3719 TU_FROM_HANDLE(tu_event, event, _event);
3720
3721 write_event(cmd_buffer, event, stageMask, 1);
3722 }
3723
3724 void
3725 tu_CmdResetEvent(VkCommandBuffer commandBuffer,
3726 VkEvent _event,
3727 VkPipelineStageFlags stageMask)
3728 {
3729 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3730 TU_FROM_HANDLE(tu_event, event, _event);
3731
3732 write_event(cmd_buffer, event, stageMask, 0);
3733 }
3734
3735 void
3736 tu_CmdWaitEvents(VkCommandBuffer commandBuffer,
3737 uint32_t eventCount,
3738 const VkEvent *pEvents,
3739 VkPipelineStageFlags srcStageMask,
3740 VkPipelineStageFlags dstStageMask,
3741 uint32_t memoryBarrierCount,
3742 const VkMemoryBarrier *pMemoryBarriers,
3743 uint32_t bufferMemoryBarrierCount,
3744 const VkBufferMemoryBarrier *pBufferMemoryBarriers,
3745 uint32_t imageMemoryBarrierCount,
3746 const VkImageMemoryBarrier *pImageMemoryBarriers)
3747 {
3748 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3749 struct tu_barrier_info info;
3750
3751 info.eventCount = eventCount;
3752 info.pEvents = pEvents;
3753 info.srcStageMask = 0;
3754
3755 tu_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
3756 bufferMemoryBarrierCount, pBufferMemoryBarriers,
3757 imageMemoryBarrierCount, pImageMemoryBarriers, &info);
3758 }
3759
3760 void
3761 tu_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask)
3762 {
3763 /* No-op */
3764 }