turnip: add hw binning
mesa.git: src/freedreno/vulkan/tu_cmd_buffer.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27
28 #include "tu_private.h"
29
30 #include "registers/adreno_pm4.xml.h"
31 #include "registers/adreno_common.xml.h"
32 #include "registers/a6xx.xml.h"
33
34 #include "vk_format.h"
35
36 #include "tu_cs.h"
37 #include "tu_blit.h"
38
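/* Scratch register used for the VSC overflow check: the binning pass leaves it
 * at 1 when neither visibility-stream buffer overflowed (0 on overflow), and the
 * per-tile command streams test bit 0 with CP_REG_TEST/CP_COND_REG_EXEC to decide
 * whether the binning data can be trusted.
 */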
39 #define OVERFLOW_FLAG_REG REG_A6XX_CP_SCRATCH_REG(0)
40
41 void
42 tu_bo_list_init(struct tu_bo_list *list)
43 {
44 list->count = list->capacity = 0;
45 list->bo_infos = NULL;
46 }
47
48 void
49 tu_bo_list_destroy(struct tu_bo_list *list)
50 {
51 free(list->bo_infos);
52 }
53
54 void
55 tu_bo_list_reset(struct tu_bo_list *list)
56 {
57 list->count = 0;
58 }
59
60 /**
61 * \a flags consists of MSM_SUBMIT_BO_FLAGS.
62 */
63 static uint32_t
64 tu_bo_list_add_info(struct tu_bo_list *list,
65 const struct drm_msm_gem_submit_bo *bo_info)
66 {
67 assert(bo_info->handle != 0);
68
69 for (uint32_t i = 0; i < list->count; ++i) {
70 if (list->bo_infos[i].handle == bo_info->handle) {
71 assert(list->bo_infos[i].presumed == bo_info->presumed);
72 list->bo_infos[i].flags |= bo_info->flags;
73 return i;
74 }
75 }
76
77 /* grow list->bo_infos if needed */
78 if (list->count == list->capacity) {
79 uint32_t new_capacity = MAX2(2 * list->count, 16);
80 struct drm_msm_gem_submit_bo *new_bo_infos = realloc(
81 list->bo_infos, new_capacity * sizeof(struct drm_msm_gem_submit_bo));
82 if (!new_bo_infos)
83 return TU_BO_LIST_FAILED;
84 list->bo_infos = new_bo_infos;
85 list->capacity = new_capacity;
86 }
87
88 list->bo_infos[list->count] = *bo_info;
89 return list->count++;
90 }
91
92 uint32_t
93 tu_bo_list_add(struct tu_bo_list *list,
94 const struct tu_bo *bo,
95 uint32_t flags)
96 {
97 return tu_bo_list_add_info(list, &(struct drm_msm_gem_submit_bo) {
98 .flags = flags,
99 .handle = bo->gem_handle,
100 .presumed = bo->iova,
101 });
102 }
103
104 VkResult
105 tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other)
106 {
107 for (uint32_t i = 0; i < other->count; i++) {
108 if (tu_bo_list_add_info(list, other->bo_infos + i) == TU_BO_LIST_FAILED)
109 return VK_ERROR_OUT_OF_HOST_MEMORY;
110 }
111
112 return VK_SUCCESS;
113 }
114
115 static VkResult
116 tu_tiling_config_update_gmem_layout(struct tu_tiling_config *tiling,
117 const struct tu_device *dev)
118 {
119 const uint32_t gmem_size = dev->physical_device->gmem_size;
120 uint32_t offset = 0;
121
122 for (uint32_t i = 0; i < tiling->buffer_count; i++) {
123 /* 16KB-aligned */
124 offset = align(offset, 0x4000);
125
126 tiling->gmem_offsets[i] = offset;
127 offset += tiling->tile0.extent.width * tiling->tile0.extent.height *
128 tiling->buffer_cpp[i];
129 }
130
131 return offset <= gmem_size ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
132 }
133
134 static void
135 tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
136 const struct tu_device *dev)
137 {
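/* Start from a single tile covering the aligned render area, then keep adding
 * tiles along the wider dimension until the tile width fits the hardware limit
 * and all attachments fit in GMEM.
 */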
138 const uint32_t tile_align_w = dev->physical_device->tile_align_w;
139 const uint32_t tile_align_h = dev->physical_device->tile_align_h;
140 const uint32_t max_tile_width = 1024; /* A6xx */
141
142 tiling->tile0.offset = (VkOffset2D) {
143 .x = tiling->render_area.offset.x & ~(tile_align_w - 1),
144 .y = tiling->render_area.offset.y & ~(tile_align_h - 1),
145 };
146
147 const uint32_t ra_width =
148 tiling->render_area.extent.width +
149 (tiling->render_area.offset.x - tiling->tile0.offset.x);
150 const uint32_t ra_height =
151 tiling->render_area.extent.height +
152 (tiling->render_area.offset.y - tiling->tile0.offset.y);
153
154 /* start from 1 tile */
155 tiling->tile_count = (VkExtent2D) {
156 .width = 1,
157 .height = 1,
158 };
159 tiling->tile0.extent = (VkExtent2D) {
160 .width = align(ra_width, tile_align_w),
161 .height = align(ra_height, tile_align_h),
162 };
163
164 /* do not exceed max tile width */
165 while (tiling->tile0.extent.width > max_tile_width) {
166 tiling->tile_count.width++;
167 tiling->tile0.extent.width =
168 align(ra_width / tiling->tile_count.width, tile_align_w);
169 }
170
171 /* do not exceed gmem size */
172 while (tu_tiling_config_update_gmem_layout(tiling, dev) != VK_SUCCESS) {
173 if (tiling->tile0.extent.width > tiling->tile0.extent.height) {
174 tiling->tile_count.width++;
175 tiling->tile0.extent.width =
176 align(ra_width / tiling->tile_count.width, tile_align_w);
177 } else {
178 tiling->tile_count.height++;
179 tiling->tile0.extent.height =
180 align(ra_height / tiling->tile_count.height, tile_align_h);
181 }
182 }
183 }
184
185 static void
186 tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling,
187 const struct tu_device *dev)
188 {
189 const uint32_t max_pipe_count = 32; /* A6xx */
190
191 /* start from 1 tile per pipe */
192 tiling->pipe0 = (VkExtent2D) {
193 .width = 1,
194 .height = 1,
195 };
196 tiling->pipe_count = tiling->tile_count;
197
198 /* do not exceed max pipe count vertically */
199 while (tiling->pipe_count.height > max_pipe_count) {
200 tiling->pipe0.height += 2;
201 tiling->pipe_count.height =
202 (tiling->tile_count.height + tiling->pipe0.height - 1) /
203 tiling->pipe0.height;
204 }
205
206 /* do not exceed max pipe count */
207 while (tiling->pipe_count.width * tiling->pipe_count.height >
208 max_pipe_count) {
209 tiling->pipe0.width += 1;
210 tiling->pipe_count.width =
211 (tiling->tile_count.width + tiling->pipe0.width - 1) /
212 tiling->pipe0.width;
213 }
214 }
215
216 static void
217 tu_tiling_config_update_pipes(struct tu_tiling_config *tiling,
218 const struct tu_device *dev)
219 {
220 const uint32_t max_pipe_count = 32; /* A6xx */
221 const uint32_t used_pipe_count =
222 tiling->pipe_count.width * tiling->pipe_count.height;
223 const VkExtent2D last_pipe = {
224 .width = (tiling->tile_count.width - 1) % tiling->pipe0.width + 1,
225 .height = (tiling->tile_count.height - 1) % tiling->pipe0.height + 1,
226 };
227
228 assert(used_pipe_count <= max_pipe_count);
229 assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config));
230
231 for (uint32_t y = 0; y < tiling->pipe_count.height; y++) {
232 for (uint32_t x = 0; x < tiling->pipe_count.width; x++) {
233 const uint32_t pipe_x = tiling->pipe0.width * x;
234 const uint32_t pipe_y = tiling->pipe0.height * y;
235 const uint32_t pipe_w = (x == tiling->pipe_count.width - 1)
236 ? last_pipe.width
237 : tiling->pipe0.width;
238 const uint32_t pipe_h = (y == tiling->pipe_count.height - 1)
239 ? last_pipe.height
240 : tiling->pipe0.height;
241 const uint32_t n = tiling->pipe_count.width * y + x;
242
243 tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
244 A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
245 A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
246 A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
247 tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
248 }
249 }
250
251 memset(tiling->pipe_config + used_pipe_count, 0,
252 sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
253 }
254
255 static void
256 tu_tiling_config_update(struct tu_tiling_config *tiling,
257 const struct tu_device *dev,
258 const uint32_t *buffer_cpp,
259 uint32_t buffer_count,
260 const VkRect2D *render_area)
261 {
262 /* see if there is any real change */
263 const bool ra_changed =
264 render_area &&
265 memcmp(&tiling->render_area, render_area, sizeof(*render_area));
266 const bool buf_changed = tiling->buffer_count != buffer_count ||
267 memcmp(tiling->buffer_cpp, buffer_cpp,
268 sizeof(*buffer_cpp) * buffer_count);
269 if (!ra_changed && !buf_changed)
270 return;
271
272 if (ra_changed)
273 tiling->render_area = *render_area;
274
275 if (buf_changed) {
276 memcpy(tiling->buffer_cpp, buffer_cpp,
277 sizeof(*buffer_cpp) * buffer_count);
278 tiling->buffer_count = buffer_count;
279 }
280
281 tu_tiling_config_update_tile_layout(tiling, dev);
282 tu_tiling_config_update_pipe_layout(tiling, dev);
283 tu_tiling_config_update_pipes(tiling, dev);
284 }
285
286 static void
287 tu_tiling_config_get_tile(const struct tu_tiling_config *tiling,
288 const struct tu_device *dev,
289 uint32_t tx,
290 uint32_t ty,
291 struct tu_tile *tile)
292 {
293 /* find the pipe and the slot for tile (tx, ty) */
294 const uint32_t px = tx / tiling->pipe0.width;
295 const uint32_t py = ty / tiling->pipe0.height;
296 const uint32_t sx = tx - tiling->pipe0.width * px;
297 const uint32_t sy = ty - tiling->pipe0.height * py;
298
299 assert(tx < tiling->tile_count.width && ty < tiling->tile_count.height);
300 assert(px < tiling->pipe_count.width && py < tiling->pipe_count.height);
301 assert(sx < tiling->pipe0.width && sy < tiling->pipe0.height);
302
303 /* convert to 1D indices */
304 tile->pipe = tiling->pipe_count.width * py + px;
305 tile->slot = tiling->pipe0.width * sy + sx;
306
307 /* get the blit area for the tile */
308 tile->begin = (VkOffset2D) {
309 .x = tiling->tile0.offset.x + tiling->tile0.extent.width * tx,
310 .y = tiling->tile0.offset.y + tiling->tile0.extent.height * ty,
311 };
312 tile->end.x =
313 (tx == tiling->tile_count.width - 1)
314 ? tiling->render_area.offset.x + tiling->render_area.extent.width
315 : tile->begin.x + tiling->tile0.extent.width;
316 tile->end.y =
317 (ty == tiling->tile_count.height - 1)
318 ? tiling->render_area.offset.y + tiling->render_area.extent.height
319 : tile->begin.y + tiling->tile0.extent.height;
320 }
321
322 enum a3xx_msaa_samples
323 tu_msaa_samples(uint32_t samples)
324 {
325 switch (samples) {
326 case 1:
327 return MSAA_ONE;
328 case 2:
329 return MSAA_TWO;
330 case 4:
331 return MSAA_FOUR;
332 case 8:
333 return MSAA_EIGHT;
334 default:
335 assert(!"invalid sample count");
336 return MSAA_ONE;
337 }
338 }
339
340 static enum a4xx_index_size
341 tu6_index_size(VkIndexType type)
342 {
343 switch (type) {
344 case VK_INDEX_TYPE_UINT16:
345 return INDEX4_SIZE_16_BIT;
346 case VK_INDEX_TYPE_UINT32:
347 return INDEX4_SIZE_32_BIT;
348 default:
349 unreachable("invalid VkIndexType");
350 return INDEX4_SIZE_8_BIT;
351 }
352 }
353
354 static void
355 tu6_emit_marker(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
356 {
357 tu_cs_emit_write_reg(cs, cmd->marker_reg, ++cmd->marker_seqno);
358 }
359
360 unsigned
361 tu6_emit_event_write(struct tu_cmd_buffer *cmd,
362 struct tu_cs *cs,
363 enum vgt_event_type event,
364 bool need_seqno)
365 {
366 unsigned seqno = 0;
367
368 tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, need_seqno ? 4 : 1);
369 tu_cs_emit(cs, CP_EVENT_WRITE_0_EVENT(event));
370 if (need_seqno) {
371 tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
372 seqno = ++cmd->scratch_seqno;
373 tu_cs_emit(cs, seqno);
374 }
375
376 return seqno;
377 }
378
379 static void
380 tu6_emit_cache_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
381 {
382 tu6_emit_event_write(cmd, cs, 0x31, false);
383 }
384
385 static void
386 tu6_emit_lrz_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
387 {
388 tu6_emit_event_write(cmd, cs, LRZ_FLUSH, false);
389 }
390
391 static void
392 tu6_emit_wfi(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
393 {
394 if (cmd->wait_for_idle) {
395 tu_cs_emit_wfi(cs);
396 cmd->wait_for_idle = false;
397 }
398 }
399
400 static void
401 tu6_emit_flag_buffer(struct tu_cs *cs, const struct tu_image_view *iview)
402 {
403 uint64_t va = tu_image_ubwc_base(iview->image, iview->base_mip, iview->base_layer);
404 uint32_t pitch = tu_image_ubwc_pitch(iview->image, iview->base_mip);
405 uint32_t size = tu_image_ubwc_size(iview->image, iview->base_mip);
406 if (iview->image->ubwc_size) {
407 tu_cs_emit_qw(cs, va);
408 tu_cs_emit(cs, A6XX_RB_DEPTH_FLAG_BUFFER_PITCH_PITCH(pitch) |
409 A6XX_RB_DEPTH_FLAG_BUFFER_PITCH_ARRAY_PITCH(size >> 2));
410 } else {
411 tu_cs_emit_qw(cs, 0);
412 tu_cs_emit(cs, 0);
413 }
414 }
415
416 static void
417 tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
418 {
419 const struct tu_framebuffer *fb = cmd->state.framebuffer;
420 const struct tu_subpass *subpass = cmd->state.subpass;
421 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
422
423 const uint32_t a = subpass->depth_stencil_attachment.attachment;
424 if (a == VK_ATTACHMENT_UNUSED) {
425 tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
426 tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
427 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
428 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
429 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
430 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
431 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */
432
433 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
434 tu_cs_emit(cs,
435 A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
436
437 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5);
438 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
439 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
440 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
441 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
442 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */
443
444 tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_INFO, 1);
445 tu_cs_emit(cs, 0x00000000); /* RB_STENCIL_INFO */
446
447 return;
448 }
449
450 const struct tu_image_view *iview = fb->attachments[a].attachment;
451 enum a6xx_depth_format fmt = tu6_pipe2depth(iview->vk_format);
452
453 tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
454 tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
455 tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(tu_image_stride(iview->image, iview->base_mip)));
456 tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(iview->image->layer_size));
457 tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
458 tu_cs_emit(cs, tiling->gmem_offsets[subpass->color_count]);
459
460 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
461 tu_cs_emit(cs, A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
462
463 tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
464 tu6_emit_flag_buffer(cs, iview);
465
466 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5);
467 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
468 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
469 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
470 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
471 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */
472
473 tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_INFO, 1);
474 tu_cs_emit(cs, 0x00000000); /* RB_STENCIL_INFO */
475
476 /* enable zs? */
477 }
478
479 static void
480 tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
481 {
482 const struct tu_framebuffer *fb = cmd->state.framebuffer;
483 const struct tu_subpass *subpass = cmd->state.subpass;
484 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
485 unsigned char mrt_comp[MAX_RTS] = { 0 };
486 unsigned srgb_cntl = 0;
487
488 for (uint32_t i = 0; i < subpass->color_count; ++i) {
489 uint32_t a = subpass->color_attachments[i].attachment;
490 if (a == VK_ATTACHMENT_UNUSED)
491 continue;
492
493 const struct tu_image_view *iview = fb->attachments[a].attachment;
494 const enum a6xx_tile_mode tile_mode =
495 tu6_get_image_tile_mode(iview->image, iview->base_mip);
496
497 mrt_comp[i] = 0xf;
498
499 if (vk_format_is_srgb(iview->vk_format))
500 srgb_cntl |= (1 << i);
501
502 const struct tu_native_format *format =
503 tu6_get_native_format(iview->vk_format);
504 assert(format && format->rb >= 0);
505
506 tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6);
507 tu_cs_emit(cs, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format->rb) |
508 A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
509 A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(format->swap));
510 tu_cs_emit(cs, A6XX_RB_MRT_PITCH(tu_image_stride(iview->image, iview->base_mip)));
511 tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(iview->image->layer_size));
512 tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
513 tu_cs_emit(
514 cs, tiling->gmem_offsets[i]); /* RB_MRT[i].BASE_GMEM */
515
516 tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_MRT_REG(i), 1);
517 tu_cs_emit(cs, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format->rb) |
518 COND(vk_format_is_sint(iview->vk_format), A6XX_SP_FS_MRT_REG_COLOR_SINT) |
519 COND(vk_format_is_uint(iview->vk_format), A6XX_SP_FS_MRT_REG_COLOR_UINT));
520
521 tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);
522 tu6_emit_flag_buffer(cs, iview);
523 }
524
525 tu_cs_emit_pkt4(cs, REG_A6XX_RB_SRGB_CNTL, 1);
526 tu_cs_emit(cs, srgb_cntl);
527
528 tu_cs_emit_pkt4(cs, REG_A6XX_SP_SRGB_CNTL, 1);
529 tu_cs_emit(cs, srgb_cntl);
530
531 tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_COMPONENTS, 1);
532 tu_cs_emit(cs, A6XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
533 A6XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
534 A6XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
535 A6XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
536 A6XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
537 A6XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
538 A6XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
539 A6XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
540
541 tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_RENDER_COMPONENTS, 1);
542 tu_cs_emit(cs, A6XX_SP_FS_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
543 A6XX_SP_FS_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
544 A6XX_SP_FS_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
545 A6XX_SP_FS_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
546 A6XX_SP_FS_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
547 A6XX_SP_FS_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
548 A6XX_SP_FS_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
549 A6XX_SP_FS_RENDER_COMPONENTS_RT7(mrt_comp[7]));
550 }
551
552 static void
553 tu6_emit_msaa(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
554 {
555 const struct tu_subpass *subpass = cmd->state.subpass;
556 const enum a3xx_msaa_samples samples =
557 tu_msaa_samples(subpass->max_sample_count);
558
559 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
560 tu_cs_emit(cs, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
561 tu_cs_emit(cs, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
562 COND(samples == MSAA_ONE, A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));
563
564 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
565 tu_cs_emit(cs, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
566 tu_cs_emit(cs, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
567 COND(samples == MSAA_ONE, A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE));
568
569 tu_cs_emit_pkt4(cs, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
570 tu_cs_emit(cs, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
571 tu_cs_emit(cs, A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
572 COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));
573
574 tu_cs_emit_pkt4(cs, REG_A6XX_RB_MSAA_CNTL, 1);
575 tu_cs_emit(cs, A6XX_RB_MSAA_CNTL_SAMPLES(samples));
576 }
577
578 static void
579 tu6_emit_bin_size(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t flags)
580 {
581 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
582 const uint32_t bin_w = tiling->tile0.extent.width;
583 const uint32_t bin_h = tiling->tile0.extent.height;
584
585 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_BIN_CONTROL, 1);
586 tu_cs_emit(cs, A6XX_GRAS_BIN_CONTROL_BINW(bin_w) |
587 A6XX_GRAS_BIN_CONTROL_BINH(bin_h) | flags);
588
589 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BIN_CONTROL, 1);
590 tu_cs_emit(cs, A6XX_RB_BIN_CONTROL_BINW(bin_w) |
591 A6XX_RB_BIN_CONTROL_BINH(bin_h) | flags);
592
593 /* no flag for RB_BIN_CONTROL2... */
594 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BIN_CONTROL2, 1);
595 tu_cs_emit(cs, A6XX_RB_BIN_CONTROL2_BINW(bin_w) |
596 A6XX_RB_BIN_CONTROL2_BINH(bin_h));
597 }
598
599 static void
600 tu6_emit_render_cntl(struct tu_cmd_buffer *cmd,
601 struct tu_cs *cs,
602 bool binning)
603 {
604 uint32_t cntl = 0;
605 cntl |= A6XX_RB_RENDER_CNTL_UNK4;
606 if (binning)
607 cntl |= A6XX_RB_RENDER_CNTL_BINNING;
608
609 tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
610 tu_cs_emit(cs, 0x2);
611 tu_cs_emit(cs, REG_A6XX_RB_RENDER_CNTL);
612 tu_cs_emit(cs, cntl);
613 }
614
615 static void
616 tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
617 {
618 const VkRect2D *render_area = &cmd->state.tiling_config.render_area;
619 const uint32_t x1 = render_area->offset.x;
620 const uint32_t y1 = render_area->offset.y;
621 const uint32_t x2 = x1 + render_area->extent.width - 1;
622 const uint32_t y2 = y1 + render_area->extent.height - 1;
623
624 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
625 tu_cs_emit(cs,
626 A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
627 tu_cs_emit(cs,
628 A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
629 }
630
631 static void
632 tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
633 struct tu_cs *cs,
634 const struct tu_image_view *iview,
635 uint32_t gmem_offset,
636 uint32_t blit_info)
637 {
638 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
639 tu_cs_emit(cs, blit_info);
640
641 const struct tu_native_format *format =
642 tu6_get_native_format(iview->vk_format);
643 assert(format && format->rb >= 0);
644
645 enum a6xx_tile_mode tile_mode =
646 tu6_get_image_tile_mode(iview->image, iview->base_mip);
647 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 5);
648 tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) |
649 A6XX_RB_BLIT_DST_INFO_SAMPLES(tu_msaa_samples(iview->image->samples)) |
650 A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) |
651 A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap) |
652 COND(iview->image->ubwc_size, A6XX_RB_BLIT_DST_INFO_FLAGS));
653 tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
654 tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip)));
655 tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layer_size));
656
657 if (iview->image->ubwc_size) {
658 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
659 tu6_emit_flag_buffer(cs, iview);
660 }
661
662 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
663 tu_cs_emit(cs, gmem_offset);
664 }
665
666 static void
667 tu6_emit_blit_clear(struct tu_cmd_buffer *cmd,
668 struct tu_cs *cs,
669 const struct tu_image_view *iview,
670 uint32_t gmem_offset,
671 const VkClearValue *clear_value)
672 {
673 const struct tu_native_format *format =
674 tu6_get_native_format(iview->vk_format);
675 assert(format && format->rb >= 0);
676
677 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
678 tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb));
679
680 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
681 tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));
682
683 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
684 tu_cs_emit(cs, gmem_offset);
685
686 tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
687 tu_cs_emit(cs, 0);
688
689 uint32_t clear_vals[4] = { 0 };
690 tu_pack_clear_value(clear_value, iview->vk_format, clear_vals);
691
692 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
693 tu_cs_emit(cs, clear_vals[0]);
694 tu_cs_emit(cs, clear_vals[1]);
695 tu_cs_emit(cs, clear_vals[2]);
696 tu_cs_emit(cs, clear_vals[3]);
697 }
698
699 static void
700 tu6_emit_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
701 {
702 tu6_emit_marker(cmd, cs);
703 tu6_emit_event_write(cmd, cs, BLIT, false);
704 tu6_emit_marker(cmd, cs);
705 }
706
707 static void
708 tu6_emit_window_scissor(struct tu_cmd_buffer *cmd,
709 struct tu_cs *cs,
710 uint32_t x1,
711 uint32_t y1,
712 uint32_t x2,
713 uint32_t y2)
714 {
715 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
716 tu_cs_emit(cs, A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
717 A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
718 tu_cs_emit(cs, A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
719 A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));
720
721 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RESOLVE_CNTL_1, 2);
722 tu_cs_emit(
723 cs, A6XX_GRAS_RESOLVE_CNTL_1_X(x1) | A6XX_GRAS_RESOLVE_CNTL_1_Y(y1));
724 tu_cs_emit(
725 cs, A6XX_GRAS_RESOLVE_CNTL_2_X(x2) | A6XX_GRAS_RESOLVE_CNTL_2_Y(y2));
726 }
727
728 static void
729 tu6_emit_window_offset(struct tu_cmd_buffer *cmd,
730 struct tu_cs *cs,
731 uint32_t x1,
732 uint32_t y1)
733 {
734 tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET, 1);
735 tu_cs_emit(cs, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));
736
737 tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET2, 1);
738 tu_cs_emit(cs,
739 A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));
740
741 tu_cs_emit_pkt4(cs, REG_A6XX_SP_WINDOW_OFFSET, 1);
742 tu_cs_emit(cs, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1));
743
744 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
745 tu_cs_emit(
746 cs, A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
747 }
748
749 static bool
750 use_hw_binning(struct tu_cmd_buffer *cmd)
751 {
752 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
753
754 if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_NOBIN))
755 return false;
756
757 return (tiling->tile_count.width * tiling->tile_count.height) > 2;
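/* Heuristic: the extra binning pass only pays off when the render area splits
 * into more than a couple of tiles; for one or two tiles just draw each bin
 * directly.
 */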
758 }
759
760 static void
761 tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
762 struct tu_cs *cs,
763 const struct tu_tile *tile)
764 {
765 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
766 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(0x7));
767
768 tu6_emit_marker(cmd, cs);
769 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
770 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10);
771 tu6_emit_marker(cmd, cs);
772
773 const uint32_t x1 = tile->begin.x;
774 const uint32_t y1 = tile->begin.y;
775 const uint32_t x2 = tile->end.x - 1;
776 const uint32_t y2 = tile->end.y - 1;
777 tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
778 tu6_emit_window_offset(cmd, cs, x1, y1);
779
780 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_OVERRIDE, 1);
781 tu_cs_emit(cs, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
782
783 if (use_hw_binning(cmd)) {
784 tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
785
786 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
787 tu_cs_emit(cs, 0x0);
788
789 tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
790 tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
791 A6XX_CP_REG_TEST_0_BIT(0) |
792 A6XX_CP_REG_TEST_0_UNK25);
793
794 tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
795 tu_cs_emit(cs, 0x10000000);
796 tu_cs_emit(cs, 11); /* conditionally execute next 11 dwords */
797
798 /* if (no overflow) */ {
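/* Point the CP at this tile's pipe: the per-pipe visibility stream in vsc_data,
 * the per-pipe stream-size words stored after all 32 pipes, and what appears to
 * be the per-pipe primitive stream in vsc_data2.
 */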
799 tu_cs_emit_pkt7(cs, CP_SET_BIN_DATA5, 7);
800 tu_cs_emit(cs, cmd->state.tiling_config.pipe_sizes[tile->pipe] |
801 CP_SET_BIN_DATA5_0_VSC_N(tile->slot));
802 tu_cs_emit_qw(cs, cmd->vsc_data.iova + tile->pipe * cmd->vsc_data_pitch);
803 tu_cs_emit_qw(cs, cmd->vsc_data.iova + (tile->pipe * 4) + (32 * cmd->vsc_data_pitch));
804 tu_cs_emit_qw(cs, cmd->vsc_data2.iova + (tile->pipe * cmd->vsc_data2_pitch));
805
806 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
807 tu_cs_emit(cs, 0x0);
808
809 /* use a NOP packet to skip over the 'else' side: */
810 tu_cs_emit_pkt7(cs, CP_NOP, 2);
811 } /* else */ {
812 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
813 tu_cs_emit(cs, 0x1);
814 }
815
816 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
817 tu_cs_emit(cs, 0x0);
818
819 tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8804, 1);
820 tu_cs_emit(cs, 0x0);
821
822 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_UNKNOWN_B304, 1);
823 tu_cs_emit(cs, 0x0);
824
825 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_80A4, 1);
826 tu_cs_emit(cs, 0x0);
827 } else {
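/* Binning disabled for this pass: override visibility so every draw in the
 * tile is executed.
 */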
828 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
829 tu_cs_emit(cs, 0x1);
830
831 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
832 tu_cs_emit(cs, 0x0);
833 }
834 }
835
836 static void
837 tu6_emit_tile_load_attachment(struct tu_cmd_buffer *cmd,
838 struct tu_cs *cs,
839 uint32_t a,
840 uint32_t gmem_index)
841 {
842 const struct tu_framebuffer *fb = cmd->state.framebuffer;
843 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
844 const struct tu_attachment_state *attachments = cmd->state.attachments;
845
846 const struct tu_image_view *iview = fb->attachments[a].attachment;
847 const struct tu_attachment_state *att = attachments + a;
848 if (att->pending_clear_aspects) {
849 tu6_emit_blit_clear(cmd, cs, iview,
850 tiling->gmem_offsets[gmem_index],
851 &att->clear_value);
852 } else {
853 tu6_emit_blit_info(cmd, cs, iview,
854 tiling->gmem_offsets[gmem_index],
855 A6XX_RB_BLIT_INFO_UNK0 | A6XX_RB_BLIT_INFO_GMEM);
856 }
857
858 tu6_emit_blit(cmd, cs);
859 }
860
861 static void
862 tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
863 {
864 const struct tu_subpass *subpass = cmd->state.subpass;
865
866 tu6_emit_blit_scissor(cmd, cs);
867
868 for (uint32_t i = 0; i < subpass->color_count; ++i) {
869 const uint32_t a = subpass->color_attachments[i].attachment;
870 if (a != VK_ATTACHMENT_UNUSED)
871 tu6_emit_tile_load_attachment(cmd, cs, a, i);
872 }
873
874 const uint32_t a = subpass->depth_stencil_attachment.attachment;
875 if (a != VK_ATTACHMENT_UNUSED)
876 tu6_emit_tile_load_attachment(cmd, cs, a, subpass->color_count);
877 }
878
879 static void
880 tu6_emit_store_attachment(struct tu_cmd_buffer *cmd,
881 struct tu_cs *cs,
882 uint32_t a,
883 uint32_t gmem_index)
884 {
885 const struct tu_framebuffer *fb = cmd->state.framebuffer;
886 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
887
888 if (a == VK_ATTACHMENT_UNUSED)
889 return;
890
891 tu6_emit_blit_info(cmd, cs, fb->attachments[a].attachment,
892 tiling->gmem_offsets[gmem_index], 0);
893 tu6_emit_blit(cmd, cs);
894 }
895
896 static void
897 tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
898 {
899 const struct tu_subpass *subpass = cmd->state.subpass;
900
901 tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
902 tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
903 CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
904 CP_SET_DRAW_STATE__0_GROUP_ID(0));
905 tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
906 tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
907
908 tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
909 tu_cs_emit(cs, 0x0);
910
911 tu6_emit_marker(cmd, cs);
912 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
913 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10);
914 tu6_emit_marker(cmd, cs);
915
916 tu6_emit_blit_scissor(cmd, cs);
917
918 for (uint32_t i = 0; i < subpass->color_count; ++i) {
919 tu6_emit_store_attachment(cmd, cs,
920 subpass->color_attachments[i].attachment,
921 i);
922 if (subpass->resolve_attachments) {
923 tu6_emit_store_attachment(cmd, cs,
924 subpass->resolve_attachments[i].attachment,
925 i);
926 }
927 }
928
929 tu6_emit_store_attachment(cmd, cs,
930 subpass->depth_stencil_attachment.attachment,
931 subpass->color_count);
932 }
933
934 static void
935 tu6_emit_restart_index(struct tu_cs *cs, uint32_t restart_index)
936 {
937 tu_cs_emit_pkt4(cs, REG_A6XX_PC_RESTART_INDEX, 1);
938 tu_cs_emit(cs, restart_index);
939 }
940
941 static void
942 tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
943 {
944 VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
945 if (result != VK_SUCCESS) {
946 cmd->record_result = result;
947 return;
948 }
949
950 tu6_emit_cache_flush(cmd, cs);
951
952 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 0xfffff);
953
954 tu_cs_emit_write_reg(cs, REG_A6XX_RB_CCU_CNTL, 0x7c400004);
955 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E04, 0x00100000);
956 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE04, 0x8);
957 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE00, 0);
958 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE0F, 0x3f);
959 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B605, 0x44);
960 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B600, 0x100000);
961 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80);
962 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE01, 0);
963
964 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9600, 0);
965 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8600, 0x880);
966 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE04, 0);
967 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE03, 0x00000410);
968 tu_cs_emit_write_reg(cs, REG_A6XX_SP_IBO_COUNT, 0);
969 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B182, 0);
970 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BB11, 0);
971 tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
972 tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_CLIENT_PF, 4);
973 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E01, 0x0);
974 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AB00, 0x5);
975 tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A009, 0x00000001);
976 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8811, 0x00000010);
977 tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x1f);
978
979 tu_cs_emit_write_reg(cs, REG_A6XX_RB_SRGB_CNTL, 0);
980
981 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8101, 0);
982 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SAMPLE_CNTL, 0);
983 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8110, 0);
984
985 tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL0, 0x401);
986 tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL1, 0);
987 tu_cs_emit_write_reg(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 0);
988 tu_cs_emit_write_reg(cs, REG_A6XX_RB_SAMPLE_CNTL, 0);
989 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8818, 0);
990 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8819, 0);
991 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881A, 0);
992 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881B, 0);
993 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881C, 0);
994 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881D, 0);
995 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881E, 0);
996 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_88F0, 0);
997
998 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9101, 0xffff00);
999 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9107, 0);
1000
1001 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9236, 1);
1002 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9300, 0);
1003
1004 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_SO_OVERRIDE,
1005 A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
1006
1007 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9801, 0);
1008 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0);
1009 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9980, 0);
1010
1011 tu_cs_emit_write_reg(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 0);
1012 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9B07, 0);
1013
1014 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A81B, 0);
1015
1016 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0);
1017
1018 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8099, 0);
1019 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_809B, 0);
1020 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A0, 2);
1021 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80AF, 0);
1022 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0);
1023 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9211, 0);
1024 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9602, 0);
1025 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9981, 0x3);
1026 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9E72, 0);
1027 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9108, 0x3);
1028 tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B304, 0);
1029 tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B309, 0x000000a2);
1030 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8804, 0);
1031 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A4, 0);
1032 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A5, 0);
1033 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A6, 0);
1034 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8805, 0);
1035 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8806, 0);
1036 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8878, 0);
1037 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8879, 0);
1038 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc);
1039
1040 tu6_emit_marker(cmd, cs);
1041
1042 tu_cs_emit_write_reg(cs, REG_A6XX_VFD_MODE_CNTL, 0x00000000);
1043
1044 tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0);
1045
1046 tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x0000001f);
1047
1048 /* we don't use this yet.. probably best to disable.. */
1049 tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
1050 tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
1051 CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
1052 CP_SET_DRAW_STATE__0_GROUP_ID(0));
1053 tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
1054 tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
1055
1056 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_BASE_LO(0), 3);
1057 tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_BASE_LO_0 */
1058 tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_BASE_HI_0 */
1059 tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_SIZE_0 */
1060
1061 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_FLUSH_BASE_LO(0), 2);
1062 tu_cs_emit(cs, 0x00000000); /* VPC_SO_FLUSH_BASE_LO_0 */
1063 tu_cs_emit(cs, 0x00000000); /* VPC_SO_FLUSH_BASE_HI_0 */
1064
1065 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUF_CNTL, 1);
1066 tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUF_CNTL */
1067
1068 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(0), 1);
1069 tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_OFFSET_0 */
1070
1071 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_BASE_LO(1), 3);
1072 tu_cs_emit(cs, 0x00000000);
1073 tu_cs_emit(cs, 0x00000000);
1074 tu_cs_emit(cs, 0x00000000);
1075
1076 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(1), 6);
1077 tu_cs_emit(cs, 0x00000000);
1078 tu_cs_emit(cs, 0x00000000);
1079 tu_cs_emit(cs, 0x00000000);
1080 tu_cs_emit(cs, 0x00000000);
1081 tu_cs_emit(cs, 0x00000000);
1082 tu_cs_emit(cs, 0x00000000);
1083
1084 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(2), 6);
1085 tu_cs_emit(cs, 0x00000000);
1086 tu_cs_emit(cs, 0x00000000);
1087 tu_cs_emit(cs, 0x00000000);
1088 tu_cs_emit(cs, 0x00000000);
1089 tu_cs_emit(cs, 0x00000000);
1090 tu_cs_emit(cs, 0x00000000);
1091
1092 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(3), 3);
1093 tu_cs_emit(cs, 0x00000000);
1094 tu_cs_emit(cs, 0x00000000);
1095 tu_cs_emit(cs, 0x00000000);
1096
1097 tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_CTRL_REG0, 1);
1098 tu_cs_emit(cs, 0x00000000);
1099
1100 tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_CTRL_REG0, 1);
1101 tu_cs_emit(cs, 0x00000000);
1102
1103 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1);
1104 tu_cs_emit(cs, 0x00000000);
1105
1106 tu_cs_emit_pkt4(cs, REG_A6XX_RB_LRZ_CNTL, 1);
1107 tu_cs_emit(cs, 0x00000000);
1108
1109 tu_cs_sanity_check(cs);
1110 }
1111
1112 static void
1113 tu6_cache_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1114 {
1115 unsigned seqno;
1116
1117 seqno = tu6_emit_event_write(cmd, cs, CACHE_FLUSH_AND_INV_EVENT, true);
1118
1119 tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
1120 tu_cs_emit(cs, 0x00000013);
1121 tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
1122 tu_cs_emit(cs, seqno);
1123 tu_cs_emit(cs, 0xffffffff);
1124 tu_cs_emit(cs, 0x00000010);
1125
1126 seqno = tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS, true);
1127
1128 tu_cs_emit_pkt7(cs, CP_UNK_A6XX_14, 4);
1129 tu_cs_emit(cs, 0x00000000);
1130 tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
1131 tu_cs_emit(cs, seqno);
1132 }
1133
1134 static void
1135 update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1136 {
1137 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1138
1139 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_BIN_SIZE, 3);
1140 tu_cs_emit(cs, A6XX_VSC_BIN_SIZE_WIDTH(tiling->tile0.extent.width) |
1141 A6XX_VSC_BIN_SIZE_HEIGHT(tiling->tile0.extent.height));
1142 tu_cs_emit_qw(cs, cmd->vsc_data.iova + 32 * cmd->vsc_data_pitch); /* VSC_SIZE_ADDRESS_LO/HI */
1143
1144 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_BIN_COUNT, 1);
1145 tu_cs_emit(cs, A6XX_VSC_BIN_COUNT_NX(tiling->tile_count.width) |
1146 A6XX_VSC_BIN_COUNT_NY(tiling->tile_count.height));
1147
1148 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
1149 for (unsigned i = 0; i < 32; i++)
1150 tu_cs_emit(cs, tiling->pipe_config[i]);
1151
1152 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_DATA2_ADDRESS_LO, 4);
1153 tu_cs_emit_qw(cs, cmd->vsc_data2.iova);
1154 tu_cs_emit(cs, cmd->vsc_data2_pitch);
1155 tu_cs_emit(cs, cmd->vsc_data2.size);
1156
1157 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_DATA_ADDRESS_LO, 4);
1158 tu_cs_emit_qw(cs, cmd->vsc_data.iova);
1159 tu_cs_emit(cs, cmd->vsc_data_pitch);
1160 tu_cs_emit(cs, cmd->vsc_data.size);
1161 }
1162
1163 static void
1164 emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1165 {
1166 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1167 const uint32_t used_pipe_count =
1168 tiling->pipe_count.width * tiling->pipe_count.height;
1169
1170 /* Clear vsc_scratch: */
1171 tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 3);
1172 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
1173 tu_cs_emit(cs, 0x0);
1174
1175 /* Check for overflow, write vsc_scratch if detected: */
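/* The values written are deliberately odd: bit 0 of vsc_scratch then flags that
 * some pipe overflowed, and the remaining bits record the offending pitch so the
 * CPU-side check can tell which buffer needs to grow.
 */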
1176 for (int i = 0; i < used_pipe_count; i++) {
1177 tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 8);
1178 tu_cs_emit(cs, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
1179 CP_COND_WRITE5_0_WRITE_MEMORY);
1180 tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_SIZE_REG(i)));
1181 tu_cs_emit(cs, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
1182 tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_data_pitch));
1183 tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0));
1184 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
1185 tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(1 + cmd->vsc_data_pitch));
1186
1187 tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 8);
1188 tu_cs_emit(cs, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
1189 CP_COND_WRITE5_0_WRITE_MEMORY);
1190 tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_SIZE2_REG(i)));
1191 tu_cs_emit(cs, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
1192 tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_data2_pitch));
1193 tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0));
1194 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
1195 tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(3 + cmd->vsc_data2_pitch));
1196 }
1197
1198 tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
1199
1200 tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
1201
1202 tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3);
1203 tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(OVERFLOW_FLAG_REG) |
1204 CP_MEM_TO_REG_0_CNT(1 - 1));
1205 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
1206
1207 /*
1208 * This is a bit awkward, we really want a way to invert the
1209 * CP_REG_TEST/CP_COND_REG_EXEC logic, so that we can conditionally
1210 * execute cmds to use hwbinning when a bit is *not* set. This
1211 * dance is to invert OVERFLOW_FLAG_REG
1212 *
1213 * A CP_NOP packet is used to skip executing the 'else' clause
1214 * if (b0 set)..
1215 */
1216
1217 /* b0 will be set if VSC_DATA or VSC_DATA2 overflow: */
1218 tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
1219 tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
1220 A6XX_CP_REG_TEST_0_BIT(0) |
1221 A6XX_CP_REG_TEST_0_UNK25);
1222
1223 tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
1224 tu_cs_emit(cs, 0x10000000);
1225 tu_cs_emit(cs, 7); /* conditionally execute next 7 dwords */
1226
1227 /* if (b0 set) */ {
1228 /*
1229 * On overflow, mirror the value to control->vsc_overflow,
1230 * which the CPU checks to detect overflow (see
1231 * check_vsc_overflow()).
1232 */
1233 tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
1234 tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(OVERFLOW_FLAG_REG) |
1235 CP_REG_TO_MEM_0_CNT(1 - 1));
1236 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_OVERFLOW);
1237
1238 tu_cs_emit_pkt4(cs, OVERFLOW_FLAG_REG, 1);
1239 tu_cs_emit(cs, 0x0);
1240
1241 tu_cs_emit_pkt7(cs, CP_NOP, 2); /* skip 'else' when 'if' is taken */
1242 } /* else */ {
1243 tu_cs_emit_pkt4(cs, OVERFLOW_FLAG_REG, 1);
1244 tu_cs_emit(cs, 0x1);
1245 }
1246 }
1247
1248 static void
1249 tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1250 {
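/* Binning pass: rasterize the whole render area once in binning mode so the
 * VSC fills the per-pipe visibility streams in vsc_data/vsc_data2; the per-tile
 * passes later replay draw_cs against that data to skip draws that are not
 * visible in the tile.
 */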
1251 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1252
1253 uint32_t x1 = tiling->tile0.offset.x;
1254 uint32_t y1 = tiling->tile0.offset.y;
1255 uint32_t x2 = tiling->render_area.offset.x + tiling->render_area.extent.width - 1;
1256 uint32_t y2 = tiling->render_area.offset.y + tiling->render_area.extent.height - 1;
1257
1258 tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
1259
1260 tu6_emit_marker(cmd, cs);
1261 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
1262 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
1263 tu6_emit_marker(cmd, cs);
1264
1265 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
1266 tu_cs_emit(cs, 0x1);
1267
1268 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
1269 tu_cs_emit(cs, 0x1);
1270
1271 tu_cs_emit_wfi(cs);
1272
1273 tu_cs_emit_pkt4(cs, REG_A6XX_VFD_MODE_CNTL, 1);
1274 tu_cs_emit(cs, A6XX_VFD_MODE_CNTL_BINNING_PASS);
1275
1276 update_vsc_pipe(cmd, cs);
1277
1278 tu_cs_emit_pkt4(cs, REG_A6XX_PC_UNKNOWN_9805, 1);
1279 tu_cs_emit(cs, 0x1);
1280
1281 tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A0F8, 1);
1282 tu_cs_emit(cs, 0x1);
1283
1284 tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
1285 tu_cs_emit(cs, UNK_2C);
1286
1287 tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET, 1);
1288 tu_cs_emit(cs, A6XX_RB_WINDOW_OFFSET_X(0) |
1289 A6XX_RB_WINDOW_OFFSET_Y(0));
1290
1291 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
1292 tu_cs_emit(cs, A6XX_SP_TP_WINDOW_OFFSET_X(0) |
1293 A6XX_SP_TP_WINDOW_OFFSET_Y(0));
1294
1295 /* emit IB to binning drawcmds: */
1296 tu_cs_emit_call(cs, &cmd->draw_cs);
1297
1298 tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
1299 tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
1300 CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
1301 CP_SET_DRAW_STATE__0_GROUP_ID(0));
1302 tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
1303 tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
1304
1305 tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
1306 tu_cs_emit(cs, UNK_2D);
1307
1308 tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
1309 tu6_cache_flush(cmd, cs);
1310
1311 tu_cs_emit_wfi(cs);
1312
1313 tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
1314
1315 emit_vsc_overflow_test(cmd, cs);
1316
1317 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
1318 tu_cs_emit(cs, 0x0);
1319
1320 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
1321 tu_cs_emit(cs, 0x0);
1322
1323 tu_cs_emit_wfi(cs);
1324
1325 tu_cs_emit_pkt4(cs, REG_A6XX_RB_CCU_CNTL, 1);
1326 tu_cs_emit(cs, 0x7c400004);
1327
1328 cmd->wait_for_idle = false;
1329 }
1330
1331 static void
1332 tu6_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1333 {
1334 VkResult result = tu_cs_reserve_space(cmd->device, cs, 1024);
1335 if (result != VK_SUCCESS) {
1336 cmd->record_result = result;
1337 return;
1338 }
1339
1340 tu6_emit_lrz_flush(cmd, cs);
1341
1342 /* lrz clear? */
1343
1344 tu6_emit_cache_flush(cmd, cs);
1345
1346 tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
1347 tu_cs_emit(cs, 0x0);
1348
1349 /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
1350 tu6_emit_wfi(cmd, cs);
1351 tu_cs_emit_pkt4(cs, REG_A6XX_RB_CCU_CNTL, 1);
1352 tu_cs_emit(cs, 0x7c400004); /* RB_CCU_CNTL */
1353
1354 tu6_emit_zs(cmd, cs);
1355 tu6_emit_mrt(cmd, cs);
1356 tu6_emit_msaa(cmd, cs);
1357
1358 if (use_hw_binning(cmd)) {
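/* Run the binning pass first with the BINNING_PASS bin-control flag, then switch
 * the bin controls to USE_VIZ so the subsequent per-tile rendering consumes the
 * visibility streams it produced.
 */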
1359 tu6_emit_bin_size(cmd, cs, A6XX_RB_BIN_CONTROL_BINNING_PASS | 0x6000000);
1360
1361 tu6_emit_render_cntl(cmd, cs, true);
1362
1363 tu6_emit_binning_pass(cmd, cs);
1364
1365 tu6_emit_bin_size(cmd, cs, A6XX_RB_BIN_CONTROL_USE_VIZ | 0x6000000);
1366
1367 tu_cs_emit_pkt4(cs, REG_A6XX_VFD_MODE_CNTL, 1);
1368 tu_cs_emit(cs, 0x0);
1369
1370 tu_cs_emit_pkt4(cs, REG_A6XX_PC_UNKNOWN_9805, 1);
1371 tu_cs_emit(cs, 0x1);
1372
1373 tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A0F8, 1);
1374 tu_cs_emit(cs, 0x1);
1375
1376 tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
1377 tu_cs_emit(cs, 0x1);
1378 } else {
1379 tu6_emit_bin_size(cmd, cs, 0x6000000);
1380 }
1381
1382 tu6_emit_render_cntl(cmd, cs, false);
1383
1384 tu_cs_sanity_check(cs);
1385 }
1386
1387 static void
1388 tu6_render_tile(struct tu_cmd_buffer *cmd,
1389 struct tu_cs *cs,
1390 const struct tu_tile *tile)
1391 {
1392 const uint32_t render_tile_space = 256 + tu_cs_get_call_size(&cmd->draw_cs);
1393 VkResult result = tu_cs_reserve_space(cmd->device, cs, render_tile_space);
1394 if (result != VK_SUCCESS) {
1395 cmd->record_result = result;
1396 return;
1397 }
1398
1399 tu6_emit_tile_select(cmd, cs, tile);
1400 tu_cs_emit_ib(cs, &cmd->state.tile_load_ib);
1401
1402 tu_cs_emit_call(cs, &cmd->draw_cs);
1403 cmd->wait_for_idle = true;
1404
1405 if (use_hw_binning(cmd)) {
1406 tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
1407 tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
1408 A6XX_CP_REG_TEST_0_BIT(0) |
1409 A6XX_CP_REG_TEST_0_UNK25);
1410
1411 tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
1412 tu_cs_emit(cs, 0x10000000);
1413 tu_cs_emit(cs, 2); /* conditionally execute next 2 dwords */
1414
1415 /* if (no overflow) */ {
1416 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
1417 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(0x5) | 0x10);
1418 }
1419 }
1420
1421 tu_cs_emit_ib(cs, &cmd->state.tile_store_ib);
1422
1423 tu_cs_sanity_check(cs);
1424 }
1425
1426 static void
1427 tu6_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1428 {
1429 VkResult result = tu_cs_reserve_space(cmd->device, cs, 16);
1430 if (result != VK_SUCCESS) {
1431 cmd->record_result = result;
1432 return;
1433 }
1434
1435 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1);
1436 tu_cs_emit(cs, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_UNK3);
1437
1438 tu6_emit_lrz_flush(cmd, cs);
1439
1440 tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS, true);
1441
1442 tu_cs_sanity_check(cs);
1443 }
1444
1445 static void
1446 tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
1447 {
1448 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1449
1450 tu6_render_begin(cmd, &cmd->cs);
1451
1452 for (uint32_t y = 0; y < tiling->tile_count.height; y++) {
1453 for (uint32_t x = 0; x < tiling->tile_count.width; x++) {
1454 struct tu_tile tile;
1455 tu_tiling_config_get_tile(tiling, cmd->device, x, y, &tile);
1456 tu6_render_tile(cmd, &cmd->cs, &tile);
1457 }
1458 }
1459
1460 tu6_render_end(cmd, &cmd->cs);
1461 }
1462
1463 static void
1464 tu_cmd_prepare_tile_load_ib(struct tu_cmd_buffer *cmd)
1465 {
1466 const uint32_t tile_load_space = 16 + 32 * MAX_RTS;
1467 const struct tu_subpass *subpass = cmd->state.subpass;
1468 struct tu_attachment_state *attachments = cmd->state.attachments;
1469 struct tu_cs sub_cs;
1470
1471 VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs,
1472 tile_load_space, &sub_cs);
1473 if (result != VK_SUCCESS) {
1474 cmd->record_result = result;
1475 return;
1476 }
1477
1478 /* emit to tile-load sub_cs */
1479 tu6_emit_tile_load(cmd, &sub_cs);
1480
1481 cmd->state.tile_load_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs);
1482
1483 for (uint32_t i = 0; i < subpass->color_count; ++i) {
1484 const uint32_t a = subpass->color_attachments[i].attachment;
1485 if (a != VK_ATTACHMENT_UNUSED)
1486 attachments[a].pending_clear_aspects = 0;
1487 }
1488 }
1489
1490 static void
1491 tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd)
1492 {
1493 const uint32_t tile_store_space = 32 + 32 * MAX_RTS;
1494 struct tu_cs sub_cs;
1495
1496 VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs,
1497 tile_store_space, &sub_cs);
1498 if (result != VK_SUCCESS) {
1499 cmd->record_result = result;
1500 return;
1501 }
1502
1503 /* emit to tile-store sub_cs */
1504 tu6_emit_tile_store(cmd, &sub_cs);
1505
1506 cmd->state.tile_store_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs);
1507 }
1508
1509 static void
1510 tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd,
1511 const VkRect2D *render_area)
1512 {
1513 const struct tu_device *dev = cmd->device;
1514 const struct tu_render_pass *pass = cmd->state.pass;
1515 const struct tu_subpass *subpass = cmd->state.subpass;
1516 struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1517
1518 uint32_t buffer_cpp[MAX_RTS + 2];
1519 uint32_t buffer_count = 0;
1520
1521 for (uint32_t i = 0; i < subpass->color_count; ++i) {
1522 const uint32_t a = subpass->color_attachments[i].attachment;
1523 if (a == VK_ATTACHMENT_UNUSED) {
1524 buffer_cpp[buffer_count++] = 0;
1525 continue;
1526 }
1527
1528 const struct tu_render_pass_attachment *att = &pass->attachments[a];
1529 buffer_cpp[buffer_count++] =
1530 vk_format_get_blocksize(att->format) * att->samples;
1531 }
1532
1533 if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
1534 const uint32_t a = subpass->depth_stencil_attachment.attachment;
1535 const struct tu_render_pass_attachment *att = &pass->attachments[a];
1536
1537 /* TODO */
1538 assert(att->format != VK_FORMAT_D32_SFLOAT_S8_UINT);
1539
1540 buffer_cpp[buffer_count++] =
1541 vk_format_get_blocksize(att->format) * att->samples;
1542 }
1543
1544 tu_tiling_config_update(tiling, dev, buffer_cpp, buffer_count,
1545 render_area);
1546 }
1547
1548 const struct tu_dynamic_state default_dynamic_state = {
1549 .viewport =
1550 {
1551 .count = 0,
1552 },
1553 .scissor =
1554 {
1555 .count = 0,
1556 },
1557 .line_width = 1.0f,
1558 .depth_bias =
1559 {
1560 .bias = 0.0f,
1561 .clamp = 0.0f,
1562 .slope = 0.0f,
1563 },
1564 .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
1565 .depth_bounds =
1566 {
1567 .min = 0.0f,
1568 .max = 1.0f,
1569 },
1570 .stencil_compare_mask =
1571 {
1572 .front = ~0u,
1573 .back = ~0u,
1574 },
1575 .stencil_write_mask =
1576 {
1577 .front = ~0u,
1578 .back = ~0u,
1579 },
1580 .stencil_reference =
1581 {
1582 .front = 0u,
1583 .back = 0u,
1584 },
1585 };
1586
1587 static void UNUSED /* FINISHME */
1588 tu_bind_dynamic_state(struct tu_cmd_buffer *cmd_buffer,
1589 const struct tu_dynamic_state *src)
1590 {
1591 struct tu_dynamic_state *dest = &cmd_buffer->state.dynamic;
1592 uint32_t copy_mask = src->mask;
1593 uint32_t dest_mask = 0;
1594
1595 tu_use_args(cmd_buffer); /* FINISHME */
1596
1597 /* Make sure to copy the number of viewports/scissors because they can
1598 * only be specified at pipeline creation time.
1599 */
1600 dest->viewport.count = src->viewport.count;
1601 dest->scissor.count = src->scissor.count;
1602 dest->discard_rectangle.count = src->discard_rectangle.count;
1603
1604 if (copy_mask & TU_DYNAMIC_VIEWPORT) {
1605 if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
1606 src->viewport.count * sizeof(VkViewport))) {
1607 typed_memcpy(dest->viewport.viewports, src->viewport.viewports,
1608 src->viewport.count);
1609 dest_mask |= TU_DYNAMIC_VIEWPORT;
1610 }
1611 }
1612
1613 if (copy_mask & TU_DYNAMIC_SCISSOR) {
1614 if (memcmp(&dest->scissor.scissors, &src->scissor.scissors,
1615 src->scissor.count * sizeof(VkRect2D))) {
1616 typed_memcpy(dest->scissor.scissors, src->scissor.scissors,
1617 src->scissor.count);
1618 dest_mask |= TU_DYNAMIC_SCISSOR;
1619 }
1620 }
1621
1622 if (copy_mask & TU_DYNAMIC_LINE_WIDTH) {
1623 if (dest->line_width != src->line_width) {
1624 dest->line_width = src->line_width;
1625 dest_mask |= TU_DYNAMIC_LINE_WIDTH;
1626 }
1627 }
1628
1629 if (copy_mask & TU_DYNAMIC_DEPTH_BIAS) {
1630 if (memcmp(&dest->depth_bias, &src->depth_bias,
1631 sizeof(src->depth_bias))) {
1632 dest->depth_bias = src->depth_bias;
1633 dest_mask |= TU_DYNAMIC_DEPTH_BIAS;
1634 }
1635 }
1636
1637 if (copy_mask & TU_DYNAMIC_BLEND_CONSTANTS) {
1638 if (memcmp(&dest->blend_constants, &src->blend_constants,
1639 sizeof(src->blend_constants))) {
1640 typed_memcpy(dest->blend_constants, src->blend_constants, 4);
1641 dest_mask |= TU_DYNAMIC_BLEND_CONSTANTS;
1642 }
1643 }
1644
1645 if (copy_mask & TU_DYNAMIC_DEPTH_BOUNDS) {
1646 if (memcmp(&dest->depth_bounds, &src->depth_bounds,
1647 sizeof(src->depth_bounds))) {
1648 dest->depth_bounds = src->depth_bounds;
1649 dest_mask |= TU_DYNAMIC_DEPTH_BOUNDS;
1650 }
1651 }
1652
1653 if (copy_mask & TU_DYNAMIC_STENCIL_COMPARE_MASK) {
1654 if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask,
1655 sizeof(src->stencil_compare_mask))) {
1656 dest->stencil_compare_mask = src->stencil_compare_mask;
1657 dest_mask |= TU_DYNAMIC_STENCIL_COMPARE_MASK;
1658 }
1659 }
1660
1661 if (copy_mask & TU_DYNAMIC_STENCIL_WRITE_MASK) {
1662 if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask,
1663 sizeof(src->stencil_write_mask))) {
1664 dest->stencil_write_mask = src->stencil_write_mask;
1665 dest_mask |= TU_DYNAMIC_STENCIL_WRITE_MASK;
1666 }
1667 }
1668
1669 if (copy_mask & TU_DYNAMIC_STENCIL_REFERENCE) {
1670 if (memcmp(&dest->stencil_reference, &src->stencil_reference,
1671 sizeof(src->stencil_reference))) {
1672 dest->stencil_reference = src->stencil_reference;
1673 dest_mask |= TU_DYNAMIC_STENCIL_REFERENCE;
1674 }
1675 }
1676
1677 if (copy_mask & TU_DYNAMIC_DISCARD_RECTANGLE) {
1678 if (memcmp(&dest->discard_rectangle.rectangles,
1679 &src->discard_rectangle.rectangles,
1680 src->discard_rectangle.count * sizeof(VkRect2D))) {
1681 typed_memcpy(dest->discard_rectangle.rectangles,
1682 src->discard_rectangle.rectangles,
1683 src->discard_rectangle.count);
1684 dest_mask |= TU_DYNAMIC_DISCARD_RECTANGLE;
1685 }
1686 }
1687 }
1688
1689 static VkResult
1690 tu_create_cmd_buffer(struct tu_device *device,
1691 struct tu_cmd_pool *pool,
1692 VkCommandBufferLevel level,
1693 VkCommandBuffer *pCommandBuffer)
1694 {
1695 struct tu_cmd_buffer *cmd_buffer;
1696 cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8,
1697 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1698 if (cmd_buffer == NULL)
1699 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1700
1701 cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1702 cmd_buffer->device = device;
1703 cmd_buffer->pool = pool;
1704 cmd_buffer->level = level;
1705
1706 if (pool) {
1707 list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
1708 cmd_buffer->queue_family_index = pool->queue_family_index;
1709
1710 } else {
1711 /* Init the pool_link so we can safely call list_del when we destroy
1712 * the command buffer
1713 */
1714 list_inithead(&cmd_buffer->pool_link);
1715 cmd_buffer->queue_family_index = TU_QUEUE_GENERAL;
1716 }
1717
1718 tu_bo_list_init(&cmd_buffer->bo_list);
1719 tu_cs_init(&cmd_buffer->cs, TU_CS_MODE_GROW, 4096);
1720 tu_cs_init(&cmd_buffer->draw_cs, TU_CS_MODE_GROW, 4096);
1721 tu_cs_init(&cmd_buffer->draw_state, TU_CS_MODE_SUB_STREAM, 2048);
1722 tu_cs_init(&cmd_buffer->tile_cs, TU_CS_MODE_SUB_STREAM, 1024);
1723
1724 *pCommandBuffer = tu_cmd_buffer_to_handle(cmd_buffer);
1725
1726 list_inithead(&cmd_buffer->upload.list);
1727
1728 cmd_buffer->marker_reg = REG_A6XX_CP_SCRATCH_REG(
1729 cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY ? 7 : 6);
1730
1731 VkResult result = tu_bo_init_new(device, &cmd_buffer->scratch_bo, 0x1000);
1732 if (result != VK_SUCCESS)
1733 return result;
1734
1735 #define VSC_DATA_SIZE(pitch) ((pitch) * 32 + 0x100) /* extra size to store VSC_SIZE */
1736 #define VSC_DATA2_SIZE(pitch) ((pitch) * 32)
1737
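/* vsc_data/vsc_data2 hold the visibility streams written by the hardware
 * binning pass and read back while rendering each tile.  The pitches below
 * are fixed guesses (they appear to match the values the freedreno gallium
 * driver uses) rather than sizes derived from the render pass, hence the
 * TODO about overflow just below.
 */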
1738 /* TODO: resize on overflow or compute a max size from # of vertices in renderpass?? */
1739 cmd_buffer->vsc_data_pitch = 0x440 * 4;
1740 cmd_buffer->vsc_data2_pitch = 0x1040 * 4;
1741
1742 result = tu_bo_init_new(device, &cmd_buffer->vsc_data, VSC_DATA_SIZE(cmd_buffer->vsc_data_pitch));
1743 if (result != VK_SUCCESS)
1744 goto fail_vsc_data;
1745
1746 result = tu_bo_init_new(device, &cmd_buffer->vsc_data2, VSC_DATA2_SIZE(cmd_buffer->vsc_data2_pitch));
1747 if (result != VK_SUCCESS)
1748 goto fail_vsc_data2;
1749
1750 return VK_SUCCESS;
1751
1752 fail_vsc_data2:
1753 tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data);
1754 fail_vsc_data:
1755 tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo);
1756 return result;
1757 }
1758
1759 static void
1760 tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
1761 {
1762 tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo);
1763 tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data);
1764 tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data2);
1765
1766 list_del(&cmd_buffer->pool_link);
1767
1768 for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++)
1769 free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr);
1770
1771 tu_cs_finish(cmd_buffer->device, &cmd_buffer->cs);
1772 tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_cs);
1773 tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_state);
1774 tu_cs_finish(cmd_buffer->device, &cmd_buffer->tile_cs);
1775
1776 tu_bo_list_destroy(&cmd_buffer->bo_list);
1777 vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
1778 }
1779
1780 static VkResult
1781 tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer)
1782 {
1783 cmd_buffer->wait_for_idle = true;
1784
1785 cmd_buffer->record_result = VK_SUCCESS;
1786
1787 tu_bo_list_reset(&cmd_buffer->bo_list);
1788 tu_cs_reset(cmd_buffer->device, &cmd_buffer->cs);
1789 tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_cs);
1790 tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_state);
1791 tu_cs_reset(cmd_buffer->device, &cmd_buffer->tile_cs);
1792
1793 for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) {
1794 cmd_buffer->descriptors[i].dirty = 0;
1795 cmd_buffer->descriptors[i].valid = 0;
1796 cmd_buffer->descriptors[i].push_dirty = false;
1797 }
1798
1799 cmd_buffer->status = TU_CMD_BUFFER_STATUS_INITIAL;
1800
1801 return cmd_buffer->record_result;
1802 }
1803
1804 static VkResult
1805 tu_cmd_state_setup_attachments(struct tu_cmd_buffer *cmd_buffer,
1806 const VkRenderPassBeginInfo *info)
1807 {
1808 struct tu_cmd_state *state = &cmd_buffer->state;
1809 const struct tu_framebuffer *fb = state->framebuffer;
1810 const struct tu_render_pass *pass = state->pass;
1811
1812 for (uint32_t i = 0; i < fb->attachment_count; ++i) {
1813 const struct tu_image_view *iview = fb->attachments[i].attachment;
1814 tu_bo_list_add(&cmd_buffer->bo_list, iview->image->bo,
1815 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
1816 }
1817
1818 if (pass->attachment_count == 0) {
1819 state->attachments = NULL;
1820 return VK_SUCCESS;
1821 }
1822
1823 state->attachments =
1824 vk_alloc(&cmd_buffer->pool->alloc,
1825 pass->attachment_count * sizeof(state->attachments[0]), 8,
1826 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1827 if (state->attachments == NULL) {
1828 cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1829 return cmd_buffer->record_result;
1830 }
1831
1832 for (uint32_t i = 0; i < pass->attachment_count; ++i) {
1833 const struct tu_render_pass_attachment *att = &pass->attachments[i];
1834 VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
1835 VkImageAspectFlags clear_aspects = 0;
1836
1837 if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
1838 /* color attachment */
1839 if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1840 clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
1841 }
1842 } else {
1843 /* depthstencil attachment */
1844 if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
1845 att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1846 clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
1847 if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
1848 att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
1849 clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
1850 }
1851 if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
1852 att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1853 clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
1854 }
1855 }
1856
1857 state->attachments[i].pending_clear_aspects = clear_aspects;
1858 state->attachments[i].cleared_views = 0;
1859 if (clear_aspects && info) {
1860 assert(info->clearValueCount > i);
1861 state->attachments[i].clear_value = info->pClearValues[i];
1862 }
1863
1864 state->attachments[i].current_layout = att->initial_layout;
1865 }
1866
1867 return VK_SUCCESS;
1868 }
1869
1870 VkResult
1871 tu_AllocateCommandBuffers(VkDevice _device,
1872 const VkCommandBufferAllocateInfo *pAllocateInfo,
1873 VkCommandBuffer *pCommandBuffers)
1874 {
1875 TU_FROM_HANDLE(tu_device, device, _device);
1876 TU_FROM_HANDLE(tu_cmd_pool, pool, pAllocateInfo->commandPool);
1877
1878 VkResult result = VK_SUCCESS;
1879 uint32_t i;
1880
1881 for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
1882
1883 if (!list_is_empty(&pool->free_cmd_buffers)) {
1884 struct tu_cmd_buffer *cmd_buffer = list_first_entry(
1885 &pool->free_cmd_buffers, struct tu_cmd_buffer, pool_link);
1886
1887 list_del(&cmd_buffer->pool_link);
1888 list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
1889
1890 result = tu_reset_cmd_buffer(cmd_buffer);
1891 cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1892 cmd_buffer->level = pAllocateInfo->level;
1893
1894 pCommandBuffers[i] = tu_cmd_buffer_to_handle(cmd_buffer);
1895 } else {
1896 result = tu_create_cmd_buffer(device, pool, pAllocateInfo->level,
1897 &pCommandBuffers[i]);
1898 }
1899 if (result != VK_SUCCESS)
1900 break;
1901 }
1902
1903 if (result != VK_SUCCESS) {
1904 tu_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i,
1905 pCommandBuffers);
1906
1907 /* From the Vulkan 1.0.66 spec:
1908 *
1909 * "vkAllocateCommandBuffers can be used to create multiple
1910 * command buffers. If the creation of any of those command
1911 * buffers fails, the implementation must destroy all
1912 * successfully created command buffer objects from this
1913 * command, set all entries of the pCommandBuffers array to
1914 * NULL and return the error."
1915 */
1916 memset(pCommandBuffers, 0,
1917 sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount);
1918 }
1919
1920 return result;
1921 }
1922
1923 void
1924 tu_FreeCommandBuffers(VkDevice device,
1925 VkCommandPool commandPool,
1926 uint32_t commandBufferCount,
1927 const VkCommandBuffer *pCommandBuffers)
1928 {
1929 for (uint32_t i = 0; i < commandBufferCount; i++) {
1930 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
1931
1932 if (cmd_buffer) {
1933 if (cmd_buffer->pool) {
1934 list_del(&cmd_buffer->pool_link);
1935 list_addtail(&cmd_buffer->pool_link,
1936 &cmd_buffer->pool->free_cmd_buffers);
1937 } else
1938 tu_cmd_buffer_destroy(cmd_buffer);
1939 }
1940 }
1941 }
1942
1943 VkResult
1944 tu_ResetCommandBuffer(VkCommandBuffer commandBuffer,
1945 VkCommandBufferResetFlags flags)
1946 {
1947 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
1948 return tu_reset_cmd_buffer(cmd_buffer);
1949 }
1950
1951 VkResult
1952 tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
1953 const VkCommandBufferBeginInfo *pBeginInfo)
1954 {
1955 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
1956 VkResult result = VK_SUCCESS;
1957
1958 if (cmd_buffer->status != TU_CMD_BUFFER_STATUS_INITIAL) {
1959 /* If the command buffer has already been reset with
1960 * vkResetCommandBuffer, no need to do it again.
1961 */
1962 result = tu_reset_cmd_buffer(cmd_buffer);
1963 if (result != VK_SUCCESS)
1964 return result;
1965 }
1966
1967 memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
1968 cmd_buffer->usage_flags = pBeginInfo->flags;
1969
1970 tu_cs_begin(&cmd_buffer->cs);
1971 tu_cs_begin(&cmd_buffer->draw_cs);
1972
1973 cmd_buffer->marker_seqno = 0;
1974 cmd_buffer->scratch_seqno = 0;
1975
1976 /* setup initial configuration into command buffer */
1977 if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
1978 switch (cmd_buffer->queue_family_index) {
1979 case TU_QUEUE_GENERAL:
1980 tu6_init_hw(cmd_buffer, &cmd_buffer->cs);
1981 break;
1982 default:
1983 break;
1984 }
1985 }
1986
1987 cmd_buffer->status = TU_CMD_BUFFER_STATUS_RECORDING;
1988
1989 return VK_SUCCESS;
1990 }
1991
1992 void
1993 tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
1994 uint32_t firstBinding,
1995 uint32_t bindingCount,
1996 const VkBuffer *pBuffers,
1997 const VkDeviceSize *pOffsets)
1998 {
1999 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2000
2001 assert(firstBinding + bindingCount <= MAX_VBS);
2002
2003 for (uint32_t i = 0; i < bindingCount; i++) {
2004 cmd->state.vb.buffers[firstBinding + i] =
2005 tu_buffer_from_handle(pBuffers[i]);
2006 cmd->state.vb.offsets[firstBinding + i] = pOffsets[i];
2007 }
2008
2009 /* VB states depend on VkPipelineVertexInputStateCreateInfo */
2010 cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
2011 }
2012
2013 void
2014 tu_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
2015 VkBuffer buffer,
2016 VkDeviceSize offset,
2017 VkIndexType indexType)
2018 {
2019 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2020 TU_FROM_HANDLE(tu_buffer, buf, buffer);
2021
2022 /* initialize/update the restart index */
2023 if (!cmd->state.index_buffer || cmd->state.index_type != indexType) {
2024 struct tu_cs *draw_cs = &cmd->draw_cs;
2025 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 2);
2026 if (result != VK_SUCCESS) {
2027 cmd->record_result = result;
2028 return;
2029 }
2030
2031 tu6_emit_restart_index(
2032 draw_cs, indexType == VK_INDEX_TYPE_UINT32 ? 0xffffffff : 0xffff);
2033
2034 tu_cs_sanity_check(draw_cs);
2035 }
2036
2037 /* track the BO */
2038 if (cmd->state.index_buffer != buf)
2039 tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
2040
2041 cmd->state.index_buffer = buf;
2042 cmd->state.index_offset = offset;
2043 cmd->state.index_type = indexType;
2044 }
2045
2046 void
2047 tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
2048 VkPipelineBindPoint pipelineBindPoint,
2049 VkPipelineLayout _layout,
2050 uint32_t firstSet,
2051 uint32_t descriptorSetCount,
2052 const VkDescriptorSet *pDescriptorSets,
2053 uint32_t dynamicOffsetCount,
2054 const uint32_t *pDynamicOffsets)
2055 {
2056 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2057 TU_FROM_HANDLE(tu_pipeline_layout, layout, _layout);
2058 unsigned dyn_idx = 0;
2059
2060 struct tu_descriptor_state *descriptors_state =
2061 tu_get_descriptors_state(cmd_buffer, pipelineBindPoint);
2062
2063 for (unsigned i = 0; i < descriptorSetCount; ++i) {
2064 unsigned idx = i + firstSet;
2065 TU_FROM_HANDLE(tu_descriptor_set, set, pDescriptorSets[i]);
2066
2067 descriptors_state->sets[idx] = set;
2068 descriptors_state->valid |= (1u << idx);
2069
2070 for(unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) {
2071 unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
2072 assert(dyn_idx < dynamicOffsetCount);
2073
2074 descriptors_state->dynamic_buffers[idx] =
2075 set->dynamic_descriptors[j].va + pDynamicOffsets[dyn_idx];
2076 }
2077 }
2078
2079 cmd_buffer->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS;
2080 }
2081
2082 void
2083 tu_CmdPushConstants(VkCommandBuffer commandBuffer,
2084 VkPipelineLayout layout,
2085 VkShaderStageFlags stageFlags,
2086 uint32_t offset,
2087 uint32_t size,
2088 const void *pValues)
2089 {
2090 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2091 memcpy((void*) cmd_buffer->push_constants + offset, pValues, size);
2092 }
2093
2094 VkResult
2095 tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
2096 {
2097 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2098
2099 if (cmd_buffer->scratch_seqno) {
2100 tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->scratch_bo,
2101 MSM_SUBMIT_BO_WRITE);
2102 }
2103
2104 if (cmd_buffer->use_vsc_data) {
2105 tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->vsc_data,
2106 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
2107 tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->vsc_data2,
2108 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
2109 }
2110
2111 for (uint32_t i = 0; i < cmd_buffer->draw_cs.bo_count; i++) {
2112 tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_cs.bos[i],
2113 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2114 }
2115
2116 for (uint32_t i = 0; i < cmd_buffer->draw_state.bo_count; i++) {
2117 tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_state.bos[i],
2118 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2119 }
2120
2121 for (uint32_t i = 0; i < cmd_buffer->tile_cs.bo_count; i++) {
2122 tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->tile_cs.bos[i],
2123 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2124 }
2125
2126 tu_cs_end(&cmd_buffer->cs);
2127 tu_cs_end(&cmd_buffer->draw_cs);
2128
2129 assert(!cmd_buffer->state.attachments);
2130
2131 cmd_buffer->status = TU_CMD_BUFFER_STATUS_EXECUTABLE;
2132
2133 return cmd_buffer->record_result;
2134 }
2135
2136 void
2137 tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
2138 VkPipelineBindPoint pipelineBindPoint,
2139 VkPipeline _pipeline)
2140 {
2141 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2142 TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline);
2143
2144 switch (pipelineBindPoint) {
2145 case VK_PIPELINE_BIND_POINT_GRAPHICS:
2146 cmd->state.pipeline = pipeline;
2147 cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE;
2148 break;
2149 case VK_PIPELINE_BIND_POINT_COMPUTE:
2150 cmd->state.compute_pipeline = pipeline;
2151 cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_PIPELINE;
2152 break;
2153 default:
2154 unreachable("unrecognized pipeline bind point");
2155 break;
2156 }
2157
2158 tu_bo_list_add(&cmd->bo_list, &pipeline->program.binary_bo,
2159 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2160 for (uint32_t i = 0; i < pipeline->cs.bo_count; i++) {
2161 tu_bo_list_add(&cmd->bo_list, pipeline->cs.bos[i],
2162 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2163 }
2164 }
2165
2166 void
2167 tu_CmdSetViewport(VkCommandBuffer commandBuffer,
2168 uint32_t firstViewport,
2169 uint32_t viewportCount,
2170 const VkViewport *pViewports)
2171 {
2172 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2173 struct tu_cs *draw_cs = &cmd->draw_cs;
2174
2175 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 12);
2176 if (result != VK_SUCCESS) {
2177 cmd->record_result = result;
2178 return;
2179 }
2180
2181 assert(firstViewport == 0 && viewportCount == 1);
2182 tu6_emit_viewport(draw_cs, pViewports);
2183
2184 tu_cs_sanity_check(draw_cs);
2185 }
2186
2187 void
2188 tu_CmdSetScissor(VkCommandBuffer commandBuffer,
2189 uint32_t firstScissor,
2190 uint32_t scissorCount,
2191 const VkRect2D *pScissors)
2192 {
2193 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2194 struct tu_cs *draw_cs = &cmd->draw_cs;
2195
2196 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 3);
2197 if (result != VK_SUCCESS) {
2198 cmd->record_result = result;
2199 return;
2200 }
2201
2202 assert(firstScissor == 0 && scissorCount == 1);
2203 tu6_emit_scissor(draw_cs, pScissors);
2204
2205 tu_cs_sanity_check(draw_cs);
2206 }
2207
2208 void
2209 tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
2210 {
2211 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2212
2213 cmd->state.dynamic.line_width = lineWidth;
2214
2215 /* line width depends on VkPipelineRasterizationStateCreateInfo */
2216 cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
2217 }
2218
2219 void
2220 tu_CmdSetDepthBias(VkCommandBuffer commandBuffer,
2221 float depthBiasConstantFactor,
2222 float depthBiasClamp,
2223 float depthBiasSlopeFactor)
2224 {
2225 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2226 struct tu_cs *draw_cs = &cmd->draw_cs;
2227
2228 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 4);
2229 if (result != VK_SUCCESS) {
2230 cmd->record_result = result;
2231 return;
2232 }
2233
2234 tu6_emit_depth_bias(draw_cs, depthBiasConstantFactor, depthBiasClamp,
2235 depthBiasSlopeFactor);
2236
2237 tu_cs_sanity_check(draw_cs);
2238 }
2239
2240 void
2241 tu_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
2242 const float blendConstants[4])
2243 {
2244 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2245 struct tu_cs *draw_cs = &cmd->draw_cs;
2246
2247 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 5);
2248 if (result != VK_SUCCESS) {
2249 cmd->record_result = result;
2250 return;
2251 }
2252
2253 tu6_emit_blend_constants(draw_cs, blendConstants);
2254
2255 tu_cs_sanity_check(draw_cs);
2256 }
2257
2258 void
2259 tu_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
2260 float minDepthBounds,
2261 float maxDepthBounds)
2262 {
2263 }
2264
2265 void
2266 tu_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
2267 VkStencilFaceFlags faceMask,
2268 uint32_t compareMask)
2269 {
2270 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2271
2272 if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
2273 cmd->state.dynamic.stencil_compare_mask.front = compareMask;
2274 if (faceMask & VK_STENCIL_FACE_BACK_BIT)
2275 cmd->state.dynamic.stencil_compare_mask.back = compareMask;
2276
2277 /* the front/back compare masks must be updated together */
2278 cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
2279 }
2280
2281 void
2282 tu_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
2283 VkStencilFaceFlags faceMask,
2284 uint32_t writeMask)
2285 {
2286 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2287
2288 if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
2289 cmd->state.dynamic.stencil_write_mask.front = writeMask;
2290 if (faceMask & VK_STENCIL_FACE_BACK_BIT)
2291 cmd->state.dynamic.stencil_write_mask.back = writeMask;
2292
2293 /* the front/back write masks must be updated together */
2294 cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
2295 }
2296
2297 void
2298 tu_CmdSetStencilReference(VkCommandBuffer commandBuffer,
2299 VkStencilFaceFlags faceMask,
2300 uint32_t reference)
2301 {
2302 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2303
2304 if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
2305 cmd->state.dynamic.stencil_reference.front = reference;
2306 if (faceMask & VK_STENCIL_FACE_BACK_BIT)
2307 cmd->state.dynamic.stencil_reference.back = reference;
2308
2309 /* the front/back references must be updated together */
2310 cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
2311 }
2312
2313 void
2314 tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
2315 uint32_t commandBufferCount,
2316 const VkCommandBuffer *pCmdBuffers)
2317 {
2318 }
2319
2320 VkResult
2321 tu_CreateCommandPool(VkDevice _device,
2322 const VkCommandPoolCreateInfo *pCreateInfo,
2323 const VkAllocationCallbacks *pAllocator,
2324 VkCommandPool *pCmdPool)
2325 {
2326 TU_FROM_HANDLE(tu_device, device, _device);
2327 struct tu_cmd_pool *pool;
2328
2329 pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
2330 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2331 if (pool == NULL)
2332 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2333
2334 if (pAllocator)
2335 pool->alloc = *pAllocator;
2336 else
2337 pool->alloc = device->alloc;
2338
2339 list_inithead(&pool->cmd_buffers);
2340 list_inithead(&pool->free_cmd_buffers);
2341
2342 pool->queue_family_index = pCreateInfo->queueFamilyIndex;
2343
2344 *pCmdPool = tu_cmd_pool_to_handle(pool);
2345
2346 return VK_SUCCESS;
2347 }
2348
2349 void
2350 tu_DestroyCommandPool(VkDevice _device,
2351 VkCommandPool commandPool,
2352 const VkAllocationCallbacks *pAllocator)
2353 {
2354 TU_FROM_HANDLE(tu_device, device, _device);
2355 TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);
2356
2357 if (!pool)
2358 return;
2359
2360 list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
2361 &pool->cmd_buffers, pool_link)
2362 {
2363 tu_cmd_buffer_destroy(cmd_buffer);
2364 }
2365
2366 list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
2367 &pool->free_cmd_buffers, pool_link)
2368 {
2369 tu_cmd_buffer_destroy(cmd_buffer);
2370 }
2371
2372 vk_free2(&device->alloc, pAllocator, pool);
2373 }
2374
2375 VkResult
2376 tu_ResetCommandPool(VkDevice device,
2377 VkCommandPool commandPool,
2378 VkCommandPoolResetFlags flags)
2379 {
2380 TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);
2381 VkResult result;
2382
2383 list_for_each_entry(struct tu_cmd_buffer, cmd_buffer, &pool->cmd_buffers,
2384 pool_link)
2385 {
2386 result = tu_reset_cmd_buffer(cmd_buffer);
2387 if (result != VK_SUCCESS)
2388 return result;
2389 }
2390
2391 return VK_SUCCESS;
2392 }
2393
2394 void
2395 tu_TrimCommandPool(VkDevice device,
2396 VkCommandPool commandPool,
2397 VkCommandPoolTrimFlags flags)
2398 {
2399 TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);
2400
2401 if (!pool)
2402 return;
2403
2404 list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
2405 &pool->free_cmd_buffers, pool_link)
2406 {
2407 tu_cmd_buffer_destroy(cmd_buffer);
2408 }
2409 }
2410
2411 void
2412 tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
2413 const VkRenderPassBeginInfo *pRenderPassBegin,
2414 VkSubpassContents contents)
2415 {
2416 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2417 TU_FROM_HANDLE(tu_render_pass, pass, pRenderPassBegin->renderPass);
2418 TU_FROM_HANDLE(tu_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
2419 VkResult result;
2420
2421 cmd_buffer->state.pass = pass;
2422 cmd_buffer->state.subpass = pass->subpasses;
2423 cmd_buffer->state.framebuffer = framebuffer;
2424
2425 result = tu_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin);
2426 if (result != VK_SUCCESS)
2427 return;
2428
2429 tu_cmd_update_tiling_config(cmd_buffer, &pRenderPassBegin->renderArea);
2430 tu_cmd_prepare_tile_load_ib(cmd_buffer);
2431 tu_cmd_prepare_tile_store_ib(cmd_buffer);
2432
2433 /* note: use_hw_binning only checks tiling config */
2434 if (use_hw_binning(cmd_buffer))
2435 cmd_buffer->use_vsc_data = true;
2436 }
2437
2438 void
2439 tu_CmdBeginRenderPass2KHR(VkCommandBuffer commandBuffer,
2440 const VkRenderPassBeginInfo *pRenderPassBeginInfo,
2441 const VkSubpassBeginInfoKHR *pSubpassBeginInfo)
2442 {
2443 tu_CmdBeginRenderPass(commandBuffer, pRenderPassBeginInfo,
2444 pSubpassBeginInfo->contents);
2445 }
2446
2447 void
2448 tu_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents)
2449 {
2450 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2451
2452 tu_cmd_render_tiles(cmd);
2453
2454 cmd->state.subpass++;
2455
2456 tu_cmd_update_tiling_config(cmd, NULL);
2457 tu_cmd_prepare_tile_load_ib(cmd);
2458 tu_cmd_prepare_tile_store_ib(cmd);
2459 }
2460
2461 void
2462 tu_CmdNextSubpass2KHR(VkCommandBuffer commandBuffer,
2463 const VkSubpassBeginInfoKHR *pSubpassBeginInfo,
2464 const VkSubpassEndInfoKHR *pSubpassEndInfo)
2465 {
2466 tu_CmdNextSubpass(commandBuffer, pSubpassBeginInfo->contents);
2467 }
2468
2469 struct tu_draw_info
2470 {
2471 /**
2472 * Number of vertices.
2473 */
2474 uint32_t count;
2475
2476 /**
2477 * Index of the first vertex.
2478 */
2479 int32_t vertex_offset;
2480
2481 /**
2482 * First instance id.
2483 */
2484 uint32_t first_instance;
2485
2486 /**
2487 * Number of instances.
2488 */
2489 uint32_t instance_count;
2490
2491 /**
2492 * First index (indexed draws only).
2493 */
2494 uint32_t first_index;
2495
2496 /**
2497 * Whether it's an indexed draw.
2498 */
2499 bool indexed;
2500
2501 /**
2502 * Indirect draw parameters resource.
2503 */
2504 struct tu_buffer *indirect;
2505 uint64_t indirect_offset;
2506 uint32_t stride;
2507
2508 /**
2509 * Draw count parameters resource.
2510 */
2511 struct tu_buffer *count_buffer;
2512 uint64_t count_buffer_offset;
2513 };
2514
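/* Draw state is split into CP_SET_DRAW_STATE groups so the CP can replay
 * only the groups relevant to a given pass.  The enable_mask values used
 * below appear to select which passes execute a group: bit 0 for the
 * binning pass and bits 1-2 for the GMEM and sysmem rendering passes
 * (so 0x1 = binning only, 0x6 = rendering only, 0x7 = all passes).
 */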
2515 enum tu_draw_state_group_id
2516 {
2517 TU_DRAW_STATE_PROGRAM,
2518 TU_DRAW_STATE_PROGRAM_BINNING,
2519 TU_DRAW_STATE_VI,
2520 TU_DRAW_STATE_VI_BINNING,
2521 TU_DRAW_STATE_VP,
2522 TU_DRAW_STATE_RAST,
2523 TU_DRAW_STATE_DS,
2524 TU_DRAW_STATE_BLEND,
2525 TU_DRAW_STATE_VS_CONST,
2526 TU_DRAW_STATE_FS_CONST,
2527 TU_DRAW_STATE_VS_TEX,
2528 TU_DRAW_STATE_FS_TEX,
2529 TU_DRAW_STATE_FS_IBO,
2530
2531 TU_DRAW_STATE_COUNT,
2532 };
2533
2534 struct tu_draw_state_group
2535 {
2536 enum tu_draw_state_group_id id;
2537 uint32_t enable_mask;
2538 struct tu_cs_entry ib;
2539 };
2540
2541 static struct tu_sampler*
2542 sampler_ptr(struct tu_descriptor_state *descriptors_state,
2543 const struct tu_descriptor_map *map, unsigned i)
2544 {
2545 assert(descriptors_state->valid & (1 << map->set[i]));
2546
2547 struct tu_descriptor_set *set = descriptors_state->sets[map->set[i]];
2548 assert(map->binding[i] < set->layout->binding_count);
2549
2550 const struct tu_descriptor_set_binding_layout *layout =
2551 &set->layout->binding[map->binding[i]];
2552
2553 switch (layout->type) {
2554 case VK_DESCRIPTOR_TYPE_SAMPLER:
2555 return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4];
2556 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2557 return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4 + A6XX_TEX_CONST_DWORDS];
2558 default:
2559 unreachable("unimplemented descriptor type");
2560 break;
2561 }
2562 }
2563
2564 static uint32_t*
2565 texture_ptr(struct tu_descriptor_state *descriptors_state,
2566 const struct tu_descriptor_map *map, unsigned i)
2567 {
2568 assert(descriptors_state->valid & (1 << map->set[i]));
2569
2570 struct tu_descriptor_set *set = descriptors_state->sets[map->set[i]];
2571 assert(map->binding[i] < set->layout->binding_count);
2572
2573 const struct tu_descriptor_set_binding_layout *layout =
2574 &set->layout->binding[map->binding[i]];
2575
2576 switch (layout->type) {
2577 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
2578 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2579 return &set->mapped_ptr[layout->offset / 4];
2580 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2581 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2582 return &set->mapped_ptr[layout->offset / 4];
2583 default:
2584 unreachable("unimplemented descriptor type");
2585 break;
2586 }
2587 }
2588
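/* Returns the 64-bit GPU address of a UBO/SSBO descriptor.  Dynamic
 * descriptors come from the dynamic_buffers array (which already had the
 * dynamic offset applied in CmdBindDescriptorSets); everything else is
 * read back out of the descriptor set's mapped dwords.
 */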
2589 static uint64_t
2590 buffer_ptr(struct tu_descriptor_state *descriptors_state,
2591 const struct tu_descriptor_map *map,
2592 unsigned i)
2593 {
2594 assert(descriptors_state->valid & (1 << map->set[i]));
2595
2596 struct tu_descriptor_set *set = descriptors_state->sets[map->set[i]];
2597 assert(map->binding[i] < set->layout->binding_count);
2598
2599 const struct tu_descriptor_set_binding_layout *layout =
2600 &set->layout->binding[map->binding[i]];
2601
2602 switch (layout->type) {
2603 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2604 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
2605 return descriptors_state->dynamic_buffers[layout->dynamic_offset_offset];
2606 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2607 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
2608 return (uint64_t) set->mapped_ptr[layout->offset / 4 + 1] << 32 |
2609 set->mapped_ptr[layout->offset / 4];
2610 default:
2611 unreachable("unimplemented descriptor type");
2612 break;
2613 }
2614 }
2615
2616 static inline uint32_t
2617 tu6_stage2opcode(gl_shader_stage type)
2618 {
2619 switch (type) {
2620 case MESA_SHADER_VERTEX:
2621 case MESA_SHADER_TESS_CTRL:
2622 case MESA_SHADER_TESS_EVAL:
2623 case MESA_SHADER_GEOMETRY:
2624 return CP_LOAD_STATE6_GEOM;
2625 case MESA_SHADER_FRAGMENT:
2626 case MESA_SHADER_COMPUTE:
2627 case MESA_SHADER_KERNEL:
2628 return CP_LOAD_STATE6_FRAG;
2629 default:
2630 unreachable("bad shader type");
2631 }
2632 }
2633
2634 static inline enum a6xx_state_block
2635 tu6_stage2shadersb(gl_shader_stage type)
2636 {
2637 switch (type) {
2638 case MESA_SHADER_VERTEX:
2639 return SB6_VS_SHADER;
2640 case MESA_SHADER_FRAGMENT:
2641 return SB6_FS_SHADER;
2642 case MESA_SHADER_COMPUTE:
2643 case MESA_SHADER_KERNEL:
2644 return SB6_CS_SHADER;
2645 default:
2646 unreachable("bad shader type");
2647 return ~0;
2648 }
2649 }
2650
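/* Emits the user const state for one stage.  Range 0 of the ir3 UBO
 * analysis is the Vulkan push constant block and is uploaded inline from
 * cmd->push_constants; the remaining ranges are real UBOs whose contents
 * the CP loads indirectly from the descriptor's GPU address.
 */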
2651 static void
2652 tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
2653 struct tu_descriptor_state *descriptors_state,
2654 gl_shader_stage type,
2655 uint32_t *push_constants)
2656 {
2657 const struct tu_program_descriptor_linkage *link =
2658 &pipeline->program.link[type];
2659 const struct ir3_ubo_analysis_state *state = &link->ubo_state;
2660
2661 for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
2662 if (state->range[i].start < state->range[i].end) {
2663 uint32_t size = state->range[i].end - state->range[i].start;
2664 uint32_t offset = state->range[i].start;
2665
2666 /* and even if the start of the const buffer is before
2667 * first_immediate, the end may not be:
2668 */
2669 size = MIN2(size, (16 * link->constlen) - state->range[i].offset);
2670
2671 if (size == 0)
2672 continue;
2673
2674 /* things should be aligned to vec4: */
2675 debug_assert((state->range[i].offset % 16) == 0);
2676 debug_assert((size % 16) == 0);
2677 debug_assert((offset % 16) == 0);
2678
2679 if (i == 0) {
2680 /* push constants */
2681 tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + (size / 4));
2682 tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
2683 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
2684 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
2685 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
2686 CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
2687 tu_cs_emit(cs, 0);
2688 tu_cs_emit(cs, 0);
2689 for (unsigned i = 0; i < size / 4; i++)
2690 tu_cs_emit(cs, push_constants[i + offset / 4]);
2691 continue;
2692 }
2693
2694 uint64_t va = buffer_ptr(descriptors_state, &link->ubo_map, i - 1);
2695
2696 tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3);
2697 tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
2698 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
2699 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
2700 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
2701 CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
2702 tu_cs_emit_qw(cs, va + offset);
2703 }
2704 }
2705 }
2706
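/* Uploads the table of UBO base addresses into the const file at the
 * offset ir3 reserved for it (const_state.offsets.ubo).  Each entry is a
 * 64-bit address, and the table is padded to an even number of entries
 * because NUM_UNIT counts vec4s.
 */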
2707 static void
2708 tu6_emit_ubos(struct tu_cs *cs, const struct tu_pipeline *pipeline,
2709 struct tu_descriptor_state *descriptors_state,
2710 gl_shader_stage type)
2711 {
2712 const struct tu_program_descriptor_linkage *link =
2713 &pipeline->program.link[type];
2714
2715 uint32_t num = MIN2(link->ubo_map.num, link->const_state.num_ubos);
2716 uint32_t anum = align(num, 2);
2717 uint32_t i;
2718
2719 if (!num)
2720 return;
2721
2722 tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + (2 * anum));
2723 tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(link->const_state.offsets.ubo) |
2724 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
2725 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
2726 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
2727 CP_LOAD_STATE6_0_NUM_UNIT(anum/2));
2728 tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
2729 tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
2730
2731 for (i = 0; i < num; i++)
2732 tu_cs_emit_qw(cs, buffer_ptr(descriptors_state, &link->ubo_map, i));
2733
2734 for (; i < anum; i++) {
2735 tu_cs_emit(cs, 0xffffffff);
2736 tu_cs_emit(cs, 0xffffffff);
2737 }
2738 }
2739
2740 static struct tu_cs_entry
2741 tu6_emit_consts(struct tu_cmd_buffer *cmd,
2742 const struct tu_pipeline *pipeline,
2743 struct tu_descriptor_state *descriptors_state,
2744 gl_shader_stage type)
2745 {
2746 struct tu_cs cs;
2747 tu_cs_begin_sub_stream(cmd->device, &cmd->draw_state, 512, &cs); /* TODO: maximum size? */
2748
2749 tu6_emit_user_consts(&cs, pipeline, descriptors_state, type, cmd->push_constants);
2750 tu6_emit_ubos(&cs, pipeline, descriptors_state, type);
2751
2752 return tu_cs_end_sub_stream(&cmd->draw_state, &cs);
2753 }
2754
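/* Gathers the texture descriptors and sampler state for a stage into two
 * arrays in the draw_state sub-stream, then builds a small IB that loads
 * them with CP_LOAD_STATE6 and points the per-stage TEX_CONST/TEX_SAMP
 * base registers at the same arrays.  needs_border is set if any sampler
 * uses a border color, so the caller knows to emit the border color table.
 */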
2755 static VkResult
2756 tu6_emit_textures(struct tu_cmd_buffer *cmd,
2757 gl_shader_stage type,
2758 struct tu_cs_entry *entry,
2759 bool *needs_border)
2760 {
2761 struct tu_device *device = cmd->device;
2762 struct tu_cs *draw_state = &cmd->draw_state;
2763 struct tu_descriptor_state *descriptors_state =
2764 &cmd->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS];
2765 const struct tu_program_descriptor_linkage *link =
2766 &cmd->state.pipeline->program.link[type];
2767 VkResult result;
2768
2769 if (link->texture_map.num == 0 && link->sampler_map.num == 0) {
2770 *entry = (struct tu_cs_entry) {};
2771 return VK_SUCCESS;
2772 }
2773
2774 /* allocate and fill texture state */
2775 struct ts_cs_memory tex_const;
2776 result = tu_cs_alloc(device, draw_state, link->texture_map.num, A6XX_TEX_CONST_DWORDS, &tex_const);
2777 if (result != VK_SUCCESS)
2778 return result;
2779
2780 for (unsigned i = 0; i < link->texture_map.num; i++) {
2781 memcpy(&tex_const.map[A6XX_TEX_CONST_DWORDS*i],
2782 texture_ptr(descriptors_state, &link->texture_map, i),
2783 A6XX_TEX_CONST_DWORDS*4);
2784 }
2785
2786 /* allocate and fill sampler state */
2787 struct ts_cs_memory tex_samp;
2788 result = tu_cs_alloc(device, draw_state, link->sampler_map.num, A6XX_TEX_SAMP_DWORDS, &tex_samp);
2789 if (result != VK_SUCCESS)
2790 return result;
2791
2792 for (unsigned i = 0; i < link->sampler_map.num; i++) {
2793 struct tu_sampler *sampler = sampler_ptr(descriptors_state, &link->sampler_map, i);
2794 memcpy(&tex_samp.map[A6XX_TEX_SAMP_DWORDS*i], sampler->state, sizeof(sampler->state));
2795 *needs_border |= sampler->needs_border;
2796 }
2797
2798 unsigned tex_samp_reg, tex_const_reg, tex_count_reg;
2799 enum a6xx_state_block sb;
2800
2801 switch (type) {
2802 case MESA_SHADER_VERTEX:
2803 sb = SB6_VS_TEX;
2804 tex_samp_reg = REG_A6XX_SP_VS_TEX_SAMP_LO;
2805 tex_const_reg = REG_A6XX_SP_VS_TEX_CONST_LO;
2806 tex_count_reg = REG_A6XX_SP_VS_TEX_COUNT;
2807 break;
2808 case MESA_SHADER_FRAGMENT:
2809 sb = SB6_FS_TEX;
2810 tex_samp_reg = REG_A6XX_SP_FS_TEX_SAMP_LO;
2811 tex_const_reg = REG_A6XX_SP_FS_TEX_CONST_LO;
2812 tex_count_reg = REG_A6XX_SP_FS_TEX_COUNT;
2813 break;
2814 case MESA_SHADER_COMPUTE:
2815 sb = SB6_CS_TEX;
2816 tex_samp_reg = REG_A6XX_SP_CS_TEX_SAMP_LO;
2817 tex_const_reg = REG_A6XX_SP_CS_TEX_CONST_LO;
2818 tex_count_reg = REG_A6XX_SP_CS_TEX_COUNT;
2819 break;
2820 default:
2821 unreachable("bad state block");
2822 }
2823
2824 struct tu_cs cs;
2825 result = tu_cs_begin_sub_stream(device, draw_state, 16, &cs);
2826 if (result != VK_SUCCESS)
2827 return result;
2828
2829 /* output sampler state: */
2830 tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
2831 tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
2832 CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
2833 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
2834 CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
2835 CP_LOAD_STATE6_0_NUM_UNIT(link->sampler_map.num));
2836 tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
2837
2838 tu_cs_emit_pkt4(&cs, tex_samp_reg, 2);
2839 tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
2840
2841 /* emit texture state: */
2842 tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
2843 tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
2844 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
2845 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
2846 CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
2847 CP_LOAD_STATE6_0_NUM_UNIT(link->texture_map.num));
2848 tu_cs_emit_qw(&cs, tex_const.iova); /* SRC_ADDR_LO/HI */
2849
2850 tu_cs_emit_pkt4(&cs, tex_const_reg, 2);
2851 tu_cs_emit_qw(&cs, tex_const.iova); /* SRC_ADDR_LO/HI */
2852
2853 tu_cs_emit_pkt4(&cs, tex_count_reg, 1);
2854 tu_cs_emit(&cs, link->texture_map.num);
2855
2856 *entry = tu_cs_end_sub_stream(draw_state, &cs);
2857 return VK_SUCCESS;
2858 }
2859
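/* Builds the IBO (image/SSBO) descriptor array for a stage.  SSBOs are
 * encoded as untyped 1D 32_UINT buffers; since robustBufferAccess is not
 * exposed, the size is simply set to the maximum storage buffer range.
 * Storage images are not implemented yet (see the tu_finishme below).
 */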
2860 static struct tu_cs_entry
2861 tu6_emit_ibo(struct tu_device *device, struct tu_cs *draw_state,
2862 const struct tu_pipeline *pipeline,
2863 struct tu_descriptor_state *descriptors_state,
2864 gl_shader_stage type)
2865 {
2866 const struct tu_program_descriptor_linkage *link =
2867 &pipeline->program.link[type];
2868
2869 uint32_t size = link->image_mapping.num_ibo * A6XX_TEX_CONST_DWORDS;
2870 if (!size)
2871 return (struct tu_cs_entry) {};
2872
2873 struct tu_cs cs;
2874 tu_cs_begin_sub_stream(device, draw_state, size, &cs);
2875
2876 for (unsigned i = 0; i < link->image_mapping.num_ibo; i++) {
2877 unsigned idx = link->image_mapping.ibo_to_image[i];
2878
2879 if (idx & IBO_SSBO) {
2880 idx &= ~IBO_SSBO;
2881
2882 uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx);
2883 /* We don't expose robustBufferAccess, so leave the size unlimited. */
2884 uint32_t sz = MAX_STORAGE_BUFFER_RANGE / 4;
2885
2886 tu_cs_emit(&cs, A6XX_IBO_0_FMT(TFMT6_32_UINT));
2887 tu_cs_emit(&cs,
2888 A6XX_IBO_1_WIDTH(sz & MASK(15)) |
2889 A6XX_IBO_1_HEIGHT(sz >> 15));
2890 tu_cs_emit(&cs,
2891 A6XX_IBO_2_UNK4 |
2892 A6XX_IBO_2_UNK31 |
2893 A6XX_IBO_2_TYPE(A6XX_TEX_1D));
2894 tu_cs_emit(&cs, 0);
2895 tu_cs_emit_qw(&cs, va);
2896 for (int i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
2897 tu_cs_emit(&cs, 0);
2898 } else {
2899 tu_finishme("Emit images");
2900 }
2901 }
2902
2903 struct tu_cs_entry entry = tu_cs_end_sub_stream(draw_state, &cs);
2904
2905 uint64_t ibo_addr = entry.bo->iova + entry.offset;
2906
2907 tu_cs_begin_sub_stream(device, draw_state, 64, &cs);
2908
2909 /* emit texture state: */
2910 tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6, 3);
2911 tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
2912 CP_LOAD_STATE6_0_STATE_TYPE(type == MESA_SHADER_COMPUTE ?
2913 ST6_IBO : ST6_SHADER) |
2914 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
2915 CP_LOAD_STATE6_0_STATE_BLOCK(type == MESA_SHADER_COMPUTE ?
2916 SB6_CS_SHADER : SB6_IBO) |
2917 CP_LOAD_STATE6_0_NUM_UNIT(link->image_mapping.num_ibo));
2918 tu_cs_emit_qw(&cs, ibo_addr); /* SRC_ADDR_LO/HI */
2919
2920 tu_cs_emit_pkt4(&cs,
2921 type == MESA_SHADER_COMPUTE ?
2922 REG_A6XX_SP_CS_IBO_LO : REG_A6XX_SP_IBO_LO, 2);
2923 tu_cs_emit_qw(&cs, ibo_addr); /* SRC_ADDR_LO/HI */
2924
2925 return tu_cs_end_sub_stream(draw_state, &cs);
2926 }
2927
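/* Hardware border color entry: each VK_BORDER_COLOR_* value is pre-expanded
 * into every representation the sampler might need (fp32/fp16/unorm/snorm
 * and the packed formats), 128 bytes per entry, using the same layout as
 * the freedreno gallium driver.
 */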
2928 struct PACKED bcolor_entry {
2929 uint32_t fp32[4];
2930 uint16_t ui16[4];
2931 int16_t si16[4];
2932 uint16_t fp16[4];
2933 uint16_t rgb565;
2934 uint16_t rgb5a1;
2935 uint16_t rgba4;
2936 uint8_t __pad0[2];
2937 uint8_t ui8[4];
2938 int8_t si8[4];
2939 uint32_t rgb10a2;
2940 uint32_t z24; /* also s8? */
2941 uint16_t srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */
2942 uint8_t __pad1[56];
2943 } border_color[] = {
2944 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = {},
2945 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = {},
2946 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = {
2947 .fp32[3] = 0x3f800000,
2948 .ui16[3] = 0xffff,
2949 .si16[3] = 0x7fff,
2950 .fp16[3] = 0x3c00,
2951 .rgb5a1 = 0x8000,
2952 .rgba4 = 0xf000,
2953 .ui8[3] = 0xff,
2954 .si8[3] = 0x7f,
2955 .rgb10a2 = 0xc0000000,
2956 .srgb[3] = 0x3c00,
2957 },
2958 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = {
2959 .fp32[3] = 1,
2960 .fp16[3] = 1,
2961 },
2962 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = {
2963 .fp32[0 ... 3] = 0x3f800000,
2964 .ui16[0 ... 3] = 0xffff,
2965 .si16[0 ... 3] = 0x7fff,
2966 .fp16[0 ... 3] = 0x3c00,
2967 .rgb565 = 0xffff,
2968 .rgb5a1 = 0xffff,
2969 .rgba4 = 0xffff,
2970 .ui8[0 ... 3] = 0xff,
2971 .si8[0 ... 3] = 0x7f,
2972 .rgb10a2 = 0xffffffff,
2973 .z24 = 0xffffff,
2974 .srgb[0 ... 3] = 0x3c00,
2975 },
2976 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = {
2977 .fp32[0 ... 3] = 1,
2978 .fp16[0 ... 3] = 1,
2979 },
2980 };
2981
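/* Copies one 128-byte bcolor_entry per VS sampler followed by one per FS
 * sampler into the draw_state sub-stream and points
 * SP_TP_BORDER_COLOR_BASE_ADDR at it.  The ordering presumably has to
 * match how the hardware indexes border colors per sampler.
 */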
2982 static VkResult
2983 tu6_emit_border_color(struct tu_cmd_buffer *cmd,
2984 struct tu_cs *cs)
2985 {
2986 STATIC_ASSERT(sizeof(struct bcolor_entry) == 128);
2987
2988 const struct tu_pipeline *pipeline = cmd->state.pipeline;
2989 struct tu_descriptor_state *descriptors_state =
2990 &cmd->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS];
2991 const struct tu_descriptor_map *vs_sampler =
2992 &pipeline->program.link[MESA_SHADER_VERTEX].sampler_map;
2993 const struct tu_descriptor_map *fs_sampler =
2994 &pipeline->program.link[MESA_SHADER_FRAGMENT].sampler_map;
2995 struct ts_cs_memory ptr;
2996
2997 VkResult result = tu_cs_alloc(cmd->device, &cmd->draw_state,
2998 vs_sampler->num + fs_sampler->num, 128 / 4,
2999 &ptr);
3000 if (result != VK_SUCCESS)
3001 return result;
3002
3003 for (unsigned i = 0; i < vs_sampler->num; i++) {
3004 struct tu_sampler *sampler = sampler_ptr(descriptors_state, vs_sampler, i);
3005 memcpy(ptr.map, &border_color[sampler->border], 128);
3006 ptr.map += 128 / 4;
3007 }
3008
3009 for (unsigned i = 0; i < fs_sampler->num; i++) {
3010 struct tu_sampler *sampler = sampler_ptr(descriptors_state, fs_sampler, i);
3011 memcpy(ptr.map, &border_color[sampler->border], 128);
3012 ptr.map += 128 / 4;
3013 }
3014
3015 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO, 2);
3016 tu_cs_emit_qw(cs, ptr.iova);
3017 return VK_SUCCESS;
3018 }
3019
3020 static VkResult
3021 tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
3022 struct tu_cs *cs,
3023 const struct tu_draw_info *draw)
3024 {
3025 const struct tu_pipeline *pipeline = cmd->state.pipeline;
3026 const struct tu_dynamic_state *dynamic = &cmd->state.dynamic;
3027 struct tu_draw_state_group draw_state_groups[TU_DRAW_STATE_COUNT];
3028 uint32_t draw_state_group_count = 0;
3029
3030 struct tu_descriptor_state *descriptors_state =
3031 &cmd->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS];
3032
3033 VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
3034 if (result != VK_SUCCESS)
3035 return result;
3036
3037 /* TODO lrz */
3038
3039 uint32_t pc_primitive_cntl = 0;
3040 if (pipeline->ia.primitive_restart && draw->indexed)
3041 pc_primitive_cntl |= A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART;
3042
3043 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0);
3044 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9990, 0);
3045 tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0);
3046
3047 tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_0, 1);
3048 tu_cs_emit(cs, pc_primitive_cntl);
3049
3050 if (cmd->state.dirty &
3051 (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH) &&
3052 (pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) {
3053 tu6_emit_gras_su_cntl(cs, pipeline->rast.gras_su_cntl,
3054 dynamic->line_width);
3055 }
3056
3057 if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) &&
3058 (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) {
3059 tu6_emit_stencil_compare_mask(cs, dynamic->stencil_compare_mask.front,
3060 dynamic->stencil_compare_mask.back);
3061 }
3062
3063 if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) &&
3064 (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) {
3065 tu6_emit_stencil_write_mask(cs, dynamic->stencil_write_mask.front,
3066 dynamic->stencil_write_mask.back);
3067 }
3068
3069 if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) &&
3070 (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) {
3071 tu6_emit_stencil_reference(cs, dynamic->stencil_reference.front,
3072 dynamic->stencil_reference.back);
3073 }
3074
3075 if (cmd->state.dirty &
3076 (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_VERTEX_BUFFERS)) {
3077 for (uint32_t i = 0; i < pipeline->vi.count; i++) {
3078 const uint32_t binding = pipeline->vi.bindings[i];
3079 const uint32_t stride = pipeline->vi.strides[i];
3080 const struct tu_buffer *buf = cmd->state.vb.buffers[binding];
3081 const VkDeviceSize offset = buf->bo_offset +
3082 cmd->state.vb.offsets[binding] +
3083 pipeline->vi.offsets[i];
3084 const VkDeviceSize size =
3085 offset < buf->bo->size ? buf->bo->size - offset : 0;
3086
3087 tu_cs_emit_pkt4(cs, REG_A6XX_VFD_FETCH(i), 4);
3088 tu_cs_emit_qw(cs, buf->bo->iova + offset);
3089 tu_cs_emit(cs, size);
3090 tu_cs_emit(cs, stride);
3091 }
3092 }
3093
3094 if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) {
3095 draw_state_groups[draw_state_group_count++] =
3096 (struct tu_draw_state_group) {
3097 .id = TU_DRAW_STATE_PROGRAM,
3098 .enable_mask = 0x6,
3099 .ib = pipeline->program.state_ib,
3100 };
3101 draw_state_groups[draw_state_group_count++] =
3102 (struct tu_draw_state_group) {
3103 .id = TU_DRAW_STATE_PROGRAM_BINNING,
3104 .enable_mask = 0x1,
3105 .ib = pipeline->program.binning_state_ib,
3106 };
3107 draw_state_groups[draw_state_group_count++] =
3108 (struct tu_draw_state_group) {
3109 .id = TU_DRAW_STATE_VI,
3110 .enable_mask = 0x6,
3111 .ib = pipeline->vi.state_ib,
3112 };
3113 draw_state_groups[draw_state_group_count++] =
3114 (struct tu_draw_state_group) {
3115 .id = TU_DRAW_STATE_VI_BINNING,
3116 .enable_mask = 0x1,
3117 .ib = pipeline->vi.binning_state_ib,
3118 };
3119 draw_state_groups[draw_state_group_count++] =
3120 (struct tu_draw_state_group) {
3121 .id = TU_DRAW_STATE_VP,
3122 .enable_mask = 0x7,
3123 .ib = pipeline->vp.state_ib,
3124 };
3125 draw_state_groups[draw_state_group_count++] =
3126 (struct tu_draw_state_group) {
3127 .id = TU_DRAW_STATE_RAST,
3128 .enable_mask = 0x7,
3129 .ib = pipeline->rast.state_ib,
3130 };
3131 draw_state_groups[draw_state_group_count++] =
3132 (struct tu_draw_state_group) {
3133 .id = TU_DRAW_STATE_DS,
3134 .enable_mask = 0x7,
3135 .ib = pipeline->ds.state_ib,
3136 };
3137 draw_state_groups[draw_state_group_count++] =
3138 (struct tu_draw_state_group) {
3139 .id = TU_DRAW_STATE_BLEND,
3140 .enable_mask = 0x7,
3141 .ib = pipeline->blend.state_ib,
3142 };
3143 }
3144
3145 if (cmd->state.dirty &
3146 (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DESCRIPTOR_SETS)) {
3147 bool needs_border = false;
3148 struct tu_cs_entry vs_tex, fs_tex;
3149
3150 result = tu6_emit_textures(cmd, MESA_SHADER_VERTEX, &vs_tex, &needs_border);
3151 if (result != VK_SUCCESS)
3152 return result;
3153
3154 result = tu6_emit_textures(cmd, MESA_SHADER_FRAGMENT, &fs_tex, &needs_border);
3155 if (result != VK_SUCCESS)
3156 return result;
3157
3158 draw_state_groups[draw_state_group_count++] =
3159 (struct tu_draw_state_group) {
3160 .id = TU_DRAW_STATE_VS_CONST,
3161 .enable_mask = 0x7,
3162 .ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_VERTEX)
3163 };
3164 draw_state_groups[draw_state_group_count++] =
3165 (struct tu_draw_state_group) {
3166 .id = TU_DRAW_STATE_FS_CONST,
3167 .enable_mask = 0x6,
3168 .ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_FRAGMENT)
3169 };
3170 draw_state_groups[draw_state_group_count++] =
3171 (struct tu_draw_state_group) {
3172 .id = TU_DRAW_STATE_VS_TEX,
3173 .enable_mask = 0x7,
3174 .ib = vs_tex,
3175 };
3176 draw_state_groups[draw_state_group_count++] =
3177 (struct tu_draw_state_group) {
3178 .id = TU_DRAW_STATE_FS_TEX,
3179 .enable_mask = 0x6,
3180 .ib = fs_tex,
3181 };
3182 draw_state_groups[draw_state_group_count++] =
3183 (struct tu_draw_state_group) {
3184 .id = TU_DRAW_STATE_FS_IBO,
3185 .enable_mask = 0x6,
3186 .ib = tu6_emit_ibo(cmd->device, &cmd->draw_state, pipeline,
3187 descriptors_state, MESA_SHADER_FRAGMENT)
3188 };
3189
3190 if (needs_border) {
3191 result = tu6_emit_border_color(cmd, cs);
3192 if (result != VK_SUCCESS)
3193 return result;
3194 }
3195 }
3196
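/* Emit one 3-dword CP_SET_DRAW_STATE entry per group: count, enable mask
 * and group id, followed by the 64-bit IB address.  Groups with an empty
 * IB are emitted with the DISABLE bit so any previously set state for that
 * group id is cleared.
 */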
3197 tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_group_count);
3198 for (uint32_t i = 0; i < draw_state_group_count; i++) {
3199 const struct tu_draw_state_group *group = &draw_state_groups[i];
3200
3201 uint32_t cp_set_draw_state =
3202 CP_SET_DRAW_STATE__0_COUNT(group->ib.size / 4) |
3203 CP_SET_DRAW_STATE__0_ENABLE_MASK(group->enable_mask) |
3204 CP_SET_DRAW_STATE__0_GROUP_ID(group->id);
3205 uint64_t iova;
3206 if (group->ib.size) {
3207 iova = group->ib.bo->iova + group->ib.offset;
3208 } else {
3209 cp_set_draw_state |= CP_SET_DRAW_STATE__0_DISABLE;
3210 iova = 0;
3211 }
3212
3213 tu_cs_emit(cs, cp_set_draw_state);
3214 tu_cs_emit_qw(cs, iova);
3215 }
3216
3217 tu_cs_sanity_check(cs);
3218
3219 /* track BOs */
3220 if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) {
3221 for (uint32_t i = 0; i < MAX_VBS; i++) {
3222 const struct tu_buffer *buf = cmd->state.vb.buffers[i];
3223 if (buf)
3224 tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
3225 }
3226 }
3227 if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) {
3228 unsigned i;
3229 for_each_bit(i, descriptors_state->valid) {
3230 struct tu_descriptor_set *set = descriptors_state->sets[i];
3231 for (unsigned j = 0; j < set->layout->buffer_count; ++j)
3232 if (set->descriptors[j]) {
3233 tu_bo_list_add(&cmd->bo_list, set->descriptors[j],
3234 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
3235 }
3236 }
3237 }
3238
3239 /* Fragment shader state overwrites compute shader state, so flag the
3240 * compute pipeline for re-emit.
3241 */
3242 cmd->state.dirty = TU_CMD_DIRTY_COMPUTE_PIPELINE;
3243 return VK_SUCCESS;
3244 }
3245
3246 static void
3247 tu6_emit_draw_direct(struct tu_cmd_buffer *cmd,
3248 struct tu_cs *cs,
3249 const struct tu_draw_info *draw)
3250 {
3251
3252 const enum pc_di_primtype primtype = cmd->state.pipeline->ia.primtype;
3253
3254 tu_cs_emit_pkt4(cs, REG_A6XX_VFD_INDEX_OFFSET, 2);
3255 tu_cs_emit(cs, draw->vertex_offset);
3256 tu_cs_emit(cs, draw->first_instance);
3257
3258 /* TODO hw binning */
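/* CP_DRAW_INDX_OFFSET: the indexed variant takes 7 dwords (draw parameters
 * plus the index buffer address and size), the auto-index variant only 3.
 * USE_VISIBILITY tells the CP to cull against the visibility stream
 * produced by the binning pass; the 0x2000 bit is not understood yet.
 */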
3259 if (draw->indexed) {
3260 const enum a4xx_index_size index_size =
3261 tu6_index_size(cmd->state.index_type);
3262 const uint32_t index_bytes =
3263 (cmd->state.index_type == VK_INDEX_TYPE_UINT32) ? 4 : 2;
3264 const struct tu_buffer *buf = cmd->state.index_buffer;
3265 const VkDeviceSize offset = buf->bo_offset + cmd->state.index_offset +
3266 index_bytes * draw->first_index;
3267 const uint32_t size = index_bytes * draw->count;
3268
3269 const uint32_t cp_draw_indx =
3270 CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
3271 CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_DMA) |
3272 CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(index_size) |
3273 CP_DRAW_INDX_OFFSET_0_VIS_CULL(USE_VISIBILITY) | 0x2000;
3274
3275 tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 7);
3276 tu_cs_emit(cs, cp_draw_indx);
3277 tu_cs_emit(cs, draw->instance_count);
3278 tu_cs_emit(cs, draw->count);
3279 tu_cs_emit(cs, 0x0); /* XXX */
3280 tu_cs_emit_qw(cs, buf->bo->iova + offset);
3281 tu_cs_emit(cs, size);
3282 } else {
3283 const uint32_t cp_draw_indx =
3284 CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
3285 CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
3286 CP_DRAW_INDX_OFFSET_0_VIS_CULL(USE_VISIBILITY) | 0x2000;
3287
3288 tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
3289 tu_cs_emit(cs, cp_draw_indx);
3290 tu_cs_emit(cs, draw->instance_count);
3291 tu_cs_emit(cs, draw->count);
3292 }
3293 }
3294
3295 static void
3296 tu_draw(struct tu_cmd_buffer *cmd, const struct tu_draw_info *draw)
3297 {
3298 struct tu_cs *cs = &cmd->draw_cs;
3299 VkResult result;
3300
3301 result = tu6_bind_draw_states(cmd, cs, draw);
3302 if (result != VK_SUCCESS) {
3303 cmd->record_result = result;
3304 return;
3305 }
3306
3307 result = tu_cs_reserve_space(cmd->device, cs, 32);
3308 if (result != VK_SUCCESS) {
3309 cmd->record_result = result;
3310 return;
3311 }
3312
3313 if (draw->indirect) {
3314 tu_finishme("indirect draw");
3315 return;
3316 }
3317
3318 /* TODO tu6_emit_marker should pick different regs depending on cs */
3319
3320 tu6_emit_marker(cmd, cs);
3321 tu6_emit_draw_direct(cmd, cs, draw);
3322 tu6_emit_marker(cmd, cs);
3323
3324 cmd->wait_for_idle = true;
3325
3326 tu_cs_sanity_check(cs);
3327 }
3328
3329 void
3330 tu_CmdDraw(VkCommandBuffer commandBuffer,
3331 uint32_t vertexCount,
3332 uint32_t instanceCount,
3333 uint32_t firstVertex,
3334 uint32_t firstInstance)
3335 {
3336 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3337 struct tu_draw_info info = {};
3338
3339 info.count = vertexCount;
3340 info.instance_count = instanceCount;
3341 info.first_instance = firstInstance;
3342 info.vertex_offset = firstVertex;
3343
3344 tu_draw(cmd_buffer, &info);
3345 }
3346
3347 void
3348 tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
3349 uint32_t indexCount,
3350 uint32_t instanceCount,
3351 uint32_t firstIndex,
3352 int32_t vertexOffset,
3353 uint32_t firstInstance)
3354 {
3355 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3356 struct tu_draw_info info = {};
3357
3358 info.indexed = true;
3359 info.count = indexCount;
3360 info.instance_count = instanceCount;
3361 info.first_index = firstIndex;
3362 info.vertex_offset = vertexOffset;
3363 info.first_instance = firstInstance;
3364
3365 tu_draw(cmd_buffer, &info);
3366 }
3367
3368 void
3369 tu_CmdDrawIndirect(VkCommandBuffer commandBuffer,
3370 VkBuffer _buffer,
3371 VkDeviceSize offset,
3372 uint32_t drawCount,
3373 uint32_t stride)
3374 {
3375 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3376 TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
3377 struct tu_draw_info info = {};
3378
3379 info.count = drawCount;
3380 info.indirect = buffer;
3381 info.indirect_offset = offset;
3382 info.stride = stride;
3383
3384 tu_draw(cmd_buffer, &info);
3385 }
3386
3387 void
3388 tu_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
3389 VkBuffer _buffer,
3390 VkDeviceSize offset,
3391 uint32_t drawCount,
3392 uint32_t stride)
3393 {
3394 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3395 TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
3396 struct tu_draw_info info = {};
3397
3398 info.indexed = true;
3399 info.count = drawCount;
3400 info.indirect = buffer;
3401 info.indirect_offset = offset;
3402 info.stride = stride;
3403
3404 tu_draw(cmd_buffer, &info);
3405 }
3406
3407 struct tu_dispatch_info
3408 {
3409 /**
3410 * Determine the layout of the grid (in block units) to be used.
3411 */
3412 uint32_t blocks[3];
3413
3414 /**
3415 * A starting offset for the grid. If unaligned is set, the offset
3416 * must still be aligned.
3417 */
3418 uint32_t offsets[3];
3419 /**
3420 * Whether it's an unaligned compute dispatch.
3421 */
3422 bool unaligned;
3423
3424 /**
3425 * Indirect compute parameters resource.
3426 */
3427 struct tu_buffer *indirect;
3428 uint64_t indirect_offset;
3429 };
3430
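/* Uploads the compute "driver params" (grid size and local size) into the
 * const space ir3 reserved at const_state->offsets.driver_param, e.g. to
 * back gl_NumWorkGroups.  Nothing is emitted if constlen shows the params
 * were optimized away, and the indirect-dispatch path is still a
 * tu_finishme.
 */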
3431 static void
3432 tu_emit_compute_driver_params(struct tu_cs *cs, struct tu_pipeline *pipeline,
3433 const struct tu_dispatch_info *info)
3434 {
3435 gl_shader_stage type = MESA_SHADER_COMPUTE;
3436 const struct tu_program_descriptor_linkage *link =
3437 &pipeline->program.link[type];
3438 const struct ir3_const_state *const_state = &link->const_state;
3439 uint32_t offset_dwords = const_state->offsets.driver_param;
3440
3441 if (link->constlen <= offset_dwords)
3442 return;
3443
3444 if (!info->indirect) {
3445 uint32_t driver_params[] = {
3446 info->blocks[0],
3447 info->blocks[1],
3448 info->blocks[2],
3449 pipeline->compute.local_size[0],
3450 pipeline->compute.local_size[1],
3451 pipeline->compute.local_size[2],
3452 };
3453 uint32_t num_consts = MIN2(const_state->num_driver_params,
3454 link->constlen - offset_dwords);
3455 uint32_t align_size = align(num_consts, 4);
3456
3457       /* push constants (CP_LOAD_STATE6 works in vec4 units, hence the align to 4 dwords) */
3458 tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + align_size);
3459 tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset_dwords / 4) |
3460 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
3461 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
3462 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
3463 CP_LOAD_STATE6_0_NUM_UNIT(align_size / 4));
3464 tu_cs_emit(cs, 0);
3465 tu_cs_emit(cs, 0);
3466 uint32_t i;
3467 for (i = 0; i < num_consts; i++)
3468 tu_cs_emit(cs, driver_params[i]);
3469 for (; i < align_size; i++)
3470 tu_cs_emit(cs, 0);
3471 } else {
3472 tu_finishme("Indirect driver params");
3473 }
3474 }
3475
3476 static void
3477 tu_dispatch(struct tu_cmd_buffer *cmd,
3478 const struct tu_dispatch_info *info)
3479 {
3480 struct tu_cs *cs = &cmd->cs;
3481 struct tu_pipeline *pipeline = cmd->state.compute_pipeline;
3482 struct tu_descriptor_state *descriptors_state =
3483 &cmd->descriptors[VK_PIPELINE_BIND_POINT_COMPUTE];
3484
3485 VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
3486 if (result != VK_SUCCESS) {
3487 cmd->record_result = result;
3488 return;
3489 }
3490
3491 if (cmd->state.dirty & TU_CMD_DIRTY_COMPUTE_PIPELINE)
3492 tu_cs_emit_ib(cs, &pipeline->program.state_ib);
3493
3494 struct tu_cs_entry ib;
3495
3496 ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_COMPUTE);
3497 if (ib.size)
3498 tu_cs_emit_ib(cs, &ib);
3499
3500 tu_emit_compute_driver_params(cs, pipeline, info);
3501
3502 bool needs_border;
3503 result = tu6_emit_textures(cmd, MESA_SHADER_COMPUTE, &ib, &needs_border);
3504 if (result != VK_SUCCESS) {
3505 cmd->record_result = result;
3506 return;
3507 }
3508
3509 if (ib.size)
3510 tu_cs_emit_ib(cs, &ib);
3511
3512 if (needs_border)
3513 tu6_emit_border_color(cmd, cs);
3514
3515 ib = tu6_emit_ibo(cmd->device, &cmd->draw_state, pipeline,
3516 descriptors_state, MESA_SHADER_COMPUTE);
3517 if (ib.size)
3518 tu_cs_emit_ib(cs, &ib);
3519
3520    /* track BOs: add the buffers referenced by the bound descriptor sets to the submit's BO list */
3521 if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) {
3522 unsigned i;
3523 for_each_bit(i, descriptors_state->valid) {
3524 struct tu_descriptor_set *set = descriptors_state->sets[i];
3525 for (unsigned j = 0; j < set->layout->buffer_count; ++j)
3526 if (set->descriptors[j]) {
3527 tu_bo_list_add(&cmd->bo_list, set->descriptors[j],
3528 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
3529 }
3530 }
3531 }
3532
3533 /* Compute shader state overwrites fragment shader state, so we flag the
3534 * graphics pipeline for re-emit.
3535 */
3536 cmd->state.dirty = TU_CMD_DIRTY_PIPELINE;
3537
3538 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
3539 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(0x8));
3540
3541 const uint32_t *local_size = pipeline->compute.local_size;
3542 const uint32_t *num_groups = info->blocks;
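   /* HLSQ_CS_NDRANGE takes the local size minus one and the global size in
    * threads (local_size * num_groups per dimension), e.g. an 8x8x1 local
    * size with 4x2x1 workgroups gives a 32x16x1 global size.
    */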
3543 tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_NDRANGE_0, 7);
3544 tu_cs_emit(cs,
3545 A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(3) |
3546 A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) |
3547 A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) |
3548 A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1));
3549 tu_cs_emit(cs, A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0]));
3550 tu_cs_emit(cs, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */
3551 tu_cs_emit(cs, A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1]));
3552 tu_cs_emit(cs, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */
3553 tu_cs_emit(cs, A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2]));
3554 tu_cs_emit(cs, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */
3555
3556 tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_KERNEL_GROUP_X, 3);
3557 tu_cs_emit(cs, 1); /* HLSQ_CS_KERNEL_GROUP_X */
3558 tu_cs_emit(cs, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
3559 tu_cs_emit(cs, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
3560
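   /* For an indirect dispatch the workgroup counts live in the application's
    * buffer (a VkDispatchIndirectCommand at indirect_offset), so point
    * CP_EXEC_CS_INDIRECT at that address; the local size still has to be
    * encoded in the packet. Direct dispatches emit the counts inline via
    * CP_EXEC_CS.
    */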
3561 if (info->indirect) {
3562 uint64_t iova = tu_buffer_iova(info->indirect) + info->indirect_offset;
3563
3564 tu_bo_list_add(&cmd->bo_list, info->indirect->bo,
3565 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
3566
3567 tu_cs_emit_pkt7(cs, CP_EXEC_CS_INDIRECT, 4);
3568 tu_cs_emit(cs, 0x00000000);
3569 tu_cs_emit_qw(cs, iova);
3570 tu_cs_emit(cs,
3571 A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
3572 A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
3573 A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
3574 } else {
3575 tu_cs_emit_pkt7(cs, CP_EXEC_CS, 4);
3576 tu_cs_emit(cs, 0x00000000);
3577 tu_cs_emit(cs, CP_EXEC_CS_1_NGROUPS_X(info->blocks[0]));
3578 tu_cs_emit(cs, CP_EXEC_CS_2_NGROUPS_Y(info->blocks[1]));
3579 tu_cs_emit(cs, CP_EXEC_CS_3_NGROUPS_Z(info->blocks[2]));
3580 }
3581
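   /* conservatively wait for idle and flush caches after the dispatch */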
3582 tu_cs_emit_wfi(cs);
3583
3584 tu6_emit_cache_flush(cmd, cs);
3585 }
3586
3587 void
3588 tu_CmdDispatchBase(VkCommandBuffer commandBuffer,
3589 uint32_t base_x,
3590 uint32_t base_y,
3591 uint32_t base_z,
3592 uint32_t x,
3593 uint32_t y,
3594 uint32_t z)
3595 {
3596 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3597 struct tu_dispatch_info info = {};
3598
3599 info.blocks[0] = x;
3600 info.blocks[1] = y;
3601 info.blocks[2] = z;
3602
3603 info.offsets[0] = base_x;
3604 info.offsets[1] = base_y;
3605 info.offsets[2] = base_z;
3606 tu_dispatch(cmd_buffer, &info);
3607 }
3608
3609 void
3610 tu_CmdDispatch(VkCommandBuffer commandBuffer,
3611 uint32_t x,
3612 uint32_t y,
3613 uint32_t z)
3614 {
3615 tu_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z);
3616 }
3617
3618 void
3619 tu_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
3620 VkBuffer _buffer,
3621 VkDeviceSize offset)
3622 {
3623 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3624 TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
3625 struct tu_dispatch_info info = {};
3626
3627 info.indirect = buffer;
3628 info.indirect_offset = offset;
3629
3630 tu_dispatch(cmd_buffer, &info);
3631 }
3632
3633 void
3634 tu_CmdEndRenderPass(VkCommandBuffer commandBuffer)
3635 {
3636 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3637
3638 tu_cs_end(&cmd_buffer->draw_cs);
3639
3640 tu_cmd_render_tiles(cmd_buffer);
3641
3642 /* discard draw_cs entries now that the tiles are rendered */
3643 tu_cs_discard_entries(&cmd_buffer->draw_cs);
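   /* re-begin draw_cs right away so a later render pass recorded into this
    * command buffer can use it again */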
3644 tu_cs_begin(&cmd_buffer->draw_cs);
3645
3646 vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
3647 cmd_buffer->state.attachments = NULL;
3648
3649 cmd_buffer->state.pass = NULL;
3650 cmd_buffer->state.subpass = NULL;
3651 cmd_buffer->state.framebuffer = NULL;
3652 }
3653
3654 void
3655 tu_CmdEndRenderPass2KHR(VkCommandBuffer commandBuffer,
3656 const VkSubpassEndInfoKHR *pSubpassEndInfo)
3657 {
3658 tu_CmdEndRenderPass(commandBuffer);
3659 }
3660
3661 struct tu_barrier_info
3662 {
3663 uint32_t eventCount;
3664 const VkEvent *pEvents;
3665 VkPipelineStageFlags srcStageMask;
3666 };
3667
3668 static void
3669 tu_barrier(struct tu_cmd_buffer *cmd_buffer,
3670 uint32_t memoryBarrierCount,
3671 const VkMemoryBarrier *pMemoryBarriers,
3672 uint32_t bufferMemoryBarrierCount,
3673 const VkBufferMemoryBarrier *pBufferMemoryBarriers,
3674 uint32_t imageMemoryBarrierCount,
3675 const VkImageMemoryBarrier *pImageMemoryBarriers,
3676 const struct tu_barrier_info *info)
3677 {
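   /* TODO: currently a no-op; no cache flushes or stalls are emitted for
    * barriers yet. */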
3678 }
3679
3680 void
3681 tu_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
3682 VkPipelineStageFlags srcStageMask,
3683 VkPipelineStageFlags destStageMask,
3684 VkBool32 byRegion,
3685 uint32_t memoryBarrierCount,
3686 const VkMemoryBarrier *pMemoryBarriers,
3687 uint32_t bufferMemoryBarrierCount,
3688 const VkBufferMemoryBarrier *pBufferMemoryBarriers,
3689 uint32_t imageMemoryBarrierCount,
3690 const VkImageMemoryBarrier *pImageMemoryBarriers)
3691 {
3692 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3693 struct tu_barrier_info info;
3694
3695 info.eventCount = 0;
3696 info.pEvents = NULL;
3697 info.srcStageMask = srcStageMask;
3698
3699 tu_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
3700 bufferMemoryBarrierCount, pBufferMemoryBarriers,
3701 imageMemoryBarrierCount, pImageMemoryBarriers, &info);
3702 }
3703
3704 static void
3705 write_event(struct tu_cmd_buffer *cmd_buffer,
3706 struct tu_event *event,
3707 VkPipelineStageFlags stageMask,
3708 unsigned value)
3709 {
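   /* TODO: currently a no-op; the event value is never actually written. */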
3710 }
3711
3712 void
3713 tu_CmdSetEvent(VkCommandBuffer commandBuffer,
3714 VkEvent _event,
3715 VkPipelineStageFlags stageMask)
3716 {
3717 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3718 TU_FROM_HANDLE(tu_event, event, _event);
3719
3720 write_event(cmd_buffer, event, stageMask, 1);
3721 }
3722
3723 void
3724 tu_CmdResetEvent(VkCommandBuffer commandBuffer,
3725 VkEvent _event,
3726 VkPipelineStageFlags stageMask)
3727 {
3728 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3729 TU_FROM_HANDLE(tu_event, event, _event);
3730
3731 write_event(cmd_buffer, event, stageMask, 0);
3732 }
3733
3734 void
3735 tu_CmdWaitEvents(VkCommandBuffer commandBuffer,
3736 uint32_t eventCount,
3737 const VkEvent *pEvents,
3738 VkPipelineStageFlags srcStageMask,
3739 VkPipelineStageFlags dstStageMask,
3740 uint32_t memoryBarrierCount,
3741 const VkMemoryBarrier *pMemoryBarriers,
3742 uint32_t bufferMemoryBarrierCount,
3743 const VkBufferMemoryBarrier *pBufferMemoryBarriers,
3744 uint32_t imageMemoryBarrierCount,
3745 const VkImageMemoryBarrier *pImageMemoryBarriers)
3746 {
3747 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3748 struct tu_barrier_info info;
3749
3750 info.eventCount = eventCount;
3751 info.pEvents = pEvents;
3752 info.srcStageMask = 0;
3753
3754 tu_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
3755 bufferMemoryBarrierCount, pBufferMemoryBarriers,
3756 imageMemoryBarrierCount, pImageMemoryBarriers, &info);
3757 }
3758
3759 void
3760 tu_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask)
3761 {
3762 /* No-op */
3763 }