turnip: add dirty bit for push constants
[mesa.git] src/freedreno/vulkan/tu_cmd_buffer.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27
28 #include "tu_private.h"
29
30 #include "registers/adreno_pm4.xml.h"
31 #include "registers/adreno_common.xml.h"
32 #include "registers/a6xx.xml.h"
33
34 #include "vk_format.h"
35
36 #include "tu_cs.h"
37 #include "tu_blit.h"
38
39 #define OVERFLOW_FLAG_REG REG_A6XX_CP_SCRATCH_REG(0)
40
41 void
42 tu_bo_list_init(struct tu_bo_list *list)
43 {
44 list->count = list->capacity = 0;
45 list->bo_infos = NULL;
46 }
47
48 void
49 tu_bo_list_destroy(struct tu_bo_list *list)
50 {
51 free(list->bo_infos);
52 }
53
54 void
55 tu_bo_list_reset(struct tu_bo_list *list)
56 {
57 list->count = 0;
58 }
59
60 /**
61 * \a flags consists of MSM_SUBMIT_BO_FLAGS.
62 */
63 static uint32_t
64 tu_bo_list_add_info(struct tu_bo_list *list,
65 const struct drm_msm_gem_submit_bo *bo_info)
66 {
67 assert(bo_info->handle != 0);
68
69 for (uint32_t i = 0; i < list->count; ++i) {
70 if (list->bo_infos[i].handle == bo_info->handle) {
71 assert(list->bo_infos[i].presumed == bo_info->presumed);
72 list->bo_infos[i].flags |= bo_info->flags;
73 return i;
74 }
75 }
76
77 /* grow list->bo_infos if needed */
78 if (list->count == list->capacity) {
79 uint32_t new_capacity = MAX2(2 * list->count, 16);
80 struct drm_msm_gem_submit_bo *new_bo_infos = realloc(
81 list->bo_infos, new_capacity * sizeof(struct drm_msm_gem_submit_bo));
82 if (!new_bo_infos)
83 return TU_BO_LIST_FAILED;
84 list->bo_infos = new_bo_infos;
85 list->capacity = new_capacity;
86 }
87
88 list->bo_infos[list->count] = *bo_info;
89 return list->count++;
90 }
91
92 uint32_t
93 tu_bo_list_add(struct tu_bo_list *list,
94 const struct tu_bo *bo,
95 uint32_t flags)
96 {
97 return tu_bo_list_add_info(list, &(struct drm_msm_gem_submit_bo) {
98 .flags = flags,
99 .handle = bo->gem_handle,
100 .presumed = bo->iova,
101 });
102 }
103
104 VkResult
105 tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other)
106 {
107 for (uint32_t i = 0; i < other->count; i++) {
108 if (tu_bo_list_add_info(list, other->bo_infos + i) == TU_BO_LIST_FAILED)
109 return VK_ERROR_OUT_OF_HOST_MEMORY;
110 }
111
112 return VK_SUCCESS;
113 }
114
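/*
 * Lay out the attachment buffers in GMEM for one tile: each buffer gets a
 * 16KB-aligned offset followed by tile_width * tile_height * cpp bytes.
 * Fails if the total footprint exceeds the device's GMEM size.
 */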
115 static VkResult
116 tu_tiling_config_update_gmem_layout(struct tu_tiling_config *tiling,
117 const struct tu_device *dev)
118 {
119 const uint32_t gmem_size = dev->physical_device->gmem_size;
120 uint32_t offset = 0;
121
122 for (uint32_t i = 0; i < tiling->buffer_count; i++) {
123 /* 16KB-aligned */
124 offset = align(offset, 0x4000);
125
126 tiling->gmem_offsets[i] = offset;
127 offset += tiling->tile0.extent.width * tiling->tile0.extent.height *
128 tiling->buffer_cpp[i];
129 }
130
131 return offset <= gmem_size ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
132 }
133
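/*
 * Compute the tile grid for the current render area: start with a single
 * tile covering the aligned render area, split horizontally until the tile
 * is no wider than the hardware maximum, then keep splitting whichever
 * dimension is larger until the per-tile attachment data fits in GMEM.
 */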
134 static void
135 tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
136 const struct tu_device *dev)
137 {
138 const uint32_t tile_align_w = dev->physical_device->tile_align_w;
139 const uint32_t tile_align_h = dev->physical_device->tile_align_h;
140 const uint32_t max_tile_width = 1024; /* A6xx */
141
142 tiling->tile0.offset = (VkOffset2D) {
143 .x = tiling->render_area.offset.x & ~(tile_align_w - 1),
144 .y = tiling->render_area.offset.y & ~(tile_align_h - 1),
145 };
146
147 const uint32_t ra_width =
148 tiling->render_area.extent.width +
149 (tiling->render_area.offset.x - tiling->tile0.offset.x);
150 const uint32_t ra_height =
151 tiling->render_area.extent.height +
152 (tiling->render_area.offset.y - tiling->tile0.offset.y);
153
154 /* start from 1 tile */
155 tiling->tile_count = (VkExtent2D) {
156 .width = 1,
157 .height = 1,
158 };
159 tiling->tile0.extent = (VkExtent2D) {
160 .width = align(ra_width, tile_align_w),
161 .height = align(ra_height, tile_align_h),
162 };
163
164 /* do not exceed max tile width */
165 while (tiling->tile0.extent.width > max_tile_width) {
166 tiling->tile_count.width++;
167 tiling->tile0.extent.width =
168 align(ra_width / tiling->tile_count.width, tile_align_w);
169 }
170
171 /* do not exceed gmem size */
172 while (tu_tiling_config_update_gmem_layout(tiling, dev) != VK_SUCCESS) {
173 if (tiling->tile0.extent.width > MAX2(tile_align_w, tiling->tile0.extent.height)) {
174 tiling->tile_count.width++;
175 tiling->tile0.extent.width =
176 align(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
177 } else {
178 /* if this assert fails then layout is impossible.. */
179 assert(tiling->tile0.extent.height > tile_align_h);
180 tiling->tile_count.height++;
181 tiling->tile0.extent.height =
182 align(DIV_ROUND_UP(ra_height, tiling->tile_count.height), tile_align_h);
183 }
184 }
185 }
186
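/*
 * Group tiles into visibility stream pipes: start with one tile per pipe
 * and grow the pipe dimensions until the pipe count fits within the
 * hardware limit of 32.
 */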
187 static void
188 tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling,
189 const struct tu_device *dev)
190 {
191 const uint32_t max_pipe_count = 32; /* A6xx */
192
193 /* start from 1 tile per pipe */
194 tiling->pipe0 = (VkExtent2D) {
195 .width = 1,
196 .height = 1,
197 };
198 tiling->pipe_count = tiling->tile_count;
199
200 /* do not exceed max pipe count vertically */
201 while (tiling->pipe_count.height > max_pipe_count) {
202 tiling->pipe0.height += 2;
203 tiling->pipe_count.height =
204 (tiling->tile_count.height + tiling->pipe0.height - 1) /
205 tiling->pipe0.height;
206 }
207
208 /* do not exceed max pipe count */
209 while (tiling->pipe_count.width * tiling->pipe_count.height >
210 max_pipe_count) {
211 tiling->pipe0.width += 1;
212 tiling->pipe_count.width =
213 (tiling->tile_count.width + tiling->pipe0.width - 1) /
214 tiling->pipe0.width;
215 }
216 }
217
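/*
 * Fill in the per-pipe VSC_PIPE_CONFIG values (position and size of each
 * pipe, in tiles) and the matching CP_SET_BIN_DATA5 sizes; unused pipe
 * slots are cleared.
 */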
218 static void
219 tu_tiling_config_update_pipes(struct tu_tiling_config *tiling,
220 const struct tu_device *dev)
221 {
222 const uint32_t max_pipe_count = 32; /* A6xx */
223 const uint32_t used_pipe_count =
224 tiling->pipe_count.width * tiling->pipe_count.height;
225 const VkExtent2D last_pipe = {
226 .width = (tiling->tile_count.width - 1) % tiling->pipe0.width + 1,
227 .height = (tiling->tile_count.height - 1) % tiling->pipe0.height + 1,
228 };
229
230 assert(used_pipe_count <= max_pipe_count);
231 assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config));
232
233 for (uint32_t y = 0; y < tiling->pipe_count.height; y++) {
234 for (uint32_t x = 0; x < tiling->pipe_count.width; x++) {
235 const uint32_t pipe_x = tiling->pipe0.width * x;
236 const uint32_t pipe_y = tiling->pipe0.height * y;
237 const uint32_t pipe_w = (x == tiling->pipe_count.width - 1)
238 ? last_pipe.width
239 : tiling->pipe0.width;
240 const uint32_t pipe_h = (y == tiling->pipe_count.height - 1)
241 ? last_pipe.height
242 : tiling->pipe0.height;
243 const uint32_t n = tiling->pipe_count.width * y + x;
244
245 tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
246 A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
247 A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
248 A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
249 tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
250 }
251 }
252
253 memset(tiling->pipe_config + used_pipe_count, 0,
254 sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
255 }
256
257 static void
258 tu_tiling_config_update(struct tu_tiling_config *tiling,
259 const struct tu_device *dev,
260 const uint32_t *buffer_cpp,
261 uint32_t buffer_count,
262 const VkRect2D *render_area)
263 {
264 /* see if there is any real change */
265 const bool ra_changed =
266 render_area &&
267 memcmp(&tiling->render_area, render_area, sizeof(*render_area));
268 const bool buf_changed = tiling->buffer_count != buffer_count ||
269 memcmp(tiling->buffer_cpp, buffer_cpp,
270 sizeof(*buffer_cpp) * buffer_count);
271 if (!ra_changed && !buf_changed)
272 return;
273
274 if (ra_changed)
275 tiling->render_area = *render_area;
276
277 if (buf_changed) {
278 memcpy(tiling->buffer_cpp, buffer_cpp,
279 sizeof(*buffer_cpp) * buffer_count);
280 tiling->buffer_count = buffer_count;
281 }
282
283 tu_tiling_config_update_tile_layout(tiling, dev);
284 tu_tiling_config_update_pipe_layout(tiling, dev);
285 tu_tiling_config_update_pipes(tiling, dev);
286 }
287
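/*
 * Resolve tile (tx, ty) to its visibility pipe and slot within that pipe,
 * and compute the screen-space rectangle it covers (clamped to the render
 * area on the last row/column).
 */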
288 static void
289 tu_tiling_config_get_tile(const struct tu_tiling_config *tiling,
290 const struct tu_device *dev,
291 uint32_t tx,
292 uint32_t ty,
293 struct tu_tile *tile)
294 {
295 /* find the pipe and the slot for tile (tx, ty) */
296 const uint32_t px = tx / tiling->pipe0.width;
297 const uint32_t py = ty / tiling->pipe0.height;
298 const uint32_t sx = tx - tiling->pipe0.width * px;
299 const uint32_t sy = ty - tiling->pipe0.height * py;
300
301 assert(tx < tiling->tile_count.width && ty < tiling->tile_count.height);
302 assert(px < tiling->pipe_count.width && py < tiling->pipe_count.height);
303 assert(sx < tiling->pipe0.width && sy < tiling->pipe0.height);
304
305 /* convert to 1D indices */
306 tile->pipe = tiling->pipe_count.width * py + px;
307 tile->slot = tiling->pipe0.width * sy + sx;
308
309 /* get the blit area for the tile */
310 tile->begin = (VkOffset2D) {
311 .x = tiling->tile0.offset.x + tiling->tile0.extent.width * tx,
312 .y = tiling->tile0.offset.y + tiling->tile0.extent.height * ty,
313 };
314 tile->end.x =
315 (tx == tiling->tile_count.width - 1)
316 ? tiling->render_area.offset.x + tiling->render_area.extent.width
317 : tile->begin.x + tiling->tile0.extent.width;
318 tile->end.y =
319 (ty == tiling->tile_count.height - 1)
320 ? tiling->render_area.offset.y + tiling->render_area.extent.height
321 : tile->begin.y + tiling->tile0.extent.height;
322 }
323
324 enum a3xx_msaa_samples
325 tu_msaa_samples(uint32_t samples)
326 {
327 switch (samples) {
328 case 1:
329 return MSAA_ONE;
330 case 2:
331 return MSAA_TWO;
332 case 4:
333 return MSAA_FOUR;
334 case 8:
335 return MSAA_EIGHT;
336 default:
337 assert(!"invalid sample count");
338 return MSAA_ONE;
339 }
340 }
341
342 static enum a4xx_index_size
343 tu6_index_size(VkIndexType type)
344 {
345 switch (type) {
346 case VK_INDEX_TYPE_UINT16:
347 return INDEX4_SIZE_16_BIT;
348 case VK_INDEX_TYPE_UINT32:
349 return INDEX4_SIZE_32_BIT;
350 default:
351 unreachable("invalid VkIndexType");
352 return INDEX4_SIZE_8_BIT;
353 }
354 }
355
356 static void
357 tu6_emit_marker(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
358 {
359 tu_cs_emit_write_reg(cs, cmd->marker_reg, ++cmd->marker_seqno);
360 }
361
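/*
 * Emit a CP_EVENT_WRITE packet.  When need_seqno is set, the event also
 * writes an incrementing sequence number to the command buffer's scratch
 * BO; that number is returned so callers can wait for the event to land.
 */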
362 unsigned
363 tu6_emit_event_write(struct tu_cmd_buffer *cmd,
364 struct tu_cs *cs,
365 enum vgt_event_type event,
366 bool need_seqno)
367 {
368 unsigned seqno = 0;
369
370 tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, need_seqno ? 4 : 1);
371 tu_cs_emit(cs, CP_EVENT_WRITE_0_EVENT(event));
372 if (need_seqno) {
373 tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
374 seqno = ++cmd->scratch_seqno;
375 tu_cs_emit(cs, seqno);
376 }
377
378 return seqno;
379 }
380
381 static void
382 tu6_emit_cache_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
383 {
384 tu6_emit_event_write(cmd, cs, 0x31, false);
385 }
386
387 static void
388 tu6_emit_lrz_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
389 {
390 tu6_emit_event_write(cmd, cs, LRZ_FLUSH, false);
391 }
392
393 static void
394 tu6_emit_wfi(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
395 {
396 if (cmd->wait_for_idle) {
397 tu_cs_emit_wfi(cs);
398 cmd->wait_for_idle = false;
399 }
400 }
401
402 static void
403 tu6_emit_flag_buffer(struct tu_cs *cs, const struct tu_image_view *iview)
404 {
405 uint64_t va = tu_image_ubwc_base(iview->image, iview->base_mip, iview->base_layer);
406 uint32_t pitch = tu_image_ubwc_pitch(iview->image, iview->base_mip);
407 uint32_t size = tu_image_ubwc_size(iview->image, iview->base_mip);
408 if (iview->image->layout.ubwc_size) {
409 tu_cs_emit_qw(cs, va);
410 tu_cs_emit(cs, A6XX_RB_DEPTH_FLAG_BUFFER_PITCH_PITCH(pitch) |
411 A6XX_RB_DEPTH_FLAG_BUFFER_PITCH_ARRAY_PITCH(size >> 2));
412 } else {
413 tu_cs_emit_qw(cs, 0);
414 tu_cs_emit(cs, 0);
415 }
416 }
417
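/*
 * Program the depth/stencil buffer registers for the current subpass, or
 * the DEPTH6_NONE defaults when there is no depth/stencil attachment.
 */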
418 static void
419 tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
420 {
421 const struct tu_framebuffer *fb = cmd->state.framebuffer;
422 const struct tu_subpass *subpass = cmd->state.subpass;
423 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
424
425 const uint32_t a = subpass->depth_stencil_attachment.attachment;
426 if (a == VK_ATTACHMENT_UNUSED) {
427 tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
428 tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
429 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
430 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
431 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
432 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
433 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */
434
435 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
436 tu_cs_emit(cs,
437 A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
438
439 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5);
440 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
441 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
442 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
443 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
444 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */
445
446 tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_INFO, 1);
447 tu_cs_emit(cs, 0x00000000); /* RB_STENCIL_INFO */
448
449 return;
450 }
451
452 const struct tu_image_view *iview = fb->attachments[a].attachment;
453 enum a6xx_depth_format fmt = tu6_pipe2depth(iview->vk_format);
454
455 tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
456 tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
457 tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(tu_image_stride(iview->image, iview->base_mip)));
458 tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(iview->image->layout.layer_size));
459 tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
460 tu_cs_emit(cs, tiling->gmem_offsets[subpass->color_count]);
461
462 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
463 tu_cs_emit(cs, A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
464
465 tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
466 tu6_emit_flag_buffer(cs, iview);
467
468 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5);
469 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
470 tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
471 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
472 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
473 tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */
474
475 tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_INFO, 1);
476 tu_cs_emit(cs, 0x00000000); /* RB_STENCIL_INFO */
477
478 /* enable zs? */
479 }
480
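/*
 * Program the color MRT registers for the current subpass: format, tiling,
 * sRGB and component masks for each bound render target, plus their GMEM
 * base offsets.
 */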
481 static void
482 tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
483 {
484 const struct tu_framebuffer *fb = cmd->state.framebuffer;
485 const struct tu_subpass *subpass = cmd->state.subpass;
486 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
487 unsigned char mrt_comp[MAX_RTS] = { 0 };
488 unsigned srgb_cntl = 0;
489
490 for (uint32_t i = 0; i < subpass->color_count; ++i) {
491 uint32_t a = subpass->color_attachments[i].attachment;
492 if (a == VK_ATTACHMENT_UNUSED)
493 continue;
494
495 const struct tu_image_view *iview = fb->attachments[a].attachment;
496 const enum a6xx_tile_mode tile_mode =
497 tu6_get_image_tile_mode(iview->image, iview->base_mip);
498
499 mrt_comp[i] = 0xf;
500
501 if (vk_format_is_srgb(iview->vk_format))
502 srgb_cntl |= (1 << i);
503
504 const struct tu_native_format *format =
505 tu6_get_native_format(iview->vk_format);
506 assert(format && format->rb >= 0);
507
508 tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6);
509 tu_cs_emit(cs, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format->rb) |
510 A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
511 A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(format->swap));
512 tu_cs_emit(cs, A6XX_RB_MRT_PITCH(tu_image_stride(iview->image, iview->base_mip)));
513 tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(iview->image->layout.layer_size));
514 tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
515 tu_cs_emit(
516 cs, tiling->gmem_offsets[i]); /* RB_MRT[i].BASE_GMEM */
517
518 tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_MRT_REG(i), 1);
519 tu_cs_emit(cs, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format->rb) |
520 COND(vk_format_is_sint(iview->vk_format), A6XX_SP_FS_MRT_REG_COLOR_SINT) |
521 COND(vk_format_is_uint(iview->vk_format), A6XX_SP_FS_MRT_REG_COLOR_UINT));
522
523 tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);
524 tu6_emit_flag_buffer(cs, iview);
525 }
526
527 tu_cs_emit_pkt4(cs, REG_A6XX_RB_SRGB_CNTL, 1);
528 tu_cs_emit(cs, srgb_cntl);
529
530 tu_cs_emit_pkt4(cs, REG_A6XX_SP_SRGB_CNTL, 1);
531 tu_cs_emit(cs, srgb_cntl);
532
533 tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_COMPONENTS, 1);
534 tu_cs_emit(cs, A6XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
535 A6XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
536 A6XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
537 A6XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
538 A6XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
539 A6XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
540 A6XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
541 A6XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
542
543 tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_RENDER_COMPONENTS, 1);
544 tu_cs_emit(cs, A6XX_SP_FS_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
545 A6XX_SP_FS_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
546 A6XX_SP_FS_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
547 A6XX_SP_FS_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
548 A6XX_SP_FS_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
549 A6XX_SP_FS_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
550 A6XX_SP_FS_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
551 A6XX_SP_FS_RENDER_COMPONENTS_RT7(mrt_comp[7]));
552 }
553
554 static void
555 tu6_emit_msaa(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
556 {
557 const struct tu_subpass *subpass = cmd->state.subpass;
558 const enum a3xx_msaa_samples samples =
559 tu_msaa_samples(subpass->max_sample_count);
560
561 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
562 tu_cs_emit(cs, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
563 tu_cs_emit(cs, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
564 COND(samples == MSAA_ONE, A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));
565
566 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
567 tu_cs_emit(cs, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
568 tu_cs_emit(cs, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
569 COND(samples == MSAA_ONE, A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE));
570
571 tu_cs_emit_pkt4(cs, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
572 tu_cs_emit(cs, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
573 tu_cs_emit(cs, A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
574 COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));
575
576 tu_cs_emit_pkt4(cs, REG_A6XX_RB_MSAA_CNTL, 1);
577 tu_cs_emit(cs, A6XX_RB_MSAA_CNTL_SAMPLES(samples));
578 }
579
580 static void
581 tu6_emit_bin_size(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t flags)
582 {
583 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
584 const uint32_t bin_w = tiling->tile0.extent.width;
585 const uint32_t bin_h = tiling->tile0.extent.height;
586
587 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_BIN_CONTROL, 1);
588 tu_cs_emit(cs, A6XX_GRAS_BIN_CONTROL_BINW(bin_w) |
589 A6XX_GRAS_BIN_CONTROL_BINH(bin_h) | flags);
590
591 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BIN_CONTROL, 1);
592 tu_cs_emit(cs, A6XX_RB_BIN_CONTROL_BINW(bin_w) |
593 A6XX_RB_BIN_CONTROL_BINH(bin_h) | flags);
594
595 /* no flag for RB_BIN_CONTROL2... */
596 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BIN_CONTROL2, 1);
597 tu_cs_emit(cs, A6XX_RB_BIN_CONTROL2_BINW(bin_w) |
598 A6XX_RB_BIN_CONTROL2_BINH(bin_h));
599 }
600
601 static void
602 tu6_emit_render_cntl(struct tu_cmd_buffer *cmd,
603 struct tu_cs *cs,
604 bool binning)
605 {
606 uint32_t cntl = 0;
607 cntl |= A6XX_RB_RENDER_CNTL_UNK4;
608 if (binning)
609 cntl |= A6XX_RB_RENDER_CNTL_BINNING;
610
611 tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
612 tu_cs_emit(cs, 0x2);
613 tu_cs_emit(cs, REG_A6XX_RB_RENDER_CNTL);
614 tu_cs_emit(cs, cntl);
615 }
616
617 static void
618 tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
619 {
620 const VkRect2D *render_area = &cmd->state.tiling_config.render_area;
621 const uint32_t x1 = render_area->offset.x;
622 const uint32_t y1 = render_area->offset.y;
623 const uint32_t x2 = x1 + render_area->extent.width - 1;
624 const uint32_t y2 = y1 + render_area->extent.height - 1;
625
626 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
627 tu_cs_emit(cs,
628 A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
629 tu_cs_emit(cs,
630 A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
631 }
632
633 static void
634 tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
635 struct tu_cs *cs,
636 const struct tu_image_view *iview,
637 uint32_t gmem_offset,
638 uint32_t blit_info)
639 {
640 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
641 tu_cs_emit(cs, blit_info);
642
643 const struct tu_native_format *format =
644 tu6_get_native_format(iview->vk_format);
645 assert(format && format->rb >= 0);
646
647 enum a6xx_tile_mode tile_mode =
648 tu6_get_image_tile_mode(iview->image, iview->base_mip);
649 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 5);
650 tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) |
651 A6XX_RB_BLIT_DST_INFO_SAMPLES(tu_msaa_samples(iview->image->samples)) |
652 A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) |
653 A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap) |
654 COND(iview->image->layout.ubwc_size,
655 A6XX_RB_BLIT_DST_INFO_FLAGS));
656 tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
657 tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip)));
658 tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layout.layer_size));
659
660 if (iview->image->layout.ubwc_size) {
661 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
662 tu6_emit_flag_buffer(cs, iview);
663 }
664
665 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
666 tu_cs_emit(cs, gmem_offset);
667 }
668
669 static void
670 tu6_emit_blit_clear(struct tu_cmd_buffer *cmd,
671 struct tu_cs *cs,
672 const struct tu_image_view *iview,
673 uint32_t gmem_offset,
674 const VkClearValue *clear_value)
675 {
676 const struct tu_native_format *format =
677 tu6_get_native_format(iview->vk_format);
678 assert(format && format->rb >= 0);
679
680 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
681 tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb));
682
683 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
684 tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));
685
686 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
687 tu_cs_emit(cs, gmem_offset);
688
689 tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
690 tu_cs_emit(cs, 0);
691
692 uint32_t clear_vals[4] = { 0 };
693 tu_pack_clear_value(clear_value, iview->vk_format, clear_vals);
694
695 tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
696 tu_cs_emit(cs, clear_vals[0]);
697 tu_cs_emit(cs, clear_vals[1]);
698 tu_cs_emit(cs, clear_vals[2]);
699 tu_cs_emit(cs, clear_vals[3]);
700 }
701
702 static void
703 tu6_emit_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
704 {
705 tu6_emit_marker(cmd, cs);
706 tu6_emit_event_write(cmd, cs, BLIT, false);
707 tu6_emit_marker(cmd, cs);
708 }
709
710 static void
711 tu6_emit_window_scissor(struct tu_cmd_buffer *cmd,
712 struct tu_cs *cs,
713 uint32_t x1,
714 uint32_t y1,
715 uint32_t x2,
716 uint32_t y2)
717 {
718 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
719 tu_cs_emit(cs, A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
720 A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
721 tu_cs_emit(cs, A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
722 A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));
723
724 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RESOLVE_CNTL_1, 2);
725 tu_cs_emit(
726 cs, A6XX_GRAS_RESOLVE_CNTL_1_X(x1) | A6XX_GRAS_RESOLVE_CNTL_1_Y(y1));
727 tu_cs_emit(
728 cs, A6XX_GRAS_RESOLVE_CNTL_2_X(x2) | A6XX_GRAS_RESOLVE_CNTL_2_Y(y2));
729 }
730
731 static void
732 tu6_emit_window_offset(struct tu_cmd_buffer *cmd,
733 struct tu_cs *cs,
734 uint32_t x1,
735 uint32_t y1)
736 {
737 tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET, 1);
738 tu_cs_emit(cs, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));
739
740 tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET2, 1);
741 tu_cs_emit(cs,
742 A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));
743
744 tu_cs_emit_pkt4(cs, REG_A6XX_SP_WINDOW_OFFSET, 1);
745 tu_cs_emit(cs, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1));
746
747 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
748 tu_cs_emit(
749 cs, A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
750 }
751
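/*
 * HW binning is only worthwhile for more than two tiles, and can be
 * disabled with the TU_DEBUG_NOBIN debug flag.
 */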
752 static bool
753 use_hw_binning(struct tu_cmd_buffer *cmd)
754 {
755 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
756
757 if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_NOBIN))
758 return false;
759
760 return (tiling->tile_count.width * tiling->tile_count.height) > 2;
761 }
762
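/*
 * Select a tile for GMEM rendering: emit the GMEM marker, program the
 * window scissor/offset for the tile and, when HW binning is in use,
 * conditionally point CP_SET_BIN_DATA5 at the tile's visibility stream
 * (falling back to drawing everything if the stream overflowed).
 */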
763 static void
764 tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
765 struct tu_cs *cs,
766 const struct tu_tile *tile)
767 {
768 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
769 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(0x7));
770
771 tu6_emit_marker(cmd, cs);
772 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
773 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10);
774 tu6_emit_marker(cmd, cs);
775
776 const uint32_t x1 = tile->begin.x;
777 const uint32_t y1 = tile->begin.y;
778 const uint32_t x2 = tile->end.x - 1;
779 const uint32_t y2 = tile->end.y - 1;
780 tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
781 tu6_emit_window_offset(cmd, cs, x1, y1);
782
783 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_OVERRIDE, 1);
784 tu_cs_emit(cs, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
785
786 if (use_hw_binning(cmd)) {
787 tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
788
789 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
790 tu_cs_emit(cs, 0x0);
791
792 tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
793 tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
794 A6XX_CP_REG_TEST_0_BIT(0) |
795 A6XX_CP_REG_TEST_0_UNK25);
796
797 tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
798 tu_cs_emit(cs, 0x10000000);
799 tu_cs_emit(cs, 11); /* conditionally execute next 11 dwords */
800
801 /* if (no overflow) */ {
802 tu_cs_emit_pkt7(cs, CP_SET_BIN_DATA5, 7);
803 tu_cs_emit(cs, cmd->state.tiling_config.pipe_sizes[tile->pipe] |
804 CP_SET_BIN_DATA5_0_VSC_N(tile->slot));
805 tu_cs_emit_qw(cs, cmd->vsc_data.iova + tile->pipe * cmd->vsc_data_pitch);
806 tu_cs_emit_qw(cs, cmd->vsc_data.iova + (tile->pipe * 4) + (32 * cmd->vsc_data_pitch));
807 tu_cs_emit_qw(cs, cmd->vsc_data2.iova + (tile->pipe * cmd->vsc_data2_pitch));
808
809 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
810 tu_cs_emit(cs, 0x0);
811
812 /* use a NOP packet to skip over the 'else' side: */
813 tu_cs_emit_pkt7(cs, CP_NOP, 2);
814 } /* else */ {
815 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
816 tu_cs_emit(cs, 0x1);
817 }
818
819 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
820 tu_cs_emit(cs, 0x0);
821
822 tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8804, 1);
823 tu_cs_emit(cs, 0x0);
824
825 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_UNKNOWN_B304, 1);
826 tu_cs_emit(cs, 0x0);
827
828 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_80A4, 1);
829 tu_cs_emit(cs, 0x0);
830 } else {
831 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
832 tu_cs_emit(cs, 0x1);
833
834 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
835 tu_cs_emit(cs, 0x0);
836 }
837 }
838
839 static void
840 tu6_emit_tile_load_attachment(struct tu_cmd_buffer *cmd,
841 struct tu_cs *cs,
842 uint32_t a,
843 uint32_t gmem_index)
844 {
845 const struct tu_framebuffer *fb = cmd->state.framebuffer;
846 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
847 const struct tu_attachment_state *attachments = cmd->state.attachments;
848
849 const struct tu_image_view *iview = fb->attachments[a].attachment;
850 const struct tu_attachment_state *att = attachments + a;
851 if (att->pending_clear_aspects) {
852 tu6_emit_blit_clear(cmd, cs, iview,
853 tiling->gmem_offsets[gmem_index],
854 &att->clear_value);
855 } else {
856 tu6_emit_blit_info(cmd, cs, iview,
857 tiling->gmem_offsets[gmem_index],
858 A6XX_RB_BLIT_INFO_UNK0 | A6XX_RB_BLIT_INFO_GMEM);
859 }
860
861 tu6_emit_blit(cmd, cs);
862 }
863
864 static void
865 tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
866 {
867 const struct tu_subpass *subpass = cmd->state.subpass;
868
869 tu6_emit_blit_scissor(cmd, cs);
870
871 for (uint32_t i = 0; i < subpass->color_count; ++i) {
872 const uint32_t a = subpass->color_attachments[i].attachment;
873 if (a != VK_ATTACHMENT_UNUSED)
874 tu6_emit_tile_load_attachment(cmd, cs, a, i);
875 }
876
877 const uint32_t a = subpass->depth_stencil_attachment.attachment;
878 if (a != VK_ATTACHMENT_UNUSED)
879 tu6_emit_tile_load_attachment(cmd, cs, a, subpass->color_count);
880 }
881
882 static void
883 tu6_emit_store_attachment(struct tu_cmd_buffer *cmd,
884 struct tu_cs *cs,
885 uint32_t a,
886 uint32_t gmem_index)
887 {
888 const struct tu_framebuffer *fb = cmd->state.framebuffer;
889 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
890
891 if (a == VK_ATTACHMENT_UNUSED)
892 return;
893
894 tu6_emit_blit_info(cmd, cs, fb->attachments[a].attachment,
895 tiling->gmem_offsets[gmem_index], 0);
896 tu6_emit_blit(cmd, cs);
897 }
898
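/*
 * Emit the tile store (resolve) commands: disable any leftover draw state,
 * switch to the RESOLVE marker, and blit every color, resolve and
 * depth/stencil attachment from GMEM back to system memory.
 */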
899 static void
900 tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
901 {
902 const struct tu_subpass *subpass = cmd->state.subpass;
903
904 tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
905 tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
906 CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
907 CP_SET_DRAW_STATE__0_GROUP_ID(0));
908 tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
909 tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
910
911 tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
912 tu_cs_emit(cs, 0x0);
913
914 tu6_emit_marker(cmd, cs);
915 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
916 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10);
917 tu6_emit_marker(cmd, cs);
918
919 tu6_emit_blit_scissor(cmd, cs);
920
921 for (uint32_t i = 0; i < subpass->color_count; ++i) {
922 tu6_emit_store_attachment(cmd, cs,
923 subpass->color_attachments[i].attachment,
924 i);
925 if (subpass->resolve_attachments) {
926 tu6_emit_store_attachment(cmd, cs,
927 subpass->resolve_attachments[i].attachment,
928 i);
929 }
930 }
931
932 tu6_emit_store_attachment(cmd, cs,
933 subpass->depth_stencil_attachment.attachment,
934 subpass->color_count);
935 }
936
937 static void
938 tu6_emit_restart_index(struct tu_cs *cs, uint32_t restart_index)
939 {
940 tu_cs_emit_pkt4(cs, REG_A6XX_PC_RESTART_INDEX, 1);
941 tu_cs_emit(cs, restart_index);
942 }
943
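/*
 * Emit the static register state used by every command buffer: a cache
 * flush followed by a long list of defaults, many for registers that are
 * still undocumented (named UNKNOWN in the register headers).
 */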
944 static void
945 tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
946 {
947 VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
948 if (result != VK_SUCCESS) {
949 cmd->record_result = result;
950 return;
951 }
952
953 tu6_emit_cache_flush(cmd, cs);
954
955 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 0xfffff);
956
957 tu_cs_emit_write_reg(cs, REG_A6XX_RB_CCU_CNTL, 0x7c400004);
958 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E04, 0x00100000);
959 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE04, 0x8);
960 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE00, 0);
961 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE0F, 0x3f);
962 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B605, 0x44);
963 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B600, 0x100000);
964 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80);
965 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE01, 0);
966
967 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9600, 0);
968 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8600, 0x880);
969 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE04, 0);
970 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE03, 0x00000410);
971 tu_cs_emit_write_reg(cs, REG_A6XX_SP_IBO_COUNT, 0);
972 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B182, 0);
973 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BB11, 0);
974 tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
975 tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_CLIENT_PF, 4);
976 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E01, 0x0);
977 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AB00, 0x5);
978 tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A009, 0x00000001);
979 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8811, 0x00000010);
980 tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x1f);
981
982 tu_cs_emit_write_reg(cs, REG_A6XX_RB_SRGB_CNTL, 0);
983
984 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8101, 0);
985 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SAMPLE_CNTL, 0);
986 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8110, 0);
987
988 tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL0, 0x401);
989 tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL1, 0);
990 tu_cs_emit_write_reg(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 0);
991 tu_cs_emit_write_reg(cs, REG_A6XX_RB_SAMPLE_CNTL, 0);
992 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8818, 0);
993 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8819, 0);
994 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881A, 0);
995 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881B, 0);
996 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881C, 0);
997 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881D, 0);
998 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881E, 0);
999 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_88F0, 0);
1000
1001 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9101, 0xffff00);
1002 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9107, 0);
1003
1004 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9236, 1);
1005 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9300, 0);
1006
1007 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_SO_OVERRIDE,
1008 A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
1009
1010 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9801, 0);
1011 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0);
1012 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9980, 0);
1013
1014 tu_cs_emit_write_reg(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 0);
1015 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9B07, 0);
1016
1017 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A81B, 0);
1018
1019 tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0);
1020
1021 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8099, 0);
1022 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_809B, 0);
1023 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A0, 2);
1024 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80AF, 0);
1025 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0);
1026 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9211, 0);
1027 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9602, 0);
1028 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9981, 0x3);
1029 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9E72, 0);
1030 tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9108, 0x3);
1031 tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B304, 0);
1032 tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B309, 0x000000a2);
1033 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8804, 0);
1034 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A4, 0);
1035 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A5, 0);
1036 tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A6, 0);
1037 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8805, 0);
1038 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8806, 0);
1039 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8878, 0);
1040 tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8879, 0);
1041 tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc);
1042
1043 tu6_emit_marker(cmd, cs);
1044
1045 tu_cs_emit_write_reg(cs, REG_A6XX_VFD_MODE_CNTL, 0x00000000);
1046
1047 tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0);
1048
1049 tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x0000001f);
1050
1051 /* we don't use this yet.. probably best to disable.. */
1052 tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
1053 tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
1054 CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
1055 CP_SET_DRAW_STATE__0_GROUP_ID(0));
1056 tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
1057 tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
1058
1059 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_BASE_LO(0), 3);
1060 tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_BASE_LO_0 */
1061 tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_BASE_HI_0 */
1062 tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_SIZE_0 */
1063
1064 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_FLUSH_BASE_LO(0), 2);
1065 tu_cs_emit(cs, 0x00000000); /* VPC_SO_FLUSH_BASE_LO_0 */
1066 tu_cs_emit(cs, 0x00000000); /* VPC_SO_FLUSH_BASE_HI_0 */
1067
1068 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUF_CNTL, 1);
1069 tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUF_CNTL */
1070
1071 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(0), 1);
1072 tu_cs_emit(cs, 0x00000000); /* UNKNOWN_E2AB */
1073
1074 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_BASE_LO(1), 3);
1075 tu_cs_emit(cs, 0x00000000);
1076 tu_cs_emit(cs, 0x00000000);
1077 tu_cs_emit(cs, 0x00000000);
1078
1079 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(1), 6);
1080 tu_cs_emit(cs, 0x00000000);
1081 tu_cs_emit(cs, 0x00000000);
1082 tu_cs_emit(cs, 0x00000000);
1083 tu_cs_emit(cs, 0x00000000);
1084 tu_cs_emit(cs, 0x00000000);
1085 tu_cs_emit(cs, 0x00000000);
1086
1087 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(2), 6);
1088 tu_cs_emit(cs, 0x00000000);
1089 tu_cs_emit(cs, 0x00000000);
1090 tu_cs_emit(cs, 0x00000000);
1091 tu_cs_emit(cs, 0x00000000);
1092 tu_cs_emit(cs, 0x00000000);
1093 tu_cs_emit(cs, 0x00000000);
1094
1095 tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(3), 3);
1096 tu_cs_emit(cs, 0x00000000);
1097 tu_cs_emit(cs, 0x00000000);
1098 tu_cs_emit(cs, 0x00000000);
1099
1100 tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_CTRL_REG0, 1);
1101 tu_cs_emit(cs, 0x00000000);
1102
1103 tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_CTRL_REG0, 1);
1104 tu_cs_emit(cs, 0x00000000);
1105
1106 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1);
1107 tu_cs_emit(cs, 0x00000000);
1108
1109 tu_cs_emit_pkt4(cs, REG_A6XX_RB_LRZ_CNTL, 1);
1110 tu_cs_emit(cs, 0x00000000);
1111
1112 tu_cs_sanity_check(cs);
1113 }
1114
1115 static void
1116 tu6_cache_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1117 {
1118 unsigned seqno;
1119
1120 seqno = tu6_emit_event_write(cmd, cs, CACHE_FLUSH_AND_INV_EVENT, true);
1121
1122 tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
1123 tu_cs_emit(cs, 0x00000013);
1124 tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
1125 tu_cs_emit(cs, seqno);
1126 tu_cs_emit(cs, 0xffffffff);
1127 tu_cs_emit(cs, 0x00000010);
1128
1129 seqno = tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS, true);
1130
1131 tu_cs_emit_pkt7(cs, CP_UNK_A6XX_14, 4);
1132 tu_cs_emit(cs, 0x00000000);
1133 tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
1134 tu_cs_emit(cs, seqno);
1135 }
1136
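/*
 * Program the visibility stream counter: bin size and count, the per-pipe
 * configuration computed in tu_tiling_config_update_pipes(), and the
 * addresses/pitches of the VSC data buffers.
 */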
1137 static void
1138 update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1139 {
1140 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1141
1142 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_BIN_SIZE, 3);
1143 tu_cs_emit(cs, A6XX_VSC_BIN_SIZE_WIDTH(tiling->tile0.extent.width) |
1144 A6XX_VSC_BIN_SIZE_HEIGHT(tiling->tile0.extent.height));
1145 tu_cs_emit_qw(cs, cmd->vsc_data.iova + 32 * cmd->vsc_data_pitch); /* VSC_SIZE_ADDRESS_LO/HI */
1146
1147 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_BIN_COUNT, 1);
1148 tu_cs_emit(cs, A6XX_VSC_BIN_COUNT_NX(tiling->tile_count.width) |
1149 A6XX_VSC_BIN_COUNT_NY(tiling->tile_count.height));
1150
1151 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
1152 for (unsigned i = 0; i < 32; i++)
1153 tu_cs_emit(cs, tiling->pipe_config[i]);
1154
1155 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_DATA2_ADDRESS_LO, 4);
1156 tu_cs_emit_qw(cs, cmd->vsc_data2.iova);
1157 tu_cs_emit(cs, cmd->vsc_data2_pitch);
1158 tu_cs_emit(cs, cmd->vsc_data2.size);
1159
1160 tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_DATA_ADDRESS_LO, 4);
1161 tu_cs_emit_qw(cs, cmd->vsc_data.iova);
1162 tu_cs_emit(cs, cmd->vsc_data_pitch);
1163 tu_cs_emit(cs, cmd->vsc_data.size);
1164 }
1165
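/*
 * Have the CP compare each pipe's VSC_SIZE registers against the allocated
 * pitches and note any overflow in scratch memory.  The result is mirrored
 * to memory for the CPU and inverted into OVERFLOW_FLAG_REG, so that later
 * packets only consume the visibility data when it is valid.
 */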
1166 static void
1167 emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1168 {
1169 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1170 const uint32_t used_pipe_count =
1171 tiling->pipe_count.width * tiling->pipe_count.height;
1172
1173 /* Clear vsc_scratch: */
1174 tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 3);
1175 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
1176 tu_cs_emit(cs, 0x0);
1177
1178 /* Check for overflow, write vsc_scratch if detected: */
1179 for (int i = 0; i < used_pipe_count; i++) {
1180 tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 8);
1181 tu_cs_emit(cs, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
1182 CP_COND_WRITE5_0_WRITE_MEMORY);
1183 tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_SIZE_REG(i)));
1184 tu_cs_emit(cs, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
1185 tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_data_pitch));
1186 tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0));
1187 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
1188 tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(1 + cmd->vsc_data_pitch));
1189
1190 tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 8);
1191 tu_cs_emit(cs, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
1192 CP_COND_WRITE5_0_WRITE_MEMORY);
1193 tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_SIZE2_REG(i)));
1194 tu_cs_emit(cs, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
1195 tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_data2_pitch));
1196 tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0));
1197 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
1198 tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(3 + cmd->vsc_data2_pitch));
1199 }
1200
1201 tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
1202
1203 tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
1204
1205 tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3);
1206 tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(OVERFLOW_FLAG_REG) |
1207 CP_MEM_TO_REG_0_CNT(1 - 1));
1208 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
1209
1210 /*
1211 * This is a bit awkward: we really want a way to invert the
1212 * CP_REG_TEST/CP_COND_REG_EXEC logic, so that we could conditionally
1213 * execute commands to use HW binning when a bit is *not* set.  This
1214 * dance is here to invert OVERFLOW_FLAG_REG instead.
1215 *
1216 * A CP_NOP packet is used to skip over the 'else' clause when b0 is
1217 * set.
1218 */
1219
1220 /* b0 will be set if VSC_DATA or VSC_DATA2 overflows: */
1221 tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
1222 tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
1223 A6XX_CP_REG_TEST_0_BIT(0) |
1224 A6XX_CP_REG_TEST_0_UNK25);
1225
1226 tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
1227 tu_cs_emit(cs, 0x10000000);
1228 tu_cs_emit(cs, 7); /* conditionally execute next 7 dwords */
1229
1230 /* if (b0 set) */ {
1231 /*
1232 * On overflow, mirror the value to control->vsc_overflow,
1233 * which the CPU checks to detect overflow (see
1234 * check_vsc_overflow()).
1235 */
1236 tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
1237 tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(OVERFLOW_FLAG_REG) |
1238 CP_REG_TO_MEM_0_CNT(1 - 1));
1239 tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_OVERFLOW);
1240
1241 tu_cs_emit_pkt4(cs, OVERFLOW_FLAG_REG, 1);
1242 tu_cs_emit(cs, 0x0);
1243
1244 tu_cs_emit_pkt7(cs, CP_NOP, 2); /* skip 'else' when 'if' is taken */
1245 } /* else */ {
1246 tu_cs_emit_pkt4(cs, OVERFLOW_FLAG_REG, 1);
1247 tu_cs_emit(cs, 0x1);
1248 }
1249 }
1250
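/*
 * Run the binning pass: set up the full-frame window and VSC buffers,
 * replay the draw command stream in binning mode, flush, and then test
 * whether the visibility stream overflowed.
 */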
1251 static void
1252 tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1253 {
1254 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1255
1256 uint32_t x1 = tiling->tile0.offset.x;
1257 uint32_t y1 = tiling->tile0.offset.y;
1258 uint32_t x2 = tiling->render_area.offset.x + tiling->render_area.extent.width - 1;
1259 uint32_t y2 = tiling->render_area.offset.y + tiling->render_area.extent.height - 1;
1260
1261 tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
1262
1263 tu6_emit_marker(cmd, cs);
1264 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
1265 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
1266 tu6_emit_marker(cmd, cs);
1267
1268 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
1269 tu_cs_emit(cs, 0x1);
1270
1271 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
1272 tu_cs_emit(cs, 0x1);
1273
1274 tu_cs_emit_wfi(cs);
1275
1276 tu_cs_emit_pkt4(cs, REG_A6XX_VFD_MODE_CNTL, 1);
1277 tu_cs_emit(cs, A6XX_VFD_MODE_CNTL_BINNING_PASS);
1278
1279 update_vsc_pipe(cmd, cs);
1280
1281 tu_cs_emit_pkt4(cs, REG_A6XX_PC_UNKNOWN_9805, 1);
1282 tu_cs_emit(cs, 0x1);
1283
1284 tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A0F8, 1);
1285 tu_cs_emit(cs, 0x1);
1286
1287 tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
1288 tu_cs_emit(cs, UNK_2C);
1289
1290 tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET, 1);
1291 tu_cs_emit(cs, A6XX_RB_WINDOW_OFFSET_X(0) |
1292 A6XX_RB_WINDOW_OFFSET_Y(0));
1293
1294 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
1295 tu_cs_emit(cs, A6XX_SP_TP_WINDOW_OFFSET_X(0) |
1296 A6XX_SP_TP_WINDOW_OFFSET_Y(0));
1297
1298 /* emit IB to binning drawcmds: */
1299 tu_cs_emit_call(cs, &cmd->draw_cs);
1300
1301 tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
1302 tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
1303 CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
1304 CP_SET_DRAW_STATE__0_GROUP_ID(0));
1305 tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
1306 tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
1307
1308 tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
1309 tu_cs_emit(cs, UNK_2D);
1310
1311 tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
1312 tu6_cache_flush(cmd, cs);
1313
1314 tu_cs_emit_wfi(cs);
1315
1316 tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
1317
1318 emit_vsc_overflow_test(cmd, cs);
1319
1320 tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
1321 tu_cs_emit(cs, 0x0);
1322
1323 tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
1324 tu_cs_emit(cs, 0x0);
1325
1326 tu_cs_emit_wfi(cs);
1327
1328 tu_cs_emit_pkt4(cs, REG_A6XX_RB_CCU_CNTL, 1);
1329 tu_cs_emit(cs, 0x7c400004);
1330
1331 cmd->wait_for_idle = false;
1332 }
1333
1334 static void
1335 tu6_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1336 {
1337 VkResult result = tu_cs_reserve_space(cmd->device, cs, 1024);
1338 if (result != VK_SUCCESS) {
1339 cmd->record_result = result;
1340 return;
1341 }
1342
1343 tu6_emit_lrz_flush(cmd, cs);
1344
1345 /* lrz clear? */
1346
1347 tu6_emit_cache_flush(cmd, cs);
1348
1349 tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
1350 tu_cs_emit(cs, 0x0);
1351
1352 /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
1353 tu6_emit_wfi(cmd, cs);
1354 tu_cs_emit_pkt4(cs, REG_A6XX_RB_CCU_CNTL, 1);
1355 tu_cs_emit(cs, 0x7c400004); /* RB_CCU_CNTL */
1356
1357 tu6_emit_zs(cmd, cs);
1358 tu6_emit_mrt(cmd, cs);
1359 tu6_emit_msaa(cmd, cs);
1360
1361 if (use_hw_binning(cmd)) {
1362 tu6_emit_bin_size(cmd, cs, A6XX_RB_BIN_CONTROL_BINNING_PASS | 0x6000000);
1363
1364 tu6_emit_render_cntl(cmd, cs, true);
1365
1366 tu6_emit_binning_pass(cmd, cs);
1367
1368 tu6_emit_bin_size(cmd, cs, A6XX_RB_BIN_CONTROL_USE_VIZ | 0x6000000);
1369
1370 tu_cs_emit_pkt4(cs, REG_A6XX_VFD_MODE_CNTL, 1);
1371 tu_cs_emit(cs, 0x0);
1372
1373 tu_cs_emit_pkt4(cs, REG_A6XX_PC_UNKNOWN_9805, 1);
1374 tu_cs_emit(cs, 0x1);
1375
1376 tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A0F8, 1);
1377 tu_cs_emit(cs, 0x1);
1378
1379 tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
1380 tu_cs_emit(cs, 0x1);
1381 } else {
1382 tu6_emit_bin_size(cmd, cs, 0x6000000);
1383 }
1384
1385 tu6_emit_render_cntl(cmd, cs, false);
1386
1387 tu_cs_sanity_check(cs);
1388 }
1389
1390 static void
1391 tu6_render_tile(struct tu_cmd_buffer *cmd,
1392 struct tu_cs *cs,
1393 const struct tu_tile *tile)
1394 {
1395 const uint32_t render_tile_space = 256 + tu_cs_get_call_size(&cmd->draw_cs);
1396 VkResult result = tu_cs_reserve_space(cmd->device, cs, render_tile_space);
1397 if (result != VK_SUCCESS) {
1398 cmd->record_result = result;
1399 return;
1400 }
1401
1402 tu6_emit_tile_select(cmd, cs, tile);
1403 tu_cs_emit_ib(cs, &cmd->state.tile_load_ib);
1404
1405 tu_cs_emit_call(cs, &cmd->draw_cs);
1406 cmd->wait_for_idle = true;
1407
1408 if (use_hw_binning(cmd)) {
1409 tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
1410 tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
1411 A6XX_CP_REG_TEST_0_BIT(0) |
1412 A6XX_CP_REG_TEST_0_UNK25);
1413
1414 tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
1415 tu_cs_emit(cs, 0x10000000);
1416 tu_cs_emit(cs, 2); /* conditionally execute next 2 dwords */
1417
1418 /* if (no overflow) */ {
1419 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
1420 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(0x5) | 0x10);
1421 }
1422 }
1423
1424 tu_cs_emit_ib(cs, &cmd->state.tile_store_ib);
1425
1426 tu_cs_sanity_check(cs);
1427 }
1428
1429 static void
1430 tu6_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1431 {
1432 VkResult result = tu_cs_reserve_space(cmd->device, cs, 16);
1433 if (result != VK_SUCCESS) {
1434 cmd->record_result = result;
1435 return;
1436 }
1437
1438 tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1);
1439 tu_cs_emit(cs, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_UNK3);
1440
1441 tu6_emit_lrz_flush(cmd, cs);
1442
1443 tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS, true);
1444
1445 tu_cs_sanity_check(cs);
1446 }
1447
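/*
 * Top-level GMEM rendering: emit the per-pass setup (including the
 * optional binning pass), then render and resolve every tile, and finish
 * with the end-of-pass flushes.
 */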
1448 static void
1449 tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
1450 {
1451 const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1452
1453 tu6_render_begin(cmd, &cmd->cs);
1454
1455 for (uint32_t y = 0; y < tiling->tile_count.height; y++) {
1456 for (uint32_t x = 0; x < tiling->tile_count.width; x++) {
1457 struct tu_tile tile;
1458 tu_tiling_config_get_tile(tiling, cmd->device, x, y, &tile);
1459 tu6_render_tile(cmd, &cmd->cs, &tile);
1460 }
1461 }
1462
1463 tu6_render_end(cmd, &cmd->cs);
1464 }
1465
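/*
 * Build the tile_load IB in a sub-stream: each used attachment is either
 * cleared in GMEM or blitted in from system memory, after which the
 * pending clears are marked as handled.
 */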
1466 static void
1467 tu_cmd_prepare_tile_load_ib(struct tu_cmd_buffer *cmd)
1468 {
1469 const uint32_t tile_load_space = 16 + 32 * MAX_RTS;
1470 const struct tu_subpass *subpass = cmd->state.subpass;
1471 struct tu_attachment_state *attachments = cmd->state.attachments;
1472 struct tu_cs sub_cs;
1473
1474 VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs,
1475 tile_load_space, &sub_cs);
1476 if (result != VK_SUCCESS) {
1477 cmd->record_result = result;
1478 return;
1479 }
1480
1481 /* emit to tile-load sub_cs */
1482 tu6_emit_tile_load(cmd, &sub_cs);
1483
1484 cmd->state.tile_load_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs);
1485
1486 for (uint32_t i = 0; i < subpass->color_count; ++i) {
1487 const uint32_t a = subpass->color_attachments[i].attachment;
1488 if (a != VK_ATTACHMENT_UNUSED)
1489 attachments[a].pending_clear_aspects = 0;
1490 }
1491 }
1492
1493 static void
1494 tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd)
1495 {
1496 const uint32_t tile_store_space = 32 + 32 * MAX_RTS;
1497 struct tu_cs sub_cs;
1498
1499 VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs,
1500 tile_store_space, &sub_cs);
1501 if (result != VK_SUCCESS) {
1502 cmd->record_result = result;
1503 return;
1504 }
1505
1506 /* emit to tile-store sub_cs */
1507 tu6_emit_tile_store(cmd, &sub_cs);
1508
1509 cmd->state.tile_store_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs);
1510 }
1511
1512 static void
1513 tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd,
1514 const VkRect2D *render_area)
1515 {
1516 const struct tu_device *dev = cmd->device;
1517 const struct tu_render_pass *pass = cmd->state.pass;
1518 const struct tu_subpass *subpass = cmd->state.subpass;
1519 struct tu_tiling_config *tiling = &cmd->state.tiling_config;
1520
1521 uint32_t buffer_cpp[MAX_RTS + 2];
1522 uint32_t buffer_count = 0;
1523
1524 for (uint32_t i = 0; i < subpass->color_count; ++i) {
1525 const uint32_t a = subpass->color_attachments[i].attachment;
1526 if (a == VK_ATTACHMENT_UNUSED) {
1527 buffer_cpp[buffer_count++] = 0;
1528 continue;
1529 }
1530
1531 const struct tu_render_pass_attachment *att = &pass->attachments[a];
1532 buffer_cpp[buffer_count++] =
1533 vk_format_get_blocksize(att->format) * att->samples;
1534 }
1535
1536 if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
1537 const uint32_t a = subpass->depth_stencil_attachment.attachment;
1538 const struct tu_render_pass_attachment *att = &pass->attachments[a];
1539
1540 /* TODO */
1541 assert(att->format != VK_FORMAT_D32_SFLOAT_S8_UINT);
1542
1543 buffer_cpp[buffer_count++] =
1544 vk_format_get_blocksize(att->format) * att->samples;
1545 }
1546
1547 tu_tiling_config_update(tiling, dev, buffer_cpp, buffer_count,
1548 render_area);
1549 }
1550
1551 const struct tu_dynamic_state default_dynamic_state = {
1552 .viewport =
1553 {
1554 .count = 0,
1555 },
1556 .scissor =
1557 {
1558 .count = 0,
1559 },
1560 .line_width = 1.0f,
1561 .depth_bias =
1562 {
1563 .bias = 0.0f,
1564 .clamp = 0.0f,
1565 .slope = 0.0f,
1566 },
1567 .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
1568 .depth_bounds =
1569 {
1570 .min = 0.0f,
1571 .max = 1.0f,
1572 },
1573 .stencil_compare_mask =
1574 {
1575 .front = ~0u,
1576 .back = ~0u,
1577 },
1578 .stencil_write_mask =
1579 {
1580 .front = ~0u,
1581 .back = ~0u,
1582 },
1583 .stencil_reference =
1584 {
1585 .front = 0u,
1586 .back = 0u,
1587 },
1588 };
1589
1590 static void UNUSED /* FINISHME */
1591 tu_bind_dynamic_state(struct tu_cmd_buffer *cmd_buffer,
1592 const struct tu_dynamic_state *src)
1593 {
1594 struct tu_dynamic_state *dest = &cmd_buffer->state.dynamic;
1595 uint32_t copy_mask = src->mask;
1596 uint32_t dest_mask = 0;
1597
1598 tu_use_args(cmd_buffer); /* FINISHME */
1599
1600 /* Make sure to copy the number of viewports/scissors because they can
1601 * only be specified at pipeline creation time.
1602 */
1603 dest->viewport.count = src->viewport.count;
1604 dest->scissor.count = src->scissor.count;
1605 dest->discard_rectangle.count = src->discard_rectangle.count;
1606
1607 if (copy_mask & TU_DYNAMIC_VIEWPORT) {
1608 if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
1609 src->viewport.count * sizeof(VkViewport))) {
1610 typed_memcpy(dest->viewport.viewports, src->viewport.viewports,
1611 src->viewport.count);
1612 dest_mask |= TU_DYNAMIC_VIEWPORT;
1613 }
1614 }
1615
1616 if (copy_mask & TU_DYNAMIC_SCISSOR) {
1617 if (memcmp(&dest->scissor.scissors, &src->scissor.scissors,
1618 src->scissor.count * sizeof(VkRect2D))) {
1619 typed_memcpy(dest->scissor.scissors, src->scissor.scissors,
1620 src->scissor.count);
1621 dest_mask |= TU_DYNAMIC_SCISSOR;
1622 }
1623 }
1624
1625 if (copy_mask & TU_DYNAMIC_LINE_WIDTH) {
1626 if (dest->line_width != src->line_width) {
1627 dest->line_width = src->line_width;
1628 dest_mask |= TU_DYNAMIC_LINE_WIDTH;
1629 }
1630 }
1631
1632 if (copy_mask & TU_DYNAMIC_DEPTH_BIAS) {
1633 if (memcmp(&dest->depth_bias, &src->depth_bias,
1634 sizeof(src->depth_bias))) {
1635 dest->depth_bias = src->depth_bias;
1636 dest_mask |= TU_DYNAMIC_DEPTH_BIAS;
1637 }
1638 }
1639
1640 if (copy_mask & TU_DYNAMIC_BLEND_CONSTANTS) {
1641 if (memcmp(&dest->blend_constants, &src->blend_constants,
1642 sizeof(src->blend_constants))) {
1643 typed_memcpy(dest->blend_constants, src->blend_constants, 4);
1644 dest_mask |= TU_DYNAMIC_BLEND_CONSTANTS;
1645 }
1646 }
1647
1648 if (copy_mask & TU_DYNAMIC_DEPTH_BOUNDS) {
1649 if (memcmp(&dest->depth_bounds, &src->depth_bounds,
1650 sizeof(src->depth_bounds))) {
1651 dest->depth_bounds = src->depth_bounds;
1652 dest_mask |= TU_DYNAMIC_DEPTH_BOUNDS;
1653 }
1654 }
1655
1656 if (copy_mask & TU_DYNAMIC_STENCIL_COMPARE_MASK) {
1657 if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask,
1658 sizeof(src->stencil_compare_mask))) {
1659 dest->stencil_compare_mask = src->stencil_compare_mask;
1660 dest_mask |= TU_DYNAMIC_STENCIL_COMPARE_MASK;
1661 }
1662 }
1663
1664 if (copy_mask & TU_DYNAMIC_STENCIL_WRITE_MASK) {
1665 if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask,
1666 sizeof(src->stencil_write_mask))) {
1667 dest->stencil_write_mask = src->stencil_write_mask;
1668 dest_mask |= TU_DYNAMIC_STENCIL_WRITE_MASK;
1669 }
1670 }
1671
1672 if (copy_mask & TU_DYNAMIC_STENCIL_REFERENCE) {
1673 if (memcmp(&dest->stencil_reference, &src->stencil_reference,
1674 sizeof(src->stencil_reference))) {
1675 dest->stencil_reference = src->stencil_reference;
1676 dest_mask |= TU_DYNAMIC_STENCIL_REFERENCE;
1677 }
1678 }
1679
1680 if (copy_mask & TU_DYNAMIC_DISCARD_RECTANGLE) {
1681 if (memcmp(&dest->discard_rectangle.rectangles,
1682 &src->discard_rectangle.rectangles,
1683 src->discard_rectangle.count * sizeof(VkRect2D))) {
1684 typed_memcpy(dest->discard_rectangle.rectangles,
1685 src->discard_rectangle.rectangles,
1686 src->discard_rectangle.count);
1687 dest_mask |= TU_DYNAMIC_DISCARD_RECTANGLE;
1688 }
1689 }
1690 }
1691
1692 static VkResult
1693 tu_create_cmd_buffer(struct tu_device *device,
1694 struct tu_cmd_pool *pool,
1695 VkCommandBufferLevel level,
1696 VkCommandBuffer *pCommandBuffer)
1697 {
1698 struct tu_cmd_buffer *cmd_buffer;
1699 cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8,
1700 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1701 if (cmd_buffer == NULL)
1702 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1703
1704 cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1705 cmd_buffer->device = device;
1706 cmd_buffer->pool = pool;
1707 cmd_buffer->level = level;
1708
1709 if (pool) {
1710 list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
1711 cmd_buffer->queue_family_index = pool->queue_family_index;
1712
1713 } else {
1714 /* Init the pool_link so we can safely call list_del when we destroy
1715 * the command buffer
1716 */
1717 list_inithead(&cmd_buffer->pool_link);
1718 cmd_buffer->queue_family_index = TU_QUEUE_GENERAL;
1719 }
1720
1721 tu_bo_list_init(&cmd_buffer->bo_list);
1722 tu_cs_init(&cmd_buffer->cs, TU_CS_MODE_GROW, 4096);
1723 tu_cs_init(&cmd_buffer->draw_cs, TU_CS_MODE_GROW, 4096);
1724 tu_cs_init(&cmd_buffer->draw_state, TU_CS_MODE_SUB_STREAM, 2048);
1725 tu_cs_init(&cmd_buffer->tile_cs, TU_CS_MODE_SUB_STREAM, 1024);
1726
1727 *pCommandBuffer = tu_cmd_buffer_to_handle(cmd_buffer);
1728
1729 list_inithead(&cmd_buffer->upload.list);
1730
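   /* Presumably a separate CP scratch register is used for primary vs.
    * secondary command buffers so that markers emitted by a secondary
    * executed inside a primary do not clobber the primary's marker value.
    */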
1731 cmd_buffer->marker_reg = REG_A6XX_CP_SCRATCH_REG(
1732 cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY ? 7 : 6);
1733
1734 VkResult result = tu_bo_init_new(device, &cmd_buffer->scratch_bo, 0x1000);
1735 if (result != VK_SUCCESS)
1736 return result;
1737
1738 #define VSC_DATA_SIZE(pitch) ((pitch) * 32 + 0x100) /* extra size to store VSC_SIZE */
1739 #define VSC_DATA2_SIZE(pitch) ((pitch) * 32)
1740
1741 /* TODO: resize on overflow or compute a max size from # of vertices in renderpass?? */
1742 cmd_buffer->vsc_data_pitch = 0x440 * 4;
1743 cmd_buffer->vsc_data2_pitch = 0x1040 * 4;
1744
1745 result = tu_bo_init_new(device, &cmd_buffer->vsc_data, VSC_DATA_SIZE(cmd_buffer->vsc_data_pitch));
1746 if (result != VK_SUCCESS)
1747 goto fail_vsc_data;
1748
1749 result = tu_bo_init_new(device, &cmd_buffer->vsc_data2, VSC_DATA2_SIZE(cmd_buffer->vsc_data2_pitch));
1750 if (result != VK_SUCCESS)
1751 goto fail_vsc_data2;
1752
1753 return VK_SUCCESS;
1754
1755 fail_vsc_data2:
1756 tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data);
1757 fail_vsc_data:
1758 tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo);
1759 return result;
1760 }
1761
1762 static void
1763 tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
1764 {
1765 tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo);
1766 tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data);
1767 tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data2);
1768
1769 list_del(&cmd_buffer->pool_link);
1770
1771 for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++)
1772 free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr);
1773
1774 tu_cs_finish(cmd_buffer->device, &cmd_buffer->cs);
1775 tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_cs);
1776 tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_state);
1777 tu_cs_finish(cmd_buffer->device, &cmd_buffer->tile_cs);
1778
1779 tu_bo_list_destroy(&cmd_buffer->bo_list);
1780 vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
1781 }
1782
1783 static VkResult
1784 tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer)
1785 {
1786 cmd_buffer->wait_for_idle = true;
1787
1788 cmd_buffer->record_result = VK_SUCCESS;
1789
1790 tu_bo_list_reset(&cmd_buffer->bo_list);
1791 tu_cs_reset(cmd_buffer->device, &cmd_buffer->cs);
1792 tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_cs);
1793 tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_state);
1794 tu_cs_reset(cmd_buffer->device, &cmd_buffer->tile_cs);
1795
1796 for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) {
1797 cmd_buffer->descriptors[i].dirty = 0;
1798 cmd_buffer->descriptors[i].valid = 0;
1799 cmd_buffer->descriptors[i].push_dirty = false;
1800 }
1801
1802 cmd_buffer->status = TU_CMD_BUFFER_STATUS_INITIAL;
1803
1804 return cmd_buffer->record_result;
1805 }
1806
1807 static VkResult
1808 tu_cmd_state_setup_attachments(struct tu_cmd_buffer *cmd_buffer,
1809 const VkRenderPassBeginInfo *info)
1810 {
1811 struct tu_cmd_state *state = &cmd_buffer->state;
1812 const struct tu_framebuffer *fb = state->framebuffer;
1813 const struct tu_render_pass *pass = state->pass;
1814
1815 for (uint32_t i = 0; i < fb->attachment_count; ++i) {
1816 const struct tu_image_view *iview = fb->attachments[i].attachment;
1817 tu_bo_list_add(&cmd_buffer->bo_list, iview->image->bo,
1818 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
1819 }
1820
1821 if (pass->attachment_count == 0) {
1822 state->attachments = NULL;
1823 return VK_SUCCESS;
1824 }
1825
1826 state->attachments =
1827 vk_alloc(&cmd_buffer->pool->alloc,
1828 pass->attachment_count * sizeof(state->attachments[0]), 8,
1829 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1830 if (state->attachments == NULL) {
1831 cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1832 return cmd_buffer->record_result;
1833 }
1834
1835 for (uint32_t i = 0; i < pass->attachment_count; ++i) {
1836 const struct tu_render_pass_attachment *att = &pass->attachments[i];
1837 VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
1838 VkImageAspectFlags clear_aspects = 0;
1839
1840 if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
1841 /* color attachment */
1842 if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1843 clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
1844 }
1845 } else {
1846 /* depthstencil attachment */
1847 if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
1848 att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1849 clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
1850 if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
1851 att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
1852 clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
1853 }
1854 if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
1855 att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1856 clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
1857 }
1858 }
1859
1860 state->attachments[i].pending_clear_aspects = clear_aspects;
1861 state->attachments[i].cleared_views = 0;
1862 if (clear_aspects && info) {
1863 assert(info->clearValueCount > i);
1864 state->attachments[i].clear_value = info->pClearValues[i];
1865 }
1866
1867 state->attachments[i].current_layout = att->initial_layout;
1868 }
1869
1870 return VK_SUCCESS;
1871 }
1872
1873 VkResult
1874 tu_AllocateCommandBuffers(VkDevice _device,
1875 const VkCommandBufferAllocateInfo *pAllocateInfo,
1876 VkCommandBuffer *pCommandBuffers)
1877 {
1878 TU_FROM_HANDLE(tu_device, device, _device);
1879 TU_FROM_HANDLE(tu_cmd_pool, pool, pAllocateInfo->commandPool);
1880
1881 VkResult result = VK_SUCCESS;
1882 uint32_t i;
1883
1884 for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
1885
1886 if (!list_is_empty(&pool->free_cmd_buffers)) {
1887 struct tu_cmd_buffer *cmd_buffer = list_first_entry(
1888 &pool->free_cmd_buffers, struct tu_cmd_buffer, pool_link);
1889
1890 list_del(&cmd_buffer->pool_link);
1891 list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
1892
1893 result = tu_reset_cmd_buffer(cmd_buffer);
1894 cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1895 cmd_buffer->level = pAllocateInfo->level;
1896
1897 pCommandBuffers[i] = tu_cmd_buffer_to_handle(cmd_buffer);
1898 } else {
1899 result = tu_create_cmd_buffer(device, pool, pAllocateInfo->level,
1900 &pCommandBuffers[i]);
1901 }
1902 if (result != VK_SUCCESS)
1903 break;
1904 }
1905
1906 if (result != VK_SUCCESS) {
1907 tu_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i,
1908 pCommandBuffers);
1909
1910 /* From the Vulkan 1.0.66 spec:
1911 *
1912 * "vkAllocateCommandBuffers can be used to create multiple
1913 * command buffers. If the creation of any of those command
1914 * buffers fails, the implementation must destroy all
1915 * successfully created command buffer objects from this
1916 * command, set all entries of the pCommandBuffers array to
1917 * NULL and return the error."
1918 */
1919 memset(pCommandBuffers, 0,
1920 sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount);
1921 }
1922
1923 return result;
1924 }
1925
1926 void
1927 tu_FreeCommandBuffers(VkDevice device,
1928 VkCommandPool commandPool,
1929 uint32_t commandBufferCount,
1930 const VkCommandBuffer *pCommandBuffers)
1931 {
1932 for (uint32_t i = 0; i < commandBufferCount; i++) {
1933 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
1934
1935 if (cmd_buffer) {
1936 if (cmd_buffer->pool) {
1937 list_del(&cmd_buffer->pool_link);
1938 list_addtail(&cmd_buffer->pool_link,
1939 &cmd_buffer->pool->free_cmd_buffers);
1940 } else
1941 tu_cmd_buffer_destroy(cmd_buffer);
1942 }
1943 }
1944 }
1945
1946 VkResult
1947 tu_ResetCommandBuffer(VkCommandBuffer commandBuffer,
1948 VkCommandBufferResetFlags flags)
1949 {
1950 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
1951 return tu_reset_cmd_buffer(cmd_buffer);
1952 }
1953
1954 VkResult
1955 tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
1956 const VkCommandBufferBeginInfo *pBeginInfo)
1957 {
1958 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
1959 VkResult result = VK_SUCCESS;
1960
1961 if (cmd_buffer->status != TU_CMD_BUFFER_STATUS_INITIAL) {
1962 /* If the command buffer has already been reset with
1963 * vkResetCommandBuffer, no need to do it again.
1964 */
1965 result = tu_reset_cmd_buffer(cmd_buffer);
1966 if (result != VK_SUCCESS)
1967 return result;
1968 }
1969
1970 memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
1971 cmd_buffer->usage_flags = pBeginInfo->flags;
1972
1973 tu_cs_begin(&cmd_buffer->cs);
1974 tu_cs_begin(&cmd_buffer->draw_cs);
1975
1976 cmd_buffer->marker_seqno = 0;
1977 cmd_buffer->scratch_seqno = 0;
1978
1979 /* set up the initial configuration in the command buffer */
1980 if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
1981 switch (cmd_buffer->queue_family_index) {
1982 case TU_QUEUE_GENERAL:
1983 tu6_init_hw(cmd_buffer, &cmd_buffer->cs);
1984 break;
1985 default:
1986 break;
1987 }
1988 }
1989
1990 cmd_buffer->status = TU_CMD_BUFFER_STATUS_RECORDING;
1991
1992 return VK_SUCCESS;
1993 }
1994
1995 void
1996 tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
1997 uint32_t firstBinding,
1998 uint32_t bindingCount,
1999 const VkBuffer *pBuffers,
2000 const VkDeviceSize *pOffsets)
2001 {
2002 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2003
2004 assert(firstBinding + bindingCount <= MAX_VBS);
2005
2006 for (uint32_t i = 0; i < bindingCount; i++) {
2007 cmd->state.vb.buffers[firstBinding + i] =
2008 tu_buffer_from_handle(pBuffers[i]);
2009 cmd->state.vb.offsets[firstBinding + i] = pOffsets[i];
2010 }
2011
2012 /* VB states depend on VkPipelineVertexInputStateCreateInfo */
2013 cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
2014 }
2015
2016 void
2017 tu_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
2018 VkBuffer buffer,
2019 VkDeviceSize offset,
2020 VkIndexType indexType)
2021 {
2022 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2023 TU_FROM_HANDLE(tu_buffer, buf, buffer);
2024
2025 /* initialize/update the restart index */
2026 if (!cmd->state.index_buffer || cmd->state.index_type != indexType) {
2027 struct tu_cs *draw_cs = &cmd->draw_cs;
2028 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 2);
2029 if (result != VK_SUCCESS) {
2030 cmd->record_result = result;
2031 return;
2032 }
2033
2034 tu6_emit_restart_index(
2035 draw_cs, indexType == VK_INDEX_TYPE_UINT32 ? 0xffffffff : 0xffff);
2036
2037 tu_cs_sanity_check(draw_cs);
2038 }
2039
2040 /* track the BO */
2041 if (cmd->state.index_buffer != buf)
2042 tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
2043
2044 cmd->state.index_buffer = buf;
2045 cmd->state.index_offset = offset;
2046 cmd->state.index_type = indexType;
2047 }
2048
2049 void
2050 tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
2051 VkPipelineBindPoint pipelineBindPoint,
2052 VkPipelineLayout _layout,
2053 uint32_t firstSet,
2054 uint32_t descriptorSetCount,
2055 const VkDescriptorSet *pDescriptorSets,
2056 uint32_t dynamicOffsetCount,
2057 const uint32_t *pDynamicOffsets)
2058 {
2059 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2060 TU_FROM_HANDLE(tu_pipeline_layout, layout, _layout);
2061 unsigned dyn_idx = 0;
2062
2063 struct tu_descriptor_state *descriptors_state =
2064 tu_get_descriptors_state(cmd_buffer, pipelineBindPoint);
2065
2066 for (unsigned i = 0; i < descriptorSetCount; ++i) {
2067 unsigned idx = i + firstSet;
2068 TU_FROM_HANDLE(tu_descriptor_set, set, pDescriptorSets[i]);
2069
2070 descriptors_state->sets[idx] = set;
2071 descriptors_state->valid |= (1u << idx);
2072
2073 for (unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) {
2074 unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
2075 assert(dyn_idx < dynamicOffsetCount);
2076
2077 descriptors_state->dynamic_buffers[idx] =
2078 set->dynamic_descriptors[j].va + pDynamicOffsets[dyn_idx];
2079 }
2080 }
2081
2082 cmd_buffer->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS;
2083 }
2084
2085 void
2086 tu_CmdPushConstants(VkCommandBuffer commandBuffer,
2087 VkPipelineLayout layout,
2088 VkShaderStageFlags stageFlags,
2089 uint32_t offset,
2090 uint32_t size,
2091 const void *pValues)
2092 {
2093 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2094 memcpy((void*) cmd->push_constants + offset, pValues, size);
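   /* Flag the push constants dirty so the next draw/dispatch re-emits the
    * user const state from cmd->push_constants (see tu6_emit_consts).
    */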
2095 cmd->state.dirty |= TU_CMD_DIRTY_PUSH_CONSTANTS;
2096 }
2097
2098 VkResult
2099 tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
2100 {
2101 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2102
2103 if (cmd_buffer->scratch_seqno) {
2104 tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->scratch_bo,
2105 MSM_SUBMIT_BO_WRITE);
2106 }
2107
2108 if (cmd_buffer->use_vsc_data) {
2109 tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->vsc_data,
2110 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
2111 tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->vsc_data2,
2112 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
2113 }
2114
2115 for (uint32_t i = 0; i < cmd_buffer->draw_cs.bo_count; i++) {
2116 tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_cs.bos[i],
2117 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2118 }
2119
2120 for (uint32_t i = 0; i < cmd_buffer->draw_state.bo_count; i++) {
2121 tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_state.bos[i],
2122 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2123 }
2124
2125 for (uint32_t i = 0; i < cmd_buffer->tile_cs.bo_count; i++) {
2126 tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->tile_cs.bos[i],
2127 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2128 }
2129
2130 tu_cs_end(&cmd_buffer->cs);
2131 tu_cs_end(&cmd_buffer->draw_cs);
2132
2133 assert(!cmd_buffer->state.attachments);
2134
2135 cmd_buffer->status = TU_CMD_BUFFER_STATUS_EXECUTABLE;
2136
2137 return cmd_buffer->record_result;
2138 }
2139
2140 void
2141 tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
2142 VkPipelineBindPoint pipelineBindPoint,
2143 VkPipeline _pipeline)
2144 {
2145 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2146 TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline);
2147
2148 switch (pipelineBindPoint) {
2149 case VK_PIPELINE_BIND_POINT_GRAPHICS:
2150 cmd->state.pipeline = pipeline;
2151 cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE;
2152 break;
2153 case VK_PIPELINE_BIND_POINT_COMPUTE:
2154 cmd->state.compute_pipeline = pipeline;
2155 cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_PIPELINE;
2156 break;
2157 default:
2158 unreachable("unrecognized pipeline bind point");
2159 break;
2160 }
2161
2162 tu_bo_list_add(&cmd->bo_list, &pipeline->program.binary_bo,
2163 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2164 for (uint32_t i = 0; i < pipeline->cs.bo_count; i++) {
2165 tu_bo_list_add(&cmd->bo_list, pipeline->cs.bos[i],
2166 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
2167 }
2168 }
2169
2170 void
2171 tu_CmdSetViewport(VkCommandBuffer commandBuffer,
2172 uint32_t firstViewport,
2173 uint32_t viewportCount,
2174 const VkViewport *pViewports)
2175 {
2176 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2177 struct tu_cs *draw_cs = &cmd->draw_cs;
2178
2179 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 12);
2180 if (result != VK_SUCCESS) {
2181 cmd->record_result = result;
2182 return;
2183 }
2184
2185 assert(firstViewport == 0 && viewportCount == 1);
2186 tu6_emit_viewport(draw_cs, pViewports);
2187
2188 tu_cs_sanity_check(draw_cs);
2189 }
2190
2191 void
2192 tu_CmdSetScissor(VkCommandBuffer commandBuffer,
2193 uint32_t firstScissor,
2194 uint32_t scissorCount,
2195 const VkRect2D *pScissors)
2196 {
2197 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2198 struct tu_cs *draw_cs = &cmd->draw_cs;
2199
2200 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 3);
2201 if (result != VK_SUCCESS) {
2202 cmd->record_result = result;
2203 return;
2204 }
2205
2206 assert(firstScissor == 0 && scissorCount == 1);
2207 tu6_emit_scissor(draw_cs, pScissors);
2208
2209 tu_cs_sanity_check(draw_cs);
2210 }
2211
2212 void
2213 tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
2214 {
2215 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2216
2217 cmd->state.dynamic.line_width = lineWidth;
2218
2219 /* line width depends on VkPipelineRasterizationStateCreateInfo */
2220 cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
2221 }
2222
2223 void
2224 tu_CmdSetDepthBias(VkCommandBuffer commandBuffer,
2225 float depthBiasConstantFactor,
2226 float depthBiasClamp,
2227 float depthBiasSlopeFactor)
2228 {
2229 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2230 struct tu_cs *draw_cs = &cmd->draw_cs;
2231
2232 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 4);
2233 if (result != VK_SUCCESS) {
2234 cmd->record_result = result;
2235 return;
2236 }
2237
2238 tu6_emit_depth_bias(draw_cs, depthBiasConstantFactor, depthBiasClamp,
2239 depthBiasSlopeFactor);
2240
2241 tu_cs_sanity_check(draw_cs);
2242 }
2243
2244 void
2245 tu_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
2246 const float blendConstants[4])
2247 {
2248 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2249 struct tu_cs *draw_cs = &cmd->draw_cs;
2250
2251 VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 5);
2252 if (result != VK_SUCCESS) {
2253 cmd->record_result = result;
2254 return;
2255 }
2256
2257 tu6_emit_blend_constants(draw_cs, blendConstants);
2258
2259 tu_cs_sanity_check(draw_cs);
2260 }
2261
2262 void
2263 tu_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
2264 float minDepthBounds,
2265 float maxDepthBounds)
2266 {
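   /* No-op for now: depth bounds test state is not emitted yet. */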
2267 }
2268
2269 void
2270 tu_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
2271 VkStencilFaceFlags faceMask,
2272 uint32_t compareMask)
2273 {
2274 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2275
2276 if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
2277 cmd->state.dynamic.stencil_compare_mask.front = compareMask;
2278 if (faceMask & VK_STENCIL_FACE_BACK_BIT)
2279 cmd->state.dynamic.stencil_compare_mask.back = compareMask;
2280
2281 /* the front/back compare masks must be updated together */
2282 cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
2283 }
2284
2285 void
2286 tu_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
2287 VkStencilFaceFlags faceMask,
2288 uint32_t writeMask)
2289 {
2290 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2291
2292 if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
2293 cmd->state.dynamic.stencil_write_mask.front = writeMask;
2294 if (faceMask & VK_STENCIL_FACE_BACK_BIT)
2295 cmd->state.dynamic.stencil_write_mask.back = writeMask;
2296
2297 /* the front/back write masks must be updated together */
2298 cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
2299 }
2300
2301 void
2302 tu_CmdSetStencilReference(VkCommandBuffer commandBuffer,
2303 VkStencilFaceFlags faceMask,
2304 uint32_t reference)
2305 {
2306 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2307
2308 if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
2309 cmd->state.dynamic.stencil_reference.front = reference;
2310 if (faceMask & VK_STENCIL_FACE_BACK_BIT)
2311 cmd->state.dynamic.stencil_reference.back = reference;
2312
2313 /* the front/back references must be updated together */
2314 cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
2315 }
2316
2317 void
2318 tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
2319 uint32_t commandBufferCount,
2320 const VkCommandBuffer *pCmdBuffers)
2321 {
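   /* No-op for now: executing secondary command buffers is not implemented. */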
2322 }
2323
2324 VkResult
2325 tu_CreateCommandPool(VkDevice _device,
2326 const VkCommandPoolCreateInfo *pCreateInfo,
2327 const VkAllocationCallbacks *pAllocator,
2328 VkCommandPool *pCmdPool)
2329 {
2330 TU_FROM_HANDLE(tu_device, device, _device);
2331 struct tu_cmd_pool *pool;
2332
2333 pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
2334 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2335 if (pool == NULL)
2336 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2337
2338 if (pAllocator)
2339 pool->alloc = *pAllocator;
2340 else
2341 pool->alloc = device->alloc;
2342
2343 list_inithead(&pool->cmd_buffers);
2344 list_inithead(&pool->free_cmd_buffers);
2345
2346 pool->queue_family_index = pCreateInfo->queueFamilyIndex;
2347
2348 *pCmdPool = tu_cmd_pool_to_handle(pool);
2349
2350 return VK_SUCCESS;
2351 }
2352
2353 void
2354 tu_DestroyCommandPool(VkDevice _device,
2355 VkCommandPool commandPool,
2356 const VkAllocationCallbacks *pAllocator)
2357 {
2358 TU_FROM_HANDLE(tu_device, device, _device);
2359 TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);
2360
2361 if (!pool)
2362 return;
2363
2364 list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
2365 &pool->cmd_buffers, pool_link)
2366 {
2367 tu_cmd_buffer_destroy(cmd_buffer);
2368 }
2369
2370 list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
2371 &pool->free_cmd_buffers, pool_link)
2372 {
2373 tu_cmd_buffer_destroy(cmd_buffer);
2374 }
2375
2376 vk_free2(&device->alloc, pAllocator, pool);
2377 }
2378
2379 VkResult
2380 tu_ResetCommandPool(VkDevice device,
2381 VkCommandPool commandPool,
2382 VkCommandPoolResetFlags flags)
2383 {
2384 TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);
2385 VkResult result;
2386
2387 list_for_each_entry(struct tu_cmd_buffer, cmd_buffer, &pool->cmd_buffers,
2388 pool_link)
2389 {
2390 result = tu_reset_cmd_buffer(cmd_buffer);
2391 if (result != VK_SUCCESS)
2392 return result;
2393 }
2394
2395 return VK_SUCCESS;
2396 }
2397
2398 void
2399 tu_TrimCommandPool(VkDevice device,
2400 VkCommandPool commandPool,
2401 VkCommandPoolTrimFlags flags)
2402 {
2403 TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool);
2404
2405 if (!pool)
2406 return;
2407
2408 list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer,
2409 &pool->free_cmd_buffers, pool_link)
2410 {
2411 tu_cmd_buffer_destroy(cmd_buffer);
2412 }
2413 }
2414
2415 void
2416 tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
2417 const VkRenderPassBeginInfo *pRenderPassBegin,
2418 VkSubpassContents contents)
2419 {
2420 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
2421 TU_FROM_HANDLE(tu_render_pass, pass, pRenderPassBegin->renderPass);
2422 TU_FROM_HANDLE(tu_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
2423 VkResult result;
2424
2425 cmd_buffer->state.pass = pass;
2426 cmd_buffer->state.subpass = pass->subpasses;
2427 cmd_buffer->state.framebuffer = framebuffer;
2428
2429 result = tu_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin);
2430 if (result != VK_SUCCESS)
2431 return;
2432
2433 tu_cmd_update_tiling_config(cmd_buffer, &pRenderPassBegin->renderArea);
2434 tu_cmd_prepare_tile_load_ib(cmd_buffer);
2435 tu_cmd_prepare_tile_store_ib(cmd_buffer);
2436
2437 /* note: use_hw_binning only checks tiling config */
2438 if (use_hw_binning(cmd_buffer))
2439 cmd_buffer->use_vsc_data = true;
2440 }
2441
2442 void
2443 tu_CmdBeginRenderPass2KHR(VkCommandBuffer commandBuffer,
2444 const VkRenderPassBeginInfo *pRenderPassBeginInfo,
2445 const VkSubpassBeginInfoKHR *pSubpassBeginInfo)
2446 {
2447 tu_CmdBeginRenderPass(commandBuffer, pRenderPassBeginInfo,
2448 pSubpassBeginInfo->contents);
2449 }
2450
2451 void
2452 tu_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents)
2453 {
2454 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2455
2456 tu_cmd_render_tiles(cmd);
2457
2458 cmd->state.subpass++;
2459
2460 tu_cmd_update_tiling_config(cmd, NULL);
2461 tu_cmd_prepare_tile_load_ib(cmd);
2462 tu_cmd_prepare_tile_store_ib(cmd);
2463 }
2464
2465 void
2466 tu_CmdNextSubpass2KHR(VkCommandBuffer commandBuffer,
2467 const VkSubpassBeginInfoKHR *pSubpassBeginInfo,
2468 const VkSubpassEndInfoKHR *pSubpassEndInfo)
2469 {
2470 tu_CmdNextSubpass(commandBuffer, pSubpassBeginInfo->contents);
2471 }
2472
2473 struct tu_draw_info
2474 {
2475 /**
2476 * Number of vertices.
2477 */
2478 uint32_t count;
2479
2480 /**
2481 * Index of the first vertex.
2482 */
2483 int32_t vertex_offset;
2484
2485 /**
2486 * First instance id.
2487 */
2488 uint32_t first_instance;
2489
2490 /**
2491 * Number of instances.
2492 */
2493 uint32_t instance_count;
2494
2495 /**
2496 * First index (indexed draws only).
2497 */
2498 uint32_t first_index;
2499
2500 /**
2501 * Whether it's an indexed draw.
2502 */
2503 bool indexed;
2504
2505 /**
2506 * Indirect draw parameters resource.
2507 */
2508 struct tu_buffer *indirect;
2509 uint64_t indirect_offset;
2510 uint32_t stride;
2511
2512 /**
2513 * Draw count parameters resource.
2514 */
2515 struct tu_buffer *count_buffer;
2516 uint64_t count_buffer_offset;
2517 };
2518
2519 enum tu_draw_state_group_id
2520 {
2521 TU_DRAW_STATE_PROGRAM,
2522 TU_DRAW_STATE_PROGRAM_BINNING,
2523 TU_DRAW_STATE_VI,
2524 TU_DRAW_STATE_VI_BINNING,
2525 TU_DRAW_STATE_VP,
2526 TU_DRAW_STATE_RAST,
2527 TU_DRAW_STATE_DS,
2528 TU_DRAW_STATE_BLEND,
2529 TU_DRAW_STATE_VS_CONST,
2530 TU_DRAW_STATE_FS_CONST,
2531 TU_DRAW_STATE_VS_TEX,
2532 TU_DRAW_STATE_FS_TEX,
2533 TU_DRAW_STATE_FS_IBO,
2534
2535 TU_DRAW_STATE_COUNT,
2536 };
2537
2538 struct tu_draw_state_group
2539 {
2540 enum tu_draw_state_group_id id;
2541 uint32_t enable_mask;
2542 struct tu_cs_entry ib;
2543 };
2544
2545 static struct tu_sampler*
2546 sampler_ptr(struct tu_descriptor_state *descriptors_state,
2547 const struct tu_descriptor_map *map, unsigned i)
2548 {
2549 assert(descriptors_state->valid & (1 << map->set[i]));
2550
2551 struct tu_descriptor_set *set = descriptors_state->sets[map->set[i]];
2552 assert(map->binding[i] < set->layout->binding_count);
2553
2554 const struct tu_descriptor_set_binding_layout *layout =
2555 &set->layout->binding[map->binding[i]];
2556
2557 switch (layout->type) {
2558 case VK_DESCRIPTOR_TYPE_SAMPLER:
2559 return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4];
2560 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2561 return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4 + A6XX_TEX_CONST_DWORDS];
2562 default:
2563 unreachable("unimplemented descriptor type");
2564 break;
2565 }
2566 }
2567
2568 static uint32_t*
2569 texture_ptr(struct tu_descriptor_state *descriptors_state,
2570 const struct tu_descriptor_map *map, unsigned i)
2571 {
2572 assert(descriptors_state->valid & (1 << map->set[i]));
2573
2574 struct tu_descriptor_set *set = descriptors_state->sets[map->set[i]];
2575 assert(map->binding[i] < set->layout->binding_count);
2576
2577 const struct tu_descriptor_set_binding_layout *layout =
2578 &set->layout->binding[map->binding[i]];
2579
2580 switch (layout->type) {
2581 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
2582 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2583 return &set->mapped_ptr[layout->offset / 4];
2584 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2585 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2586 return &set->mapped_ptr[layout->offset / 4];
2587 default:
2588 unreachable("unimplemented descriptor type");
2589 break;
2590 }
2591 }
2592
2593 static uint64_t
2594 buffer_ptr(struct tu_descriptor_state *descriptors_state,
2595 const struct tu_descriptor_map *map,
2596 unsigned i)
2597 {
2598 assert(descriptors_state->valid & (1 << map->set[i]));
2599
2600 struct tu_descriptor_set *set = descriptors_state->sets[map->set[i]];
2601 assert(map->binding[i] < set->layout->binding_count);
2602
2603 const struct tu_descriptor_set_binding_layout *layout =
2604 &set->layout->binding[map->binding[i]];
2605
2606 switch (layout->type) {
2607 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2608 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
2609 return descriptors_state->dynamic_buffers[layout->dynamic_offset_offset];
2610 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2611 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
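      /* non-dynamic buffer descriptors store the 64-bit buffer iova as two
       * consecutive dwords
       */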
2612 return (uint64_t) set->mapped_ptr[layout->offset / 4 + 1] << 32 |
2613 set->mapped_ptr[layout->offset / 4];
2614 default:
2615 unreachable("unimplemented descriptor type");
2616 break;
2617 }
2618 }
2619
2620 static inline uint32_t
2621 tu6_stage2opcode(gl_shader_stage type)
2622 {
2623 switch (type) {
2624 case MESA_SHADER_VERTEX:
2625 case MESA_SHADER_TESS_CTRL:
2626 case MESA_SHADER_TESS_EVAL:
2627 case MESA_SHADER_GEOMETRY:
2628 return CP_LOAD_STATE6_GEOM;
2629 case MESA_SHADER_FRAGMENT:
2630 case MESA_SHADER_COMPUTE:
2631 case MESA_SHADER_KERNEL:
2632 return CP_LOAD_STATE6_FRAG;
2633 default:
2634 unreachable("bad shader type");
2635 }
2636 }
2637
2638 static inline enum a6xx_state_block
2639 tu6_stage2shadersb(gl_shader_stage type)
2640 {
2641 switch (type) {
2642 case MESA_SHADER_VERTEX:
2643 return SB6_VS_SHADER;
2644 case MESA_SHADER_FRAGMENT:
2645 return SB6_FS_SHADER;
2646 case MESA_SHADER_COMPUTE:
2647 case MESA_SHADER_KERNEL:
2648 return SB6_CS_SHADER;
2649 default:
2650 unreachable("bad shader type");
2651 return ~0;
2652 }
2653 }
2654
2655 static void
2656 tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
2657 struct tu_descriptor_state *descriptors_state,
2658 gl_shader_stage type,
2659 uint32_t *push_constants)
2660 {
2661 const struct tu_program_descriptor_linkage *link =
2662 &pipeline->program.link[type];
2663 const struct ir3_ubo_analysis_state *state = &link->ubo_state;
2664
2665 for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
2666 if (state->range[i].start < state->range[i].end) {
2667 uint32_t size = state->range[i].end - state->range[i].start;
2668 uint32_t offset = state->range[i].start;
2669
2670 /* and even if the start of the const buffer is before
2671 * first_immediate, the end may not be:
2672 */
2673 size = MIN2(size, (16 * link->constlen) - state->range[i].offset);
2674
2675 if (size == 0)
2676 continue;
2677
2678 /* things should be aligned to vec4: */
2679 debug_assert((state->range[i].offset % 16) == 0);
2680 debug_assert((size % 16) == 0);
2681 debug_assert((offset % 16) == 0);
2682
2683 if (i == 0) {
2684 /* push constants */
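            /* range 0 is the push constant block (UBO 0), so source the data
             * directly from cmd->push_constants instead of a buffer address
             */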
2685 tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + (size / 4));
2686 tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
2687 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
2688 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
2689 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
2690 CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
2691 tu_cs_emit(cs, 0);
2692 tu_cs_emit(cs, 0);
2693 for (unsigned i = 0; i < size / 4; i++)
2694 tu_cs_emit(cs, push_constants[i + offset / 4]);
2695 continue;
2696 }
2697
2698 uint64_t va = buffer_ptr(descriptors_state, &link->ubo_map, i - 1);
2699
2700 tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3);
2701 tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
2702 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
2703 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
2704 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
2705 CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
2706 tu_cs_emit_qw(cs, va + offset);
2707 }
2708 }
2709 }
2710
2711 static void
2712 tu6_emit_ubos(struct tu_cs *cs, const struct tu_pipeline *pipeline,
2713 struct tu_descriptor_state *descriptors_state,
2714 gl_shader_stage type)
2715 {
2716 const struct tu_program_descriptor_linkage *link =
2717 &pipeline->program.link[type];
2718
2719 uint32_t num = MIN2(link->ubo_map.num, link->const_state.num_ubos);
2720 uint32_t anum = align(num, 2);
2721 uint32_t i;
2722
2723 if (!num)
2724 return;
2725
2726 tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + (2 * anum));
2727 tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(link->const_state.offsets.ubo) |
2728 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
2729 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
2730 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
2731 CP_LOAD_STATE6_0_NUM_UNIT(anum/2));
2732 tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
2733 tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
2734
2735 for (i = 0; i < num; i++)
2736 tu_cs_emit_qw(cs, buffer_ptr(descriptors_state, &link->ubo_map, i));
2737
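   /* pad the list to an even number of pointers, since NUM_UNIT is counted
    * in vec4s (i.e. pairs of 64-bit addresses)
    */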
2738 for (; i < anum; i++) {
2739 tu_cs_emit(cs, 0xffffffff);
2740 tu_cs_emit(cs, 0xffffffff);
2741 }
2742 }
2743
2744 static struct tu_cs_entry
2745 tu6_emit_consts(struct tu_cmd_buffer *cmd,
2746 const struct tu_pipeline *pipeline,
2747 struct tu_descriptor_state *descriptors_state,
2748 gl_shader_stage type)
2749 {
2750 struct tu_cs cs;
2751 tu_cs_begin_sub_stream(cmd->device, &cmd->draw_state, 512, &cs); /* TODO: maximum size? */
2752
2753 tu6_emit_user_consts(&cs, pipeline, descriptors_state, type, cmd->push_constants);
2754 tu6_emit_ubos(&cs, pipeline, descriptors_state, type);
2755
2756 return tu_cs_end_sub_stream(&cmd->draw_state, &cs);
2757 }
2758
2759 static VkResult
2760 tu6_emit_textures(struct tu_cmd_buffer *cmd,
2761 gl_shader_stage type,
2762 struct tu_cs_entry *entry,
2763 bool *needs_border)
2764 {
2765 struct tu_device *device = cmd->device;
2766 struct tu_cs *draw_state = &cmd->draw_state;
2767 struct tu_descriptor_state *descriptors_state =
2768 &cmd->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS];
2769 const struct tu_program_descriptor_linkage *link =
2770 &cmd->state.pipeline->program.link[type];
2771 VkResult result;
2772
2773 if (link->texture_map.num == 0 && link->sampler_map.num == 0) {
2774 *entry = (struct tu_cs_entry) {};
2775 return VK_SUCCESS;
2776 }
2777
2778 /* allocate and fill texture state */
2779 struct ts_cs_memory tex_const;
2780 result = tu_cs_alloc(device, draw_state, link->texture_map.num, A6XX_TEX_CONST_DWORDS, &tex_const);
2781 if (result != VK_SUCCESS)
2782 return result;
2783
2784 for (unsigned i = 0; i < link->texture_map.num; i++) {
2785 memcpy(&tex_const.map[A6XX_TEX_CONST_DWORDS*i],
2786 texture_ptr(descriptors_state, &link->texture_map, i),
2787 A6XX_TEX_CONST_DWORDS*4);
2788 }
2789
2790 /* allocate and fill sampler state */
2791 struct ts_cs_memory tex_samp;
2792 result = tu_cs_alloc(device, draw_state, link->sampler_map.num, A6XX_TEX_SAMP_DWORDS, &tex_samp);
2793 if (result != VK_SUCCESS)
2794 return result;
2795
2796 for (unsigned i = 0; i < link->sampler_map.num; i++) {
2797 struct tu_sampler *sampler = sampler_ptr(descriptors_state, &link->sampler_map, i);
2798 memcpy(&tex_samp.map[A6XX_TEX_SAMP_DWORDS*i], sampler->state, sizeof(sampler->state));
2799 *needs_border |= sampler->needs_border;
2800 }
2801
2802 unsigned tex_samp_reg, tex_const_reg, tex_count_reg;
2803 enum a6xx_state_block sb;
2804
2805 switch (type) {
2806 case MESA_SHADER_VERTEX:
2807 sb = SB6_VS_TEX;
2808 tex_samp_reg = REG_A6XX_SP_VS_TEX_SAMP_LO;
2809 tex_const_reg = REG_A6XX_SP_VS_TEX_CONST_LO;
2810 tex_count_reg = REG_A6XX_SP_VS_TEX_COUNT;
2811 break;
2812 case MESA_SHADER_FRAGMENT:
2813 sb = SB6_FS_TEX;
2814 tex_samp_reg = REG_A6XX_SP_FS_TEX_SAMP_LO;
2815 tex_const_reg = REG_A6XX_SP_FS_TEX_CONST_LO;
2816 tex_count_reg = REG_A6XX_SP_FS_TEX_COUNT;
2817 break;
2818 case MESA_SHADER_COMPUTE:
2819 sb = SB6_CS_TEX;
2820 tex_samp_reg = REG_A6XX_SP_CS_TEX_SAMP_LO;
2821 tex_const_reg = REG_A6XX_SP_CS_TEX_CONST_LO;
2822 tex_count_reg = REG_A6XX_SP_CS_TEX_COUNT;
2823 break;
2824 default:
2825 unreachable("bad state block");
2826 }
2827
2828 struct tu_cs cs;
2829 result = tu_cs_begin_sub_stream(device, draw_state, 16, &cs);
2830 if (result != VK_SUCCESS)
2831 return result;
2832
2833 /* output sampler state: */
2834 tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
2835 tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
2836 CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
2837 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
2838 CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
2839 CP_LOAD_STATE6_0_NUM_UNIT(link->sampler_map.num));
2840 tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
2841
2842 tu_cs_emit_pkt4(&cs, tex_samp_reg, 2);
2843 tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
2844
2845 /* emit texture state: */
2846 tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
2847 tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
2848 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
2849 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
2850 CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
2851 CP_LOAD_STATE6_0_NUM_UNIT(link->texture_map.num));
2852 tu_cs_emit_qw(&cs, tex_const.iova); /* SRC_ADDR_LO/HI */
2853
2854 tu_cs_emit_pkt4(&cs, tex_const_reg, 2);
2855 tu_cs_emit_qw(&cs, tex_const.iova); /* SRC_ADDR_LO/HI */
2856
2857 tu_cs_emit_pkt4(&cs, tex_count_reg, 1);
2858 tu_cs_emit(&cs, link->texture_map.num);
2859
2860 *entry = tu_cs_end_sub_stream(draw_state, &cs);
2861 return VK_SUCCESS;
2862 }
2863
2864 static struct tu_cs_entry
2865 tu6_emit_ibo(struct tu_device *device, struct tu_cs *draw_state,
2866 const struct tu_pipeline *pipeline,
2867 struct tu_descriptor_state *descriptors_state,
2868 gl_shader_stage type)
2869 {
2870 const struct tu_program_descriptor_linkage *link =
2871 &pipeline->program.link[type];
2872
2873 uint32_t size = link->image_mapping.num_ibo * A6XX_TEX_CONST_DWORDS;
2874 if (!size)
2875 return (struct tu_cs_entry) {};
2876
2877 struct tu_cs cs;
2878 tu_cs_begin_sub_stream(device, draw_state, size, &cs);
2879
2880 for (unsigned i = 0; i < link->image_mapping.num_ibo; i++) {
2881 unsigned idx = link->image_mapping.ibo_to_image[i];
2882
2883 if (idx & IBO_SSBO) {
2884 idx &= ~IBO_SSBO;
2885
2886 uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx);
2887 /* We don't expose robustBufferAccess, so leave the size unlimited. */
2888 uint32_t sz = MAX_STORAGE_BUFFER_RANGE / 4;
2889
2890 tu_cs_emit(&cs, A6XX_IBO_0_FMT(TFMT6_32_UINT));
2891 tu_cs_emit(&cs,
2892 A6XX_IBO_1_WIDTH(sz & MASK(15)) |
2893 A6XX_IBO_1_HEIGHT(sz >> 15));
2894 tu_cs_emit(&cs,
2895 A6XX_IBO_2_UNK4 |
2896 A6XX_IBO_2_UNK31 |
2897 A6XX_IBO_2_TYPE(A6XX_TEX_1D));
2898 tu_cs_emit(&cs, 0);
2899 tu_cs_emit_qw(&cs, va);
2900 for (int i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
2901 tu_cs_emit(&cs, 0);
2902 } else {
2903 tu_finishme("Emit images");
2904 }
2905 }
2906
2907 struct tu_cs_entry entry = tu_cs_end_sub_stream(draw_state, &cs);
2908
2909 uint64_t ibo_addr = entry.bo->iova + entry.offset;
2910
2911 tu_cs_begin_sub_stream(device, draw_state, 64, &cs);
2912
2913 /* emit ibo state: */
2914 tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6, 3);
2915 tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
2916 CP_LOAD_STATE6_0_STATE_TYPE(type == MESA_SHADER_COMPUTE ?
2917 ST6_IBO : ST6_SHADER) |
2918 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
2919 CP_LOAD_STATE6_0_STATE_BLOCK(type == MESA_SHADER_COMPUTE ?
2920 SB6_CS_SHADER : SB6_IBO) |
2921 CP_LOAD_STATE6_0_NUM_UNIT(link->image_mapping.num_ibo));
2922 tu_cs_emit_qw(&cs, ibo_addr); /* SRC_ADDR_LO/HI */
2923
2924 tu_cs_emit_pkt4(&cs,
2925 type == MESA_SHADER_COMPUTE ?
2926 REG_A6XX_SP_CS_IBO_LO : REG_A6XX_SP_IBO_LO, 2);
2927 tu_cs_emit_qw(&cs, ibo_addr); /* SRC_ADDR_LO/HI */
2928
2929 return tu_cs_end_sub_stream(draw_state, &cs);
2930 }
2931
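/* 128-byte border color entry in the layout the hardware samples from, one
 * entry per sampler (see the STATIC_ASSERT in tu6_emit_border_color).
 */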
2932 struct PACKED bcolor_entry {
2933 uint32_t fp32[4];
2934 uint16_t ui16[4];
2935 int16_t si16[4];
2936 uint16_t fp16[4];
2937 uint16_t rgb565;
2938 uint16_t rgb5a1;
2939 uint16_t rgba4;
2940 uint8_t __pad0[2];
2941 uint8_t ui8[4];
2942 int8_t si8[4];
2943 uint32_t rgb10a2;
2944 uint32_t z24; /* also s8? */
2945 uint16_t srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */
2946 uint8_t __pad1[56];
2947 } border_color[] = {
2948 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = {},
2949 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = {},
2950 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = {
2951 .fp32[3] = 0x3f800000,
2952 .ui16[3] = 0xffff,
2953 .si16[3] = 0x7fff,
2954 .fp16[3] = 0x3c00,
2955 .rgb5a1 = 0x8000,
2956 .rgba4 = 0xf000,
2957 .ui8[3] = 0xff,
2958 .si8[3] = 0x7f,
2959 .rgb10a2 = 0xc0000000,
2960 .srgb[3] = 0x3c00,
2961 },
2962 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = {
2963 .fp32[3] = 1,
2964 .fp16[3] = 1,
2965 },
2966 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = {
2967 .fp32[0 ... 3] = 0x3f800000,
2968 .ui16[0 ... 3] = 0xffff,
2969 .si16[0 ... 3] = 0x7fff,
2970 .fp16[0 ... 3] = 0x3c00,
2971 .rgb565 = 0xffff,
2972 .rgb5a1 = 0xffff,
2973 .rgba4 = 0xffff,
2974 .ui8[0 ... 3] = 0xff,
2975 .si8[0 ... 3] = 0x7f,
2976 .rgb10a2 = 0xffffffff,
2977 .z24 = 0xffffff,
2978 .srgb[0 ... 3] = 0x3c00,
2979 },
2980 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = {
2981 .fp32[0 ... 3] = 1,
2982 .fp16[0 ... 3] = 1,
2983 },
2984 };
2985
2986 static VkResult
2987 tu6_emit_border_color(struct tu_cmd_buffer *cmd,
2988 struct tu_cs *cs)
2989 {
2990 STATIC_ASSERT(sizeof(struct bcolor_entry) == 128);
2991
2992 const struct tu_pipeline *pipeline = cmd->state.pipeline;
2993 struct tu_descriptor_state *descriptors_state =
2994 &cmd->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS];
2995 const struct tu_descriptor_map *vs_sampler =
2996 &pipeline->program.link[MESA_SHADER_VERTEX].sampler_map;
2997 const struct tu_descriptor_map *fs_sampler =
2998 &pipeline->program.link[MESA_SHADER_FRAGMENT].sampler_map;
2999 struct ts_cs_memory ptr;
3000
3001 VkResult result = tu_cs_alloc(cmd->device, &cmd->draw_state,
3002 vs_sampler->num + fs_sampler->num, 128 / 4,
3003 &ptr);
3004 if (result != VK_SUCCESS)
3005 return result;
3006
3007 for (unsigned i = 0; i < vs_sampler->num; i++) {
3008 struct tu_sampler *sampler = sampler_ptr(descriptors_state, vs_sampler, i);
3009 memcpy(ptr.map, &border_color[sampler->border], 128);
3010 ptr.map += 128 / 4;
3011 }
3012
3013 for (unsigned i = 0; i < fs_sampler->num; i++) {
3014 struct tu_sampler *sampler = sampler_ptr(descriptors_state, fs_sampler, i);
3015 memcpy(ptr.map, &border_color[sampler->border], 128);
3016 ptr.map += 128 / 4;
3017 }
3018
3019 tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO, 2);
3020 tu_cs_emit_qw(cs, ptr.iova);
3021 return VK_SUCCESS;
3022 }
3023
3024 static VkResult
3025 tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
3026 struct tu_cs *cs,
3027 const struct tu_draw_info *draw)
3028 {
3029 const struct tu_pipeline *pipeline = cmd->state.pipeline;
3030 const struct tu_dynamic_state *dynamic = &cmd->state.dynamic;
3031 struct tu_draw_state_group draw_state_groups[TU_DRAW_STATE_COUNT];
3032 uint32_t draw_state_group_count = 0;
3033
3034 struct tu_descriptor_state *descriptors_state =
3035 &cmd->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS];
3036
3037 VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
3038 if (result != VK_SUCCESS)
3039 return result;
3040
3041 /* TODO lrz */
3042
3043 uint32_t pc_primitive_cntl = 0;
3044 if (pipeline->ia.primitive_restart && draw->indexed)
3045 pc_primitive_cntl |= A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART;
3046
3047 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0);
3048 tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9990, 0);
3049 tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0);
3050
3051 tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_0, 1);
3052 tu_cs_emit(cs, pc_primitive_cntl);
3053
3054 if (cmd->state.dirty &
3055 (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH) &&
3056 (pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) {
3057 tu6_emit_gras_su_cntl(cs, pipeline->rast.gras_su_cntl,
3058 dynamic->line_width);
3059 }
3060
3061 if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) &&
3062 (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) {
3063 tu6_emit_stencil_compare_mask(cs, dynamic->stencil_compare_mask.front,
3064 dynamic->stencil_compare_mask.back);
3065 }
3066
3067 if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) &&
3068 (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) {
3069 tu6_emit_stencil_write_mask(cs, dynamic->stencil_write_mask.front,
3070 dynamic->stencil_write_mask.back);
3071 }
3072
3073 if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) &&
3074 (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) {
3075 tu6_emit_stencil_reference(cs, dynamic->stencil_reference.front,
3076 dynamic->stencil_reference.back);
3077 }
3078
3079 if (cmd->state.dirty &
3080 (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_VERTEX_BUFFERS)) {
3081 for (uint32_t i = 0; i < pipeline->vi.count; i++) {
3082 const uint32_t binding = pipeline->vi.bindings[i];
3083 const uint32_t stride = pipeline->vi.strides[i];
3084 const struct tu_buffer *buf = cmd->state.vb.buffers[binding];
3085 const VkDeviceSize offset = buf->bo_offset +
3086 cmd->state.vb.offsets[binding] +
3087 pipeline->vi.offsets[i];
3088 const VkDeviceSize size =
3089 offset < buf->bo->size ? buf->bo->size - offset : 0;
3090
3091 tu_cs_emit_pkt4(cs, REG_A6XX_VFD_FETCH(i), 4);
3092 tu_cs_emit_qw(cs, buf->bo->iova + offset);
3093 tu_cs_emit(cs, size);
3094 tu_cs_emit(cs, stride);
3095 }
3096 }
3097
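   /* The enable_mask of each group selects which passes it applies to; from
    * the values used here, bit 0 appears to cover the binning pass and bits
    * 1-2 the rendering passes (0x1 = binning only, 0x6 = render only,
    * 0x7 = all passes).
    */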
3098 if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) {
3099 draw_state_groups[draw_state_group_count++] =
3100 (struct tu_draw_state_group) {
3101 .id = TU_DRAW_STATE_PROGRAM,
3102 .enable_mask = 0x6,
3103 .ib = pipeline->program.state_ib,
3104 };
3105 draw_state_groups[draw_state_group_count++] =
3106 (struct tu_draw_state_group) {
3107 .id = TU_DRAW_STATE_PROGRAM_BINNING,
3108 .enable_mask = 0x1,
3109 .ib = pipeline->program.binning_state_ib,
3110 };
3111 draw_state_groups[draw_state_group_count++] =
3112 (struct tu_draw_state_group) {
3113 .id = TU_DRAW_STATE_VI,
3114 .enable_mask = 0x6,
3115 .ib = pipeline->vi.state_ib,
3116 };
3117 draw_state_groups[draw_state_group_count++] =
3118 (struct tu_draw_state_group) {
3119 .id = TU_DRAW_STATE_VI_BINNING,
3120 .enable_mask = 0x1,
3121 .ib = pipeline->vi.binning_state_ib,
3122 };
3123 draw_state_groups[draw_state_group_count++] =
3124 (struct tu_draw_state_group) {
3125 .id = TU_DRAW_STATE_VP,
3126 .enable_mask = 0x7,
3127 .ib = pipeline->vp.state_ib,
3128 };
3129 draw_state_groups[draw_state_group_count++] =
3130 (struct tu_draw_state_group) {
3131 .id = TU_DRAW_STATE_RAST,
3132 .enable_mask = 0x7,
3133 .ib = pipeline->rast.state_ib,
3134 };
3135 draw_state_groups[draw_state_group_count++] =
3136 (struct tu_draw_state_group) {
3137 .id = TU_DRAW_STATE_DS,
3138 .enable_mask = 0x7,
3139 .ib = pipeline->ds.state_ib,
3140 };
3141 draw_state_groups[draw_state_group_count++] =
3142 (struct tu_draw_state_group) {
3143 .id = TU_DRAW_STATE_BLEND,
3144 .enable_mask = 0x7,
3145 .ib = pipeline->blend.state_ib,
3146 };
3147 }
3148
3149 if (cmd->state.dirty &
3150 (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DESCRIPTOR_SETS | TU_CMD_DIRTY_PUSH_CONSTANTS)) {
3151 draw_state_groups[draw_state_group_count++] =
3152 (struct tu_draw_state_group) {
3153 .id = TU_DRAW_STATE_VS_CONST,
3154 .enable_mask = 0x7,
3155 .ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_VERTEX)
3156 };
3157 draw_state_groups[draw_state_group_count++] =
3158 (struct tu_draw_state_group) {
3159 .id = TU_DRAW_STATE_FS_CONST,
3160 .enable_mask = 0x6,
3161 .ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_FRAGMENT)
3162 };
3163 }
3164
3165 if (cmd->state.dirty &
3166 (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DESCRIPTOR_SETS)) {
3167 bool needs_border = false;
3168 struct tu_cs_entry vs_tex, fs_tex;
3169
3170 result = tu6_emit_textures(cmd, MESA_SHADER_VERTEX, &vs_tex, &needs_border);
3171 if (result != VK_SUCCESS)
3172 return result;
3173
3174 result = tu6_emit_textures(cmd, MESA_SHADER_FRAGMENT, &fs_tex, &needs_border);
3175 if (result != VK_SUCCESS)
3176 return result;
3177
3178 draw_state_groups[draw_state_group_count++] =
3179 (struct tu_draw_state_group) {
3180 .id = TU_DRAW_STATE_VS_TEX,
3181 .enable_mask = 0x7,
3182 .ib = vs_tex,
3183 };
3184 draw_state_groups[draw_state_group_count++] =
3185 (struct tu_draw_state_group) {
3186 .id = TU_DRAW_STATE_FS_TEX,
3187 .enable_mask = 0x6,
3188 .ib = fs_tex,
3189 };
3190 draw_state_groups[draw_state_group_count++] =
3191 (struct tu_draw_state_group) {
3192 .id = TU_DRAW_STATE_FS_IBO,
3193 .enable_mask = 0x6,
3194 .ib = tu6_emit_ibo(cmd->device, &cmd->draw_state, pipeline,
3195 descriptors_state, MESA_SHADER_FRAGMENT)
3196 };
3197
3198 if (needs_border) {
3199 result = tu6_emit_border_color(cmd, cs);
3200 if (result != VK_SUCCESS)
3201 return result;
3202 }
3203 }
3204
3205 tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_group_count);
3206 for (uint32_t i = 0; i < draw_state_group_count; i++) {
3207 const struct tu_draw_state_group *group = &draw_state_groups[i];
3208
3209 uint32_t cp_set_draw_state =
3210 CP_SET_DRAW_STATE__0_COUNT(group->ib.size / 4) |
3211 CP_SET_DRAW_STATE__0_ENABLE_MASK(group->enable_mask) |
3212 CP_SET_DRAW_STATE__0_GROUP_ID(group->id);
3213 uint64_t iova;
3214 if (group->ib.size) {
3215 iova = group->ib.bo->iova + group->ib.offset;
3216 } else {
3217 cp_set_draw_state |= CP_SET_DRAW_STATE__0_DISABLE;
3218 iova = 0;
3219 }
3220
3221 tu_cs_emit(cs, cp_set_draw_state);
3222 tu_cs_emit_qw(cs, iova);
3223 }
3224
3225 tu_cs_sanity_check(cs);
3226
3227 /* track BOs */
3228 if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) {
3229 for (uint32_t i = 0; i < MAX_VBS; i++) {
3230 const struct tu_buffer *buf = cmd->state.vb.buffers[i];
3231 if (buf)
3232 tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
3233 }
3234 }
3235 if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) {
3236 unsigned i;
3237 for_each_bit(i, descriptors_state->valid) {
3238 struct tu_descriptor_set *set = descriptors_state->sets[i];
3239 for (unsigned j = 0; j < set->layout->buffer_count; ++j)
3240 if (set->descriptors[j]) {
3241 tu_bo_list_add(&cmd->bo_list, set->descriptors[j],
3242 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
3243 }
3244 }
3245 }
3246
3247 /* Fragment shader state overwrites compute shader state, so flag the
3248 * compute pipeline for re-emit.
3249 */
3250 cmd->state.dirty = TU_CMD_DIRTY_COMPUTE_PIPELINE;
3251 return VK_SUCCESS;
3252 }
3253
3254 static void
3255 tu6_emit_draw_direct(struct tu_cmd_buffer *cmd,
3256 struct tu_cs *cs,
3257 const struct tu_draw_info *draw)
3258 {
3259
3260 const enum pc_di_primtype primtype = cmd->state.pipeline->ia.primtype;
3261
3262 tu_cs_emit_pkt4(cs, REG_A6XX_VFD_INDEX_OFFSET, 2);
3263 tu_cs_emit(cs, draw->vertex_offset);
3264 tu_cs_emit(cs, draw->first_instance);
3265
3266 /* TODO hw binning */
3267 if (draw->indexed) {
3268 const enum a4xx_index_size index_size =
3269 tu6_index_size(cmd->state.index_type);
3270 const uint32_t index_bytes =
3271 (cmd->state.index_type == VK_INDEX_TYPE_UINT32) ? 4 : 2;
3272 const struct tu_buffer *buf = cmd->state.index_buffer;
3273 const VkDeviceSize offset = buf->bo_offset + cmd->state.index_offset +
3274 index_bytes * draw->first_index;
3275 const uint32_t size = index_bytes * draw->count;
3276
3277 const uint32_t cp_draw_indx =
3278 CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
3279 CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_DMA) |
3280 CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(index_size) |
3281 CP_DRAW_INDX_OFFSET_0_VIS_CULL(USE_VISIBILITY) | 0x2000;
3282
3283 tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 7);
3284 tu_cs_emit(cs, cp_draw_indx);
3285 tu_cs_emit(cs, draw->instance_count);
3286 tu_cs_emit(cs, draw->count);
3287 tu_cs_emit(cs, 0x0); /* XXX */
3288 tu_cs_emit_qw(cs, buf->bo->iova + offset);
3289 tu_cs_emit(cs, size);
3290 } else {
3291 const uint32_t cp_draw_indx =
3292 CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
3293 CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
3294 CP_DRAW_INDX_OFFSET_0_VIS_CULL(USE_VISIBILITY) | 0x2000;
3295
3296 tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
3297 tu_cs_emit(cs, cp_draw_indx);
3298 tu_cs_emit(cs, draw->instance_count);
3299 tu_cs_emit(cs, draw->count);
3300 }
3301 }
3302
3303 static void
3304 tu_draw(struct tu_cmd_buffer *cmd, const struct tu_draw_info *draw)
3305 {
3306 struct tu_cs *cs = &cmd->draw_cs;
3307 VkResult result;
3308
3309 result = tu6_bind_draw_states(cmd, cs, draw);
3310 if (result != VK_SUCCESS) {
3311 cmd->record_result = result;
3312 return;
3313 }
3314
3315 result = tu_cs_reserve_space(cmd->device, cs, 32);
3316 if (result != VK_SUCCESS) {
3317 cmd->record_result = result;
3318 return;
3319 }
3320
3321 if (draw->indirect) {
3322 tu_finishme("indirect draw");
3323 return;
3324 }
3325
3326 /* TODO tu6_emit_marker should pick different regs depending on cs */
3327
3328 tu6_emit_marker(cmd, cs);
3329 tu6_emit_draw_direct(cmd, cs, draw);
3330 tu6_emit_marker(cmd, cs);
3331
3332 cmd->wait_for_idle = true;
3333
3334 tu_cs_sanity_check(cs);
3335 }
3336
3337 void
3338 tu_CmdDraw(VkCommandBuffer commandBuffer,
3339 uint32_t vertexCount,
3340 uint32_t instanceCount,
3341 uint32_t firstVertex,
3342 uint32_t firstInstance)
3343 {
3344 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3345 struct tu_draw_info info = {};
3346
3347 info.count = vertexCount;
3348 info.instance_count = instanceCount;
3349 info.first_instance = firstInstance;
3350 info.vertex_offset = firstVertex;
3351
3352 tu_draw(cmd_buffer, &info);
3353 }
3354
3355 void
3356 tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
3357 uint32_t indexCount,
3358 uint32_t instanceCount,
3359 uint32_t firstIndex,
3360 int32_t vertexOffset,
3361 uint32_t firstInstance)
3362 {
3363 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3364 struct tu_draw_info info = {};
3365
3366 info.indexed = true;
3367 info.count = indexCount;
3368 info.instance_count = instanceCount;
3369 info.first_index = firstIndex;
3370 info.vertex_offset = vertexOffset;
3371 info.first_instance = firstInstance;
3372
3373 tu_draw(cmd_buffer, &info);
3374 }
3375
3376 void
3377 tu_CmdDrawIndirect(VkCommandBuffer commandBuffer,
3378 VkBuffer _buffer,
3379 VkDeviceSize offset,
3380 uint32_t drawCount,
3381 uint32_t stride)
3382 {
3383 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3384 TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
3385 struct tu_draw_info info = {};
3386
3387 info.count = drawCount;
3388 info.indirect = buffer;
3389 info.indirect_offset = offset;
3390 info.stride = stride;
3391
3392 tu_draw(cmd_buffer, &info);
3393 }
3394
3395 void
3396 tu_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
3397 VkBuffer _buffer,
3398 VkDeviceSize offset,
3399 uint32_t drawCount,
3400 uint32_t stride)
3401 {
3402 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3403 TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
3404 struct tu_draw_info info = {};
3405
3406 info.indexed = true;
3407 info.count = drawCount;
3408 info.indirect = buffer;
3409 info.indirect_offset = offset;
3410 info.stride = stride;
3411
3412 tu_draw(cmd_buffer, &info);
3413 }
3414
3415 struct tu_dispatch_info
3416 {
3417 /**
3418 * Determine the layout of the grid (in block units) to be used.
3419 */
3420 uint32_t blocks[3];
3421
3422 /**
3423 * A starting offset for the grid. If unaligned is set, the offset
3424 * must still be aligned.
3425 */
3426 uint32_t offsets[3];
3427 /**
3428 * Whether it's an unaligned compute dispatch.
3429 */
3430 bool unaligned;
3431
3432 /**
3433 * Indirect compute parameters resource.
3434 */
3435 struct tu_buffer *indirect;
3436 uint64_t indirect_offset;
3437 };
3438
3439 static void
3440 tu_emit_compute_driver_params(struct tu_cs *cs, struct tu_pipeline *pipeline,
3441 const struct tu_dispatch_info *info)
3442 {
3443 gl_shader_stage type = MESA_SHADER_COMPUTE;
3444 const struct tu_program_descriptor_linkage *link =
3445 &pipeline->program.link[type];
3446 const struct ir3_const_state *const_state = &link->const_state;
3447 uint32_t offset_dwords = const_state->offsets.driver_param;
3448
3449 if (link->constlen <= offset_dwords)
3450 return;
3451
3452 if (!info->indirect) {
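      /* For a direct dispatch the workgroup counts are known on the CPU, so
       * they can be emitted inline, followed by the local workgroup size.
       */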
3453 uint32_t driver_params[] = {
3454 info->blocks[0],
3455 info->blocks[1],
3456 info->blocks[2],
3457 pipeline->compute.local_size[0],
3458 pipeline->compute.local_size[1],
3459 pipeline->compute.local_size[2],
3460 };
3461 uint32_t num_consts = MIN2(const_state->num_driver_params,
3462 link->constlen - offset_dwords);
3463 uint32_t align_size = align(num_consts, 4);
3464
3465       /* push constants: upload the driver params with a CP_LOAD_STATE6 packet */
3466 tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + align_size);
3467 tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset_dwords / 4) |
3468 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
3469 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
3470 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
3471 CP_LOAD_STATE6_0_NUM_UNIT(align_size / 4));
3472 tu_cs_emit(cs, 0);
3473 tu_cs_emit(cs, 0);
3474 uint32_t i;
3475 for (i = 0; i < num_consts; i++)
3476 tu_cs_emit(cs, driver_params[i]);
3477 for (; i < align_size; i++)
3478 tu_cs_emit(cs, 0);
3479 } else {
3480 tu_finishme("Indirect driver params");
3481 }
3482 }
3483
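/* Record a dispatch into the main command stream: re-emit compute pipeline
 * state if it is dirty, then constants, driver params, textures, border
 * colors and IBOs, and finally the HLSQ_CS_NDRANGE setup and the
 * CP_EXEC_CS / CP_EXEC_CS_INDIRECT packet itself.
 */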
3484 static void
3485 tu_dispatch(struct tu_cmd_buffer *cmd,
3486 const struct tu_dispatch_info *info)
3487 {
3488 struct tu_cs *cs = &cmd->cs;
3489 struct tu_pipeline *pipeline = cmd->state.compute_pipeline;
3490 struct tu_descriptor_state *descriptors_state =
3491 &cmd->descriptors[VK_PIPELINE_BIND_POINT_COMPUTE];
3492
3493 VkResult result = tu_cs_reserve_space(cmd->device, cs, 256);
3494 if (result != VK_SUCCESS) {
3495 cmd->record_result = result;
3496 return;
3497 }
3498
3499 if (cmd->state.dirty & TU_CMD_DIRTY_COMPUTE_PIPELINE)
3500 tu_cs_emit_ib(cs, &pipeline->program.state_ib);
3501
3502 struct tu_cs_entry ib;
3503
3504 ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_COMPUTE);
3505 if (ib.size)
3506 tu_cs_emit_ib(cs, &ib);
3507
3508 tu_emit_compute_driver_params(cs, pipeline, info);
3509
3510 bool needs_border;
3511 result = tu6_emit_textures(cmd, MESA_SHADER_COMPUTE, &ib, &needs_border);
3512 if (result != VK_SUCCESS) {
3513 cmd->record_result = result;
3514 return;
3515 }
3516
3517 if (ib.size)
3518 tu_cs_emit_ib(cs, &ib);
3519
3520 if (needs_border)
3521 tu6_emit_border_color(cmd, cs);
3522
3523 ib = tu6_emit_ibo(cmd->device, &cmd->draw_state, pipeline,
3524 descriptors_state, MESA_SHADER_COMPUTE);
3525 if (ib.size)
3526 tu_cs_emit_ib(cs, &ib);
3527
3528    /* track BOs from the bound descriptor sets so they land in the submit's BO list */
3529 if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) {
3530 unsigned i;
3531 for_each_bit(i, descriptors_state->valid) {
3532 struct tu_descriptor_set *set = descriptors_state->sets[i];
3533 for (unsigned j = 0; j < set->layout->buffer_count; ++j)
3534 if (set->descriptors[j]) {
3535 tu_bo_list_add(&cmd->bo_list, set->descriptors[j],
3536 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
3537 }
3538 }
3539 }
3540
3541 /* Compute shader state overwrites fragment shader state, so we flag the
3542 * graphics pipeline for re-emit.
3543 */
3544 cmd->state.dirty = TU_CMD_DIRTY_PIPELINE;
3545
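   /* Switch the CP into compute mode; mode 0x8 appears to be RM6_COMPUTE,
    * mirroring what the gallium a6xx driver emits before a grid launch.
    */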
3546 tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
3547 tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(0x8));
3548
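   /* Program the ND-range: the LOCALSIZE fields take the workgroup size minus
    * one, GLOBALSIZE is the total invocation count per axis (local size times
    * workgroup count), and the global offsets are left at zero here.
    */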
3549 const uint32_t *local_size = pipeline->compute.local_size;
3550 const uint32_t *num_groups = info->blocks;
3551 tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_NDRANGE_0, 7);
3552 tu_cs_emit(cs,
3553 A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(3) |
3554 A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) |
3555 A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) |
3556 A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1));
3557 tu_cs_emit(cs, A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0]));
3558 tu_cs_emit(cs, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */
3559 tu_cs_emit(cs, A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1]));
3560 tu_cs_emit(cs, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */
3561 tu_cs_emit(cs, A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2]));
3562 tu_cs_emit(cs, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */
3563
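   /* Kernel group counts are left at 1; the workgroup counts are supplied
    * through CP_EXEC_CS / CP_EXEC_CS_INDIRECT below.
    */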
3564 tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_KERNEL_GROUP_X, 3);
3565 tu_cs_emit(cs, 1); /* HLSQ_CS_KERNEL_GROUP_X */
3566 tu_cs_emit(cs, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
3567 tu_cs_emit(cs, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
3568
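   /* For an indirect dispatch the workgroup counts live in the indirect
    * buffer, so add its BO to the submit list and let CP_EXEC_CS_INDIRECT
    * read them from memory; otherwise pass the counts directly in CP_EXEC_CS.
    */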
3569 if (info->indirect) {
3570 uint64_t iova = tu_buffer_iova(info->indirect) + info->indirect_offset;
3571
3572 tu_bo_list_add(&cmd->bo_list, info->indirect->bo,
3573 MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
3574
3575 tu_cs_emit_pkt7(cs, CP_EXEC_CS_INDIRECT, 4);
3576 tu_cs_emit(cs, 0x00000000);
3577 tu_cs_emit_qw(cs, iova);
3578 tu_cs_emit(cs,
3579 A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
3580 A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
3581 A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
3582 } else {
3583 tu_cs_emit_pkt7(cs, CP_EXEC_CS, 4);
3584 tu_cs_emit(cs, 0x00000000);
3585 tu_cs_emit(cs, CP_EXEC_CS_1_NGROUPS_X(info->blocks[0]));
3586 tu_cs_emit(cs, CP_EXEC_CS_2_NGROUPS_Y(info->blocks[1]));
3587 tu_cs_emit(cs, CP_EXEC_CS_3_NGROUPS_Z(info->blocks[2]));
3588 }
3589
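   /* Conservatively wait for idle and flush caches so that whatever is
    * recorded next can observe the results of the dispatch.
    */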
3590 tu_cs_emit_wfi(cs);
3591
3592 tu6_emit_cache_flush(cmd, cs);
3593 }
3594
3595 void
3596 tu_CmdDispatchBase(VkCommandBuffer commandBuffer,
3597 uint32_t base_x,
3598 uint32_t base_y,
3599 uint32_t base_z,
3600 uint32_t x,
3601 uint32_t y,
3602 uint32_t z)
3603 {
3604 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3605 struct tu_dispatch_info info = {};
3606
3607 info.blocks[0] = x;
3608 info.blocks[1] = y;
3609 info.blocks[2] = z;
3610
3611 info.offsets[0] = base_x;
3612 info.offsets[1] = base_y;
3613 info.offsets[2] = base_z;
3614 tu_dispatch(cmd_buffer, &info);
3615 }
3616
3617 void
3618 tu_CmdDispatch(VkCommandBuffer commandBuffer,
3619 uint32_t x,
3620 uint32_t y,
3621 uint32_t z)
3622 {
3623 tu_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z);
3624 }
3625
3626 void
3627 tu_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
3628 VkBuffer _buffer,
3629 VkDeviceSize offset)
3630 {
3631 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3632 TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
3633 struct tu_dispatch_info info = {};
3634
3635 info.indirect = buffer;
3636 info.indirect_offset = offset;
3637
3638 tu_dispatch(cmd_buffer, &info);
3639 }
3640
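/* Ending the render pass is where per-tile rendering actually happens: the
 * draws recorded into draw_cs are replayed for each tile by
 * tu_cmd_render_tiles(), after which draw_cs is recycled for the next pass.
 */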
3641 void
3642 tu_CmdEndRenderPass(VkCommandBuffer commandBuffer)
3643 {
3644 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3645
3646 tu_cs_end(&cmd_buffer->draw_cs);
3647
3648 tu_cmd_render_tiles(cmd_buffer);
3649
3650 /* discard draw_cs entries now that the tiles are rendered */
3651 tu_cs_discard_entries(&cmd_buffer->draw_cs);
3652 tu_cs_begin(&cmd_buffer->draw_cs);
3653
3654 vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
3655 cmd_buffer->state.attachments = NULL;
3656
3657 cmd_buffer->state.pass = NULL;
3658 cmd_buffer->state.subpass = NULL;
3659 cmd_buffer->state.framebuffer = NULL;
3660 }
3661
3662 void
3663 tu_CmdEndRenderPass2KHR(VkCommandBuffer commandBuffer,
3664 const VkSubpassEndInfoKHR *pSubpassEndInfo)
3665 {
3666 tu_CmdEndRenderPass(commandBuffer);
3667 }
3668
3669 struct tu_barrier_info
3670 {
3671 uint32_t eventCount;
3672 const VkEvent *pEvents;
3673 VkPipelineStageFlags srcStageMask;
3674 };
3675
3676 static void
3677 tu_barrier(struct tu_cmd_buffer *cmd_buffer,
3678 uint32_t memoryBarrierCount,
3679 const VkMemoryBarrier *pMemoryBarriers,
3680 uint32_t bufferMemoryBarrierCount,
3681 const VkBufferMemoryBarrier *pBufferMemoryBarriers,
3682 uint32_t imageMemoryBarrierCount,
3683 const VkImageMemoryBarrier *pImageMemoryBarriers,
3684 const struct tu_barrier_info *info)
3685 {
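   /* TODO: currently a no-op; pipeline barriers are not implemented yet. */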
3686 }
3687
3688 void
3689 tu_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
3690 VkPipelineStageFlags srcStageMask,
3691 VkPipelineStageFlags destStageMask,
3692 VkBool32 byRegion,
3693 uint32_t memoryBarrierCount,
3694 const VkMemoryBarrier *pMemoryBarriers,
3695 uint32_t bufferMemoryBarrierCount,
3696 const VkBufferMemoryBarrier *pBufferMemoryBarriers,
3697 uint32_t imageMemoryBarrierCount,
3698 const VkImageMemoryBarrier *pImageMemoryBarriers)
3699 {
3700 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3701 struct tu_barrier_info info;
3702
3703 info.eventCount = 0;
3704 info.pEvents = NULL;
3705 info.srcStageMask = srcStageMask;
3706
3707 tu_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
3708 bufferMemoryBarrierCount, pBufferMemoryBarriers,
3709 imageMemoryBarrierCount, pImageMemoryBarriers, &info);
3710 }
3711
3712 static void
3713 write_event(struct tu_cmd_buffer *cmd_buffer,
3714 struct tu_event *event,
3715 VkPipelineStageFlags stageMask,
3716 unsigned value)
3717 {
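   /* TODO: currently a no-op; VkEvent signaling is not implemented yet. */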
3718 }
3719
3720 void
3721 tu_CmdSetEvent(VkCommandBuffer commandBuffer,
3722 VkEvent _event,
3723 VkPipelineStageFlags stageMask)
3724 {
3725 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3726 TU_FROM_HANDLE(tu_event, event, _event);
3727
3728 write_event(cmd_buffer, event, stageMask, 1);
3729 }
3730
3731 void
3732 tu_CmdResetEvent(VkCommandBuffer commandBuffer,
3733 VkEvent _event,
3734 VkPipelineStageFlags stageMask)
3735 {
3736 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3737 TU_FROM_HANDLE(tu_event, event, _event);
3738
3739 write_event(cmd_buffer, event, stageMask, 0);
3740 }
3741
3742 void
3743 tu_CmdWaitEvents(VkCommandBuffer commandBuffer,
3744 uint32_t eventCount,
3745 const VkEvent *pEvents,
3746 VkPipelineStageFlags srcStageMask,
3747 VkPipelineStageFlags dstStageMask,
3748 uint32_t memoryBarrierCount,
3749 const VkMemoryBarrier *pMemoryBarriers,
3750 uint32_t bufferMemoryBarrierCount,
3751 const VkBufferMemoryBarrier *pBufferMemoryBarriers,
3752 uint32_t imageMemoryBarrierCount,
3753 const VkImageMemoryBarrier *pImageMemoryBarriers)
3754 {
3755 TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
3756 struct tu_barrier_info info;
3757
3758 info.eventCount = eventCount;
3759 info.pEvents = pEvents;
3760 info.srcStageMask = 0;
3761
3762 tu_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
3763 bufferMemoryBarrierCount, pBufferMemoryBarriers,
3764 imageMemoryBarrierCount, pImageMemoryBarriers, &info);
3765 }
3766
3767 void
3768 tu_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask)
3769 {
3770 /* No-op */
3771 }