tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, need_seqno ? 4 : 1);
tu_cs_emit(cs, CP_EVENT_WRITE_0_EVENT(event));
if (need_seqno) {
- tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
+ tu_cs_emit_qw(cs, global_iova(cmd, seqno_dummy));
tu_cs_emit(cs, 0);
}
}
tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
tu_cs_emit(cs, 0x0);
- tu_cs_emit_pkt7(cs, CP_SET_BIN_DATA5, 7);
+ tu_cs_emit_pkt7(cs, CP_SET_BIN_DATA5_OFFSET, 4);
tu_cs_emit(cs, fb->pipe_sizes[pipe] |
CP_SET_BIN_DATA5_0_VSC_N(slot));
- tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + pipe * cmd->vsc_draw_strm_pitch);
- tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + pipe * 4 + 32 * cmd->vsc_draw_strm_pitch);
- tu_cs_emit_qw(cs, cmd->vsc_prim_strm.iova + pipe * cmd->vsc_prim_strm_pitch);
+ tu_cs_emit(cs, pipe * cmd->vsc_draw_strm_pitch);
+ tu_cs_emit(cs, pipe * 4);
+ tu_cs_emit(cs, pipe * cmd->vsc_prim_strm_pitch);
tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
tu_cs_emit(cs, 0x0);
static void
tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
- const struct tu_physical_device *phys_dev = cmd->device->physical_device;
+ struct tu_device *dev = cmd->device;
+ const struct tu_physical_device *phys_dev = dev->physical_device;
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
A6XX_RB_LRZ_CNTL(0));
tu_cs_emit_regs(cs,
- A6XX_SP_TP_BORDER_COLOR_BASE_ADDR(.bo = &cmd->device->border_color));
+ A6XX_SP_TP_BORDER_COLOR_BASE_ADDR(.bo = &dev->global_bo,
+ .bo_offset = gb_offset(border_color)));
tu_cs_emit_regs(cs,
- A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR(.bo = &cmd->device->border_color));
+ A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR(.bo = &dev->global_bo,
+ .bo_offset = gb_offset(border_color)));
+
+ /* VSC buffers:
+ * use vsc pitches from the largest values used so far with this device
+ * if there hasn't been overflow, there will already be a scratch bo
+ * allocated for these sizes
+ *
+ * if overflow is detected, the stream size is increased by 2x
+ */
+ mtx_lock(&dev->vsc_pitch_mtx);
+
+ struct tu6_global *global = dev->global_bo.map;
+
+ uint32_t vsc_draw_overflow = global->vsc_draw_overflow;
+ uint32_t vsc_prim_overflow = global->vsc_prim_overflow;
+
+ /* The CP_COND_WRITE5 packets emitted after binning write the overflowing
+ * cmdbuf's pitch into these global fields. A recorded value >= the
+ * current device pitch means the last doubling hasn't happened yet, so
+ * grow now. The VSC_PAD tail is subtracted before doubling and re-added
+ * after, so the usable size doubles while the pad stays constant.
+ */
+ if (vsc_draw_overflow >= dev->vsc_draw_strm_pitch)
+ dev->vsc_draw_strm_pitch = (dev->vsc_draw_strm_pitch - VSC_PAD) * 2 + VSC_PAD;
+
+ if (vsc_prim_overflow >= dev->vsc_prim_strm_pitch)
+ dev->vsc_prim_strm_pitch = (dev->vsc_prim_strm_pitch - VSC_PAD) * 2 + VSC_PAD;
+
+ /* snapshot the device-wide pitches for the lifetime of this cmdbuf */
+ cmd->vsc_prim_strm_pitch = dev->vsc_prim_strm_pitch;
+ cmd->vsc_draw_strm_pitch = dev->vsc_draw_strm_pitch;
+
+ mtx_unlock(&dev->vsc_pitch_mtx);
+
+ /* vsc_bo layout: prim streams (prim_pitch * MAX_VSC_PIPES) at offset 0,
+ * draw streams (draw_pitch * MAX_VSC_PIPES) after them, then one u32
+ * draw-stream size per pipe at offset size0.
+ */
+ struct tu_bo *vsc_bo;
+ uint32_t size0 = cmd->vsc_prim_strm_pitch * MAX_VSC_PIPES +
+ cmd->vsc_draw_strm_pitch * MAX_VSC_PIPES;
+
+ tu_get_scratch_bo(dev, size0 + MAX_VSC_PIPES * 4, &vsc_bo);
+
+ tu_cs_emit_regs(cs,
+ A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = vsc_bo, .bo_offset = size0));
+ tu_cs_emit_regs(cs,
+ A6XX_VSC_PRIM_STRM_ADDRESS(.bo = vsc_bo));
+ tu_cs_emit_regs(cs,
+ A6XX_VSC_DRAW_STRM_ADDRESS(.bo = vsc_bo,
+ .bo_offset = cmd->vsc_prim_strm_pitch * MAX_VSC_PIPES));
+
+ tu_bo_list_add(&cmd->bo_list, vsc_bo, MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
tu_cs_sanity_check(cs);
}
tu_cs_emit_regs(cs,
A6XX_VSC_BIN_SIZE(.width = fb->tile0.width,
- .height = fb->tile0.height),
- A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = &cmd->vsc_draw_strm,
- .bo_offset = 32 * cmd->vsc_draw_strm_pitch));
+ .height = fb->tile0.height));
tu_cs_emit_regs(cs,
A6XX_VSC_BIN_COUNT(.nx = fb->tile_count.width,
tu_cs_emit_array(cs, fb->pipe_config, 32);
tu_cs_emit_regs(cs,
- A6XX_VSC_PRIM_STRM_ADDRESS(.bo = &cmd->vsc_prim_strm),
A6XX_VSC_PRIM_STRM_PITCH(cmd->vsc_prim_strm_pitch),
- A6XX_VSC_PRIM_STRM_LIMIT(cmd->vsc_prim_strm_pitch - 64));
+ A6XX_VSC_PRIM_STRM_LIMIT(cmd->vsc_prim_strm_pitch - VSC_PAD));
tu_cs_emit_regs(cs,
- A6XX_VSC_DRAW_STRM_ADDRESS(.bo = &cmd->vsc_draw_strm),
A6XX_VSC_DRAW_STRM_PITCH(cmd->vsc_draw_strm_pitch),
- A6XX_VSC_DRAW_STRM_LIMIT(cmd->vsc_draw_strm_pitch - 64));
+ A6XX_VSC_DRAW_STRM_LIMIT(cmd->vsc_draw_strm_pitch - VSC_PAD));
}
static void
const uint32_t used_pipe_count =
fb->pipe_count.width * fb->pipe_count.height;
- /* Clear vsc_scratch: */
- tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 3);
- tu_cs_emit_qw(cs, cmd->scratch_bo.iova + ctrl_offset(vsc_scratch));
- tu_cs_emit(cs, 0x0);
-
- /* Check for overflow, write vsc_scratch if detected: */
for (int i = 0; i < used_pipe_count; i++) {
tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 8);
tu_cs_emit(cs, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
CP_COND_WRITE5_0_WRITE_MEMORY);
tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));
tu_cs_emit(cs, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
- tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_draw_strm_pitch - 64));
+ tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_draw_strm_pitch - VSC_PAD));
tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0));
- tu_cs_emit_qw(cs, cmd->scratch_bo.iova + ctrl_offset(vsc_scratch));
- tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(1 + cmd->vsc_draw_strm_pitch));
+ tu_cs_emit_qw(cs, global_iova(cmd, vsc_draw_overflow));
+ tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(cmd->vsc_draw_strm_pitch));
tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 8);
tu_cs_emit(cs, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
CP_COND_WRITE5_0_WRITE_MEMORY);
tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));
tu_cs_emit(cs, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
- tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_prim_strm_pitch - 64));
+ tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_prim_strm_pitch - VSC_PAD));
tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0));
- tu_cs_emit_qw(cs, cmd->scratch_bo.iova + ctrl_offset(vsc_scratch));
- tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(3 + cmd->vsc_prim_strm_pitch));
+ tu_cs_emit_qw(cs, global_iova(cmd, vsc_prim_overflow));
+ tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(cmd->vsc_prim_strm_pitch));
}
tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
{
const struct tu_framebuffer *fb = cmd->state.framebuffer;
- if (use_hw_binning(cmd))
- cmd->use_vsc_data = true;
-
tu6_tile_render_begin(cmd, &cmd->cs);
uint32_t pipe = 0;
list_inithead(&cmd_buffer->upload.list);
- VkResult result = tu_bo_init_new(device, &cmd_buffer->scratch_bo, 0x1000);
- if (result != VK_SUCCESS)
- goto fail_scratch_bo;
-
- /* TODO: resize on overflow */
- cmd_buffer->vsc_draw_strm_pitch = device->vsc_draw_strm_pitch;
- cmd_buffer->vsc_prim_strm_pitch = device->vsc_prim_strm_pitch;
- cmd_buffer->vsc_draw_strm = device->vsc_draw_strm;
- cmd_buffer->vsc_prim_strm = device->vsc_prim_strm;
-
return VK_SUCCESS;
-
-fail_scratch_bo:
- list_del(&cmd_buffer->pool_link);
- return result;
}
static void
tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
{
- tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo);
-
list_del(&cmd_buffer->pool_link);
tu_cs_finish(&cmd_buffer->cs);
for (uint32_t i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
/* note: FLUSH_BASE is always the same, so it could go in init_hw()? */
tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_FLUSH_BASE(i), 2);
- tu_cs_emit_qw(cs, cmd->scratch_bo.iova + ctrl_offset(flush_base[i]));
+ tu_cs_emit_qw(cs, global_iova(cmd, flush_base[i]));
tu6_emit_event_write(cmd, cs, FLUSH_SO_0 + i);
}
0x40000 | /* ??? */
CP_MEM_TO_REG_0_UNK31 |
CP_MEM_TO_REG_0_CNT(1));
- tu_cs_emit_qw(cs, cmd->scratch_bo.iova + ctrl_offset(flush_base[idx]));
+ tu_cs_emit_qw(cs, global_iova(cmd, flush_base[idx]));
if (offset) {
tu_cs_emit_pkt7(cs, CP_REG_RMW, 3);
tu_emit_cache_flush(cmd_buffer, &cmd_buffer->cs);
}
- tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->scratch_bo,
- MSM_SUBMIT_BO_WRITE);
-
- if (cmd_buffer->use_vsc_data) {
- tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->vsc_draw_strm,
- MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
- tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->vsc_prim_strm,
- MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
- }
-
- tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->device->border_color,
- MSM_SUBMIT_BO_READ);
+ tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->device->global_bo,
+ MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
for (uint32_t i = 0; i < cmd_buffer->draw_cs.bo_count; i++) {
tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_cs.bos[i],
},
};
-
VkResult
tu_CreateDevice(VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo *pCreateInfo,
if (!device->compiler)
goto fail_queues;
-#define VSC_DRAW_STRM_SIZE(pitch) ((pitch) * 32 + 0x100) /* extra size to store VSC_SIZE */
-#define VSC_PRIM_STRM_SIZE(pitch) ((pitch) * 32)
-
- device->vsc_draw_strm_pitch = 0x440 * 4;
- device->vsc_prim_strm_pitch = 0x1040 * 4;
-
- result = tu_bo_init_new(device, &device->vsc_draw_strm, VSC_DRAW_STRM_SIZE(device->vsc_draw_strm_pitch));
- if (result != VK_SUCCESS)
- goto fail_vsc_data;
-
- result = tu_bo_init_new(device, &device->vsc_prim_strm, VSC_PRIM_STRM_SIZE(device->vsc_prim_strm_pitch));
- if (result != VK_SUCCESS)
- goto fail_vsc_data2;
+ /* initial sizes, these will increase if there is overflow; each pitch
+ * carries the VSC_PAD tail on top of the usable size, which the
+ * overflow doubling in tu6_init_hw preserves.
+ */
+ device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
+ device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;
- STATIC_ASSERT(sizeof(struct bcolor_entry) == 128);
- result = tu_bo_init_new(device, &device->border_color, sizeof(border_color));
+ STATIC_ASSERT(sizeof(border_color) == sizeof(((struct tu6_global*) 0)->border_color));
+ result = tu_bo_init_new(device, &device->global_bo, sizeof(struct tu6_global));
if (result != VK_SUCCESS)
- goto fail_border_color;
+ goto fail_global_bo;
- result = tu_bo_map(device, &device->border_color);
+ result = tu_bo_map(device, &device->global_bo);
if (result != VK_SUCCESS)
- goto fail_border_color_map;
+ goto fail_global_bo_map;
- memcpy(device->border_color.map, border_color, sizeof(border_color));
+ memcpy(device->global_bo.map + gb_offset(border_color), border_color, sizeof(border_color));
VkPipelineCacheCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);
+ mtx_init(&device->vsc_pitch_mtx, mtx_plain);
+
*pDevice = tu_device_to_handle(device);
return VK_SUCCESS;
fail_pipeline_cache:
-fail_border_color_map:
- tu_bo_finish(device, &device->border_color);
-
-fail_border_color:
- tu_bo_finish(device, &device->vsc_prim_strm);
+fail_global_bo_map:
+ tu_bo_finish(device, &device->global_bo);
-fail_vsc_data2:
- tu_bo_finish(device, &device->vsc_draw_strm);
-
-fail_vsc_data:
+fail_global_bo:
ralloc_free(device->compiler);
fail_queues:
if (!device)
return;
- tu_bo_finish(device, &device->vsc_draw_strm);
- tu_bo_finish(device, &device->vsc_prim_strm);
-
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
for (unsigned q = 0; q < device->queue_count[i]; q++)
tu_queue_finish(&device->queues[i][q]);
void *map;
};
+/* This struct defines the layout of the global_bo: a single per-device
+ * BO holding the small GPU-visible scratch state that previously lived
+ * in separate bos (border colors, seqno scratch, VSC overflow values,
+ * streamout flush scratch). CPU access is via global_bo.map, GPU access
+ * via global_iova().
+ */
+struct tu6_global
+{
+ /* 6 bcolor_entry entries, one for each VK_BORDER_COLOR */
+ uint8_t border_color[128 * 6];
+
+ uint32_t seqno_dummy; /* dummy seqno for CP_EVENT_WRITE */
+ uint32_t _pad0;
+ /* Written by the GPU (CP_COND_WRITE5 after binning) with the pitch of
+ * an overflowing cmdbuf; read on the CPU under vsc_pitch_mtx and
+ * compared against the current device pitch to decide on growth.
+ */
+ volatile uint32_t vsc_draw_overflow;
+ uint32_t _pad1;
+ volatile uint32_t vsc_prim_overflow;
+ uint32_t _pad2[3];
+
+ /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */
+ struct {
+ uint32_t offset;
+ uint32_t pad[7]; /* 8 dwords total = 32-byte stride per entry */
+ } flush_base[4];
+};
+#define gb_offset(member) offsetof(struct tu6_global, member)
+#define global_iova(cmd, member) ((cmd)->device->global_bo.iova + gb_offset(member))
+
+/* extra space in vsc draw/prim streams */
+#define VSC_PAD 0x40
+
struct tu_device
{
VK_LOADER_DATA _loader_data;
/* Backup in-memory cache to be used if the app doesn't provide one */
struct tu_pipeline_cache *mem_cache;
- struct tu_bo vsc_draw_strm;
- struct tu_bo vsc_prim_strm;
- uint32_t vsc_draw_strm_pitch;
- uint32_t vsc_prim_strm_pitch;
-
#define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */
/* Currently the kernel driver uses a 32-bit GPU address space, but it
bool initialized;
} scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2];
- struct tu_bo border_color;
+ struct tu_bo global_bo;
struct tu_device_extension_table enabled_extensions;
+
+ /* largest VSC stream pitches seen so far on this device; grown on
+ * overflow detection in tu6_init_hw.
+ */
+ uint32_t vsc_draw_strm_pitch;
+ uint32_t vsc_prim_strm_pitch;
+ mtx_t vsc_pitch_mtx; /* guards reads/updates of the two pitches above */
};
VkResult _tu_device_set_lost(struct tu_device *device,
VkResult
tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other);
-/* This struct defines the layout of the scratch_bo */
-struct tu6_control
-{
- uint32_t seqno_dummy; /* dummy seqno for CP_EVENT_WRITE */
- uint32_t _pad0;
- volatile uint32_t vsc_overflow;
- uint32_t _pad1;
- /* flag set from cmdstream when VSC overflow detected: */
- uint32_t vsc_scratch;
- uint32_t _pad2;
- uint32_t _pad3;
- uint32_t _pad4;
-
- /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */
- struct {
- uint32_t offset;
- uint32_t pad[7];
- } flush_base[4];
-};
-
-#define ctrl_offset(member) offsetof(struct tu6_control, member)
-
struct tu_cmd_buffer
{
VK_LOADER_DATA _loader_data;
struct tu_cs draw_epilogue_cs;
struct tu_cs sub_cs;
- struct tu_bo scratch_bo;
-
bool has_tess;
- struct tu_bo vsc_draw_strm;
- struct tu_bo vsc_prim_strm;
uint32_t vsc_draw_strm_pitch;
uint32_t vsc_prim_strm_pitch;
- bool use_vsc_data;
};
/* Temporary struct for tracking a register state to be written, used by