+
+/* VK_EXT_transform_feedback */
+/**
+ * Record transform feedback (streamout) buffer bindings for the slots
+ * [firstBinding, firstBinding + bindingCount).
+ *
+ * For every slot the buffer, offset and size are stored in
+ * cmd_buffer->streamout_bindings, the buffer's BO is passed to
+ * radv_cs_add_buffer() and the slot's bit is set in the streamout
+ * enabled mask. The streamout-buffer dirty flag is raised afterwards.
+ *
+ * \param pBuffers  bindingCount buffer handles.
+ * \param pOffsets  bindingCount byte offsets into the buffers.
+ * \param pSizes    bindingCount byte sizes, or NULL. A NULL array or a
+ *                  VK_WHOLE_SIZE entry selects everything from the
+ *                  offset to the end of the buffer.
+ */
+void radv_CmdBindTransformFeedbackBuffersEXT(
+	VkCommandBuffer                             commandBuffer,
+	uint32_t                                    firstBinding,
+	uint32_t                                    bindingCount,
+	const VkBuffer*                             pBuffers,
+	const VkDeviceSize*                         pOffsets,
+	const VkDeviceSize*                         pSizes)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
+	uint8_t enabled_mask = 0;
+
+	assert(firstBinding + bindingCount <= MAX_SO_BUFFERS);
+	for (uint32_t i = 0; i < bindingCount; i++) {
+		uint32_t idx = firstBinding + i;
+
+		sb[idx].buffer = radv_buffer_from_handle(pBuffers[i]);
+		sb[idx].offset = pOffsets[i];
+
+		/* The size array is optional and entries may be
+		 * VK_WHOLE_SIZE: both mean "up to the end of the buffer".
+		 * NOTE(review): relies on radv_buffer::size holding the
+		 * total buffer size - confirm against the struct.
+		 */
+		if (!pSizes || pSizes[i] == VK_WHOLE_SIZE)
+			sb[idx].size = sb[idx].buffer->size - sb[idx].offset;
+		else
+			sb[idx].size = pSizes[i];
+
+		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
+				   sb[idx].buffer->bo);
+
+		/* Unsigned literal: avoid shifting a signed int. */
+		enabled_mask |= 1u << idx;
+	}
+
+	/* Bindings accumulate: previously enabled slots stay enabled. */
+	cmd_buffer->state.streamout.enabled_mask |= enabled_mask;
+
+	cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
+}
+
+/**
+ * Emit the two consecutive context registers starting at
+ * VGT_STRMOUT_CONFIG from the current streamout state.
+ *
+ * The first dword sets the enable bit of all four streams to
+ * so->streamout_enabled (RAST_STREAM is 0); the second dword is
+ * hw_enabled_mask restricted to enabled_stream_buffers_mask.
+ */
+static void
+radv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+
+	const uint32_t strmout_config =
+		S_028B94_STREAMOUT_0_EN(so->streamout_enabled) |
+		S_028B94_RAST_STREAM(0) |
+		S_028B94_STREAMOUT_1_EN(so->streamout_enabled) |
+		S_028B94_STREAMOUT_2_EN(so->streamout_enabled) |
+		S_028B94_STREAMOUT_3_EN(so->streamout_enabled);
+	const uint32_t buffer_config =
+		so->hw_enabled_mask & so->enabled_stream_buffers_mask;
+
+	radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
+	radeon_emit(cs, strmout_config);
+	radeon_emit(cs, buffer_config);
+
+	/* Context registers were written: record the context roll. */
+	cmd_buffer->state.context_roll_without_scissor_emitted = true;
+}
+
+/**
+ * Update the streamout enable state and re-emit the streamout enable
+ * registers only when something actually changed.
+ *
+ * \param enable  new value for so->streamout_enabled.
+ */
+static void
+radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable)
+{
+	struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+	const bool prev_enabled = so->streamout_enabled;
+	const uint32_t prev_hw_mask = so->hw_enabled_mask;
+	uint32_t replicated_mask = 0;
+
+	so->streamout_enabled = enable;
+
+	/* Replicate the buffer enable mask into four consecutive 4-bit
+	 * fields (shifts of 0, 4, 8 and 12).
+	 */
+	for (unsigned field = 0; field < 4; field++)
+		replicated_mask |= so->enabled_mask << (4 * field);
+	so->hw_enabled_mask = replicated_mask;
+
+	/* Nothing changed: skip the register writes. */
+	if (prev_enabled == so->streamout_enabled &&
+	    prev_hw_mask == so->hw_enabled_mask)
+		return;
+
+	radv_emit_streamout_enable(cmd_buffer);
+}
+
+/**
+ * Emit a VGT streamout flush and wait for it.
+ *
+ * CP_STRMOUT_CNTL is first written with 0, then an
+ * SO_VGTSTREAMOUT_FLUSH event is emitted, and finally a WAIT_REG_MEM
+ * packet polls the register until its OFFSET_UPDATE_DONE bit is set.
+ */
+static void radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+	/* The register is at different places on different ASICs. */
+	const bool is_gfx7_plus =
+		cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
+	const unsigned strmout_cntl_reg = is_gfx7_plus ? R_0300FC_CP_STRMOUT_CNTL
+						       : R_0084FC_CP_STRMOUT_CNTL;
+	const uint32_t update_done = S_0084FC_OFFSET_UPDATE_DONE(1);
+
+	/* GFX7+ uses the uconfig setter, older chips the config setter. */
+	if (is_gfx7_plus)
+		radeon_set_uconfig_reg(cs, strmout_cntl_reg, 0);
+	else
+		radeon_set_config_reg(cs, strmout_cntl_reg, 0);
+
+	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
+
+	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+	radeon_emit(cs, WAIT_REG_MEM_EQUAL);    /* wait until (reg & mask) == reference */
+	radeon_emit(cs, strmout_cntl_reg >> 2); /* register, as a dword offset */
+	radeon_emit(cs, 0);
+	radeon_emit(cs, update_done);           /* reference value */
+	radeon_emit(cs, update_done);           /* mask */
+	radeon_emit(cs, 4);                     /* poll interval */
+}
+
+/**
+ * Begin transform feedback.
+ *
+ * After flushing VGT streamout, every buffer set in the streamout
+ * enabled mask gets its size and vertex stride registers programmed
+ * and its streamout offset initialised: loaded from the matching
+ * counter buffer when one is supplied (resume/append), otherwise reset
+ * to zero. Streamout is then enabled.
+ *
+ * \param firstCounterBuffer     streamout buffer index that the first
+ *                               counter buffer corresponds to.
+ * \param counterBufferCount     number of entries in the arrays.
+ * \param pCounterBuffers        optional array of counter buffers;
+ *                               may be NULL and may contain null handles.
+ * \param pCounterBufferOffsets  optional array of byte offsets into the
+ *                               counter buffers; NULL means all zero.
+ */
+void radv_CmdBeginTransformFeedbackEXT(
+	VkCommandBuffer                             commandBuffer,
+	uint32_t                                    firstCounterBuffer,
+	uint32_t                                    counterBufferCount,
+	const VkBuffer*                             pCounterBuffers,
+	const VkDeviceSize*                         pCounterBufferOffsets)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
+	struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+	uint32_t i;
+
+	radv_flush_vgt_streamout(cmd_buffer);
+
+	assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
+	for_each_bit(i, so->enabled_mask) {
+		/* Map the streamout buffer index to an index into the
+		 * counter buffer arrays; -1 when it falls outside them.
+		 */
+		int32_t counter_buffer_idx = i - firstCounterBuffer;
+		if (counter_buffer_idx >= 0 &&
+		    (uint32_t)counter_buffer_idx >= counterBufferCount)
+			counter_buffer_idx = -1;
+
+		/* AMD GCN binds streamout buffers as shader resources.
+		 * VGT only counts primitives and tells the shader through
+		 * SGPRs what to do.
+		 */
+		radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
+		radeon_emit(cs, sb[i].size >> 2);	/* BUFFER_SIZE (in DW) */
+		radeon_emit(cs, so->stride_in_dw[i]);	/* VTX_STRIDE (in DW) */
+
+		cmd_buffer->state.context_roll_without_scissor_emitted = true;
+
+		if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
+			/* The array of counter buffers is optional. */
+			RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
+			uint64_t va = radv_buffer_get_va(buffer->bo);
+			uint64_t counter_buffer_offset = 0;
+
+			/* The array of offsets is optional as well; a NULL
+			 * array means every offset is zero.
+			 */
+			if (pCounterBufferOffsets)
+				counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
+
+			va += buffer->offset + counter_buffer_offset;
+
+			/* Append */
+			radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+					STRMOUT_DATA_TYPE(1) | /* offset in bytes */
+					STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
+			radeon_emit(cs, 0); /* unused */
+			radeon_emit(cs, 0); /* unused */
+			radeon_emit(cs, va); /* src address lo */
+			radeon_emit(cs, va >> 32); /* src address hi */
+
+			radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
+		} else {
+			/* Start from the beginning. */
+			radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+					STRMOUT_DATA_TYPE(1) | /* offset in bytes */
+					STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
+			radeon_emit(cs, 0); /* unused */
+			radeon_emit(cs, 0); /* unused */
+			radeon_emit(cs, 0); /* unused */
+			radeon_emit(cs, 0); /* unused */
+		}
+	}
+
+	radv_set_streamout_enable(cmd_buffer, true);
+}
+
+/**
+ * End transform feedback.
+ *
+ * After flushing VGT streamout, every buffer set in the streamout
+ * enabled mask has its filled size stored to the matching counter
+ * buffer (when one is supplied) and its size register zeroed.
+ * Streamout is then disabled.
+ *
+ * \param firstCounterBuffer     streamout buffer index that the first
+ *                               counter buffer corresponds to.
+ * \param counterBufferCount     number of entries in the arrays.
+ * \param pCounterBuffers        optional array of counter buffers;
+ *                               may be NULL and may contain null handles.
+ * \param pCounterBufferOffsets  optional array of byte offsets into the
+ *                               counter buffers; NULL means all zero.
+ */
+void radv_CmdEndTransformFeedbackEXT(
+	VkCommandBuffer                             commandBuffer,
+	uint32_t                                    firstCounterBuffer,
+	uint32_t                                    counterBufferCount,
+	const VkBuffer*                             pCounterBuffers,
+	const VkDeviceSize*                         pCounterBufferOffsets)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+	uint32_t i;
+
+	radv_flush_vgt_streamout(cmd_buffer);
+
+	assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
+	for_each_bit(i, so->enabled_mask) {
+		/* Map the streamout buffer index to an index into the
+		 * counter buffer arrays; -1 when it falls outside them.
+		 */
+		int32_t counter_buffer_idx = i - firstCounterBuffer;
+		if (counter_buffer_idx >= 0 &&
+		    (uint32_t)counter_buffer_idx >= counterBufferCount)
+			counter_buffer_idx = -1;
+
+		if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
+			/* The array of counter buffers is optional. */
+			RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
+			uint64_t va = radv_buffer_get_va(buffer->bo);
+			uint64_t counter_buffer_offset = 0;
+
+			/* The array of offsets is optional as well; a NULL
+			 * array means every offset is zero.
+			 */
+			if (pCounterBufferOffsets)
+				counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
+
+			va += buffer->offset + counter_buffer_offset;
+
+			radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+					STRMOUT_DATA_TYPE(1) | /* offset in bytes */
+					STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
+					STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
+			radeon_emit(cs, va);		/* dst address lo */
+			radeon_emit(cs, va >> 32);	/* dst address hi */
+			radeon_emit(cs, 0);		/* unused */
+			radeon_emit(cs, 0);		/* unused */
+
+			radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
+		}
+
+		/* Deactivate transform feedback by zeroing the buffer size.
+		 * The counters (primitives generated, primitives emitted) may
+		 * be enabled even if there is no buffer bound. This ensures
+		 * that the primitives-emitted query won't increment.
+		 */
+		radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+
+		cmd_buffer->state.context_roll_without_scissor_emitted = true;
+	}
+
+	radv_set_streamout_enable(cmd_buffer, false);
+}
+
+/**
+ * Record a draw whose parameters come from a transform feedback
+ * counter buffer.
+ *
+ * Fills a radv_draw_info with the instance range, the counter buffer
+ * and its offset, and the vertex stride, then hands it to radv_draw().
+ * All other radv_draw_info fields are left zero.
+ *
+ * NOTE(review): counterOffset is accepted but never read here -
+ * confirm whether it should be forwarded to the draw.
+ */
+void radv_CmdDrawIndirectByteCountEXT(
+	VkCommandBuffer                             commandBuffer,
+	uint32_t                                    instanceCount,
+	uint32_t                                    firstInstance,
+	VkBuffer                                    _counterBuffer,
+	VkDeviceSize                                counterBufferOffset,
+	uint32_t                                    counterOffset,
+	uint32_t                                    vertexStride)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	RADV_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer);
+	struct radv_draw_info info = {
+		.instance_count = instanceCount,
+		.first_instance = firstInstance,
+		.strmout_buffer = counterBuffer,
+		.strmout_buffer_offset = counterBufferOffset,
+		.stride = vertexStride,
+	};
+
+	radv_draw(cmd_buffer, &info);
+}