#include "vk_format.h"
#include "vk_util.h"
-VkResult
-tu_graphics_pipeline_create(
- VkDevice _device,
- VkPipelineCache _cache,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct tu_graphics_pipeline_create_info *extra,
- const VkAllocationCallbacks *pAllocator,
- VkPipeline *pPipeline)
+#include "tu_cs.h"
+
+struct tu_pipeline_builder
+{
+ struct tu_device *device;
+ struct tu_pipeline_cache *cache;
+ const VkAllocationCallbacks *alloc;
+ const VkGraphicsPipelineCreateInfo *create_info;
+
+ bool rasterizer_discard;
+ /* these states are affectd by rasterizer_discard */
+ VkSampleCountFlagBits samples;
+};
+
+static enum tu_dynamic_state_bits
+tu_dynamic_state_bit(VkDynamicState state)
+{
+ switch (state) {
+ case VK_DYNAMIC_STATE_VIEWPORT:
+ return TU_DYNAMIC_VIEWPORT;
+ case VK_DYNAMIC_STATE_SCISSOR:
+ return TU_DYNAMIC_SCISSOR;
+ case VK_DYNAMIC_STATE_LINE_WIDTH:
+ return TU_DYNAMIC_LINE_WIDTH;
+ case VK_DYNAMIC_STATE_DEPTH_BIAS:
+ return TU_DYNAMIC_DEPTH_BIAS;
+ case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
+ return TU_DYNAMIC_BLEND_CONSTANTS;
+ case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
+ return TU_DYNAMIC_DEPTH_BOUNDS;
+ case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
+ return TU_DYNAMIC_STENCIL_COMPARE_MASK;
+ case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
+ return TU_DYNAMIC_STENCIL_WRITE_MASK;
+ case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
+ return TU_DYNAMIC_STENCIL_REFERENCE;
+ default:
+ unreachable("invalid dynamic state");
+ return 0;
+ }
+}
+
+static enum pc_di_primtype
+tu6_primtype(VkPrimitiveTopology topology)
+{
+ switch (topology) {
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+ return DI_PT_POINTLIST;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+ return DI_PT_LINELIST;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+ return DI_PT_LINESTRIP;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+ return DI_PT_TRILIST;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+ return DI_PT_TRILIST;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+ return DI_PT_TRIFAN;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ return DI_PT_LINE_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ return DI_PT_LINESTRIP_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ return DI_PT_TRI_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ return DI_PT_TRISTRIP_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
+ default:
+ unreachable("invalid primitive topology");
+ return DI_PT_NONE;
+ }
+}
+
+static uint32_t
+tu6_guardband_adj(uint32_t v)
+{
+ if (v > 256)
+ return (uint32_t)(511.0 - 65.0 * (log2(v) - 8.0));
+ else
+ return 511;
+}
+
+void
+tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport)
+{
+ float offsets[3];
+ float scales[3];
+ scales[0] = viewport->width / 2.0f;
+ scales[1] = viewport->height / 2.0f;
+ scales[2] = viewport->maxDepth - viewport->minDepth;
+ offsets[0] = viewport->x + scales[0];
+ offsets[1] = viewport->y + scales[1];
+ offsets[2] = viewport->minDepth;
+
+ VkOffset2D min;
+ VkOffset2D max;
+ min.x = (int32_t) viewport->x;
+ max.x = (int32_t) ceilf(viewport->x + viewport->width);
+ if (viewport->height >= 0.0f) {
+ min.y = (int32_t) viewport->y;
+ max.y = (int32_t) ceilf(viewport->y + viewport->height);
+ } else {
+ min.y = (int32_t)(viewport->y + viewport->height);
+ max.y = (int32_t) ceilf(viewport->y);
+ }
+ /* the spec allows viewport->height to be 0.0f */
+ if (min.y == max.y)
+ max.y++;
+ assert(min.x >= 0 && min.x < max.x);
+ assert(min.y >= 0 && min.y < max.y);
+
+ VkExtent2D guardband_adj;
+ guardband_adj.width = tu6_guardband_adj(max.x - min.x);
+ guardband_adj.height = tu6_guardband_adj(max.y - min.y);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET_0, 6);
+ tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XOFFSET_0(offsets[0]));
+ tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XSCALE_0(scales[0]));
+ tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YOFFSET_0(offsets[1]));
+ tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YSCALE_0(scales[1]));
+ tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZOFFSET_0(offsets[2]));
+ tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZSCALE_0(scales[2]));
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2);
+ tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(min.x) |
+ A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(min.y));
+ tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(max.x - 1) |
+ A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(max.y - 1));
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1);
+ tu_cs_emit(cs,
+ A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband_adj.width) |
+ A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband_adj.height));
+}
+
+void
+tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor)
+{
+ const VkOffset2D min = scissor->offset;
+ const VkOffset2D max = {
+ scissor->offset.x + scissor->extent.width,
+ scissor->offset.y + scissor->extent.height,
+ };
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2);
+ tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(min.x) |
+ A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(min.y));
+ tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(max.x - 1) |
+ A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(max.y - 1));
+}
+
+static void
+tu6_emit_gras_unknowns(struct tu_cs *cs)
+{
+ tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8000, 1);
+ tu_cs_emit(cs, 0x80);
+ tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8001, 1);
+ tu_cs_emit(cs, 0x0);
+ tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8004, 1);
+ tu_cs_emit(cs, 0x0);
+}
+
+static void
+tu6_emit_point_size(struct tu_cs *cs)
+{
+ tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POINT_MINMAX, 2);
+ tu_cs_emit(cs, A6XX_GRAS_SU_POINT_MINMAX_MIN(1.0f / 16.0f) |
+ A6XX_GRAS_SU_POINT_MINMAX_MAX(4092.0f));
+ tu_cs_emit(cs, A6XX_GRAS_SU_POINT_SIZE(1.0f));
+}
+
+static uint32_t
+tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info,
+ VkSampleCountFlagBits samples)
+{
+ uint32_t gras_su_cntl = 0;
+
+ if (rast_info->cullMode & VK_CULL_MODE_FRONT_BIT)
+ gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT;
+ if (rast_info->cullMode & VK_CULL_MODE_BACK_BIT)
+ gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK;
+
+ if (rast_info->frontFace == VK_FRONT_FACE_CLOCKWISE)
+ gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW;
+
+ /* don't set A6XX_GRAS_SU_CNTL_LINEHALFWIDTH */
+
+ if (rast_info->depthBiasEnable)
+ gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET;
+
+ if (samples > VK_SAMPLE_COUNT_1_BIT)
+ gras_su_cntl |= A6XX_GRAS_SU_CNTL_MSAA_ENABLE;
+
+ return gras_su_cntl;
+}
+
+void
+tu6_emit_gras_su_cntl(struct tu_cs *cs,
+ uint32_t gras_su_cntl,
+ float line_width)
{
+ assert((gras_su_cntl & A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK) == 0);
+ gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(line_width / 2.0f);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_CNTL, 1);
+ tu_cs_emit(cs, gras_su_cntl);
+}
+
+void
+tu6_emit_depth_bias(struct tu_cs *cs,
+ float constant_factor,
+ float clamp,
+ float slope_factor)
+{
+ tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
+ tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor));
+ tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor));
+ tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp));
+}
+
+static VkResult
+tu_pipeline_builder_create_pipeline(struct tu_pipeline_builder *builder,
+ struct tu_pipeline **out_pipeline)
+{
+ struct tu_device *dev = builder->device;
+
+ struct tu_pipeline *pipeline =
+ vk_zalloc2(&dev->alloc, builder->alloc, sizeof(*pipeline), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!pipeline)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ tu_cs_init(&pipeline->cs, TU_CS_MODE_SUB_STREAM, 2048);
+
+ /* reserve the space now such that tu_cs_begin_sub_stream never fails */
+ VkResult result = tu_cs_reserve_space(dev, &pipeline->cs, 2048);
+ if (result != VK_SUCCESS) {
+ vk_free2(&dev->alloc, builder->alloc, pipeline);
+ return result;
+ }
+
+ *out_pipeline = pipeline;
return VK_SUCCESS;
}
+static void
+tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder,
+ struct tu_pipeline *pipeline)
+{
+ const VkPipelineDynamicStateCreateInfo *dynamic_info =
+ builder->create_info->pDynamicState;
+
+ if (!dynamic_info)
+ return;
+
+ for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) {
+ pipeline->dynamic_state.mask |=
+ tu_dynamic_state_bit(dynamic_info->pDynamicStates[i]);
+ }
+}
+
+static void
+tu_pipeline_builder_parse_input_assembly(struct tu_pipeline_builder *builder,
+ struct tu_pipeline *pipeline)
+{
+ const VkPipelineInputAssemblyStateCreateInfo *ia_info =
+ builder->create_info->pInputAssemblyState;
+
+ pipeline->ia.primtype = tu6_primtype(ia_info->topology);
+ pipeline->ia.primitive_restart = ia_info->primitiveRestartEnable;
+}
+
+static void
+tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder,
+ struct tu_pipeline *pipeline)
+{
+ /* The spec says:
+ *
+ * pViewportState is a pointer to an instance of the
+ * VkPipelineViewportStateCreateInfo structure, and is ignored if the
+ * pipeline has rasterization disabled."
+ *
+ * We leave the relevant registers stale in that case.
+ */
+ if (builder->rasterizer_discard)
+ return;
+
+ const VkPipelineViewportStateCreateInfo *vp_info =
+ builder->create_info->pViewportState;
+
+ struct tu_cs vp_cs;
+ tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 15, &vp_cs);
+
+ if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_VIEWPORT)) {
+ assert(vp_info->viewportCount == 1);
+ tu6_emit_viewport(&vp_cs, vp_info->pViewports);
+ }
+
+ if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SCISSOR)) {
+ assert(vp_info->scissorCount == 1);
+ tu6_emit_scissor(&vp_cs, vp_info->pScissors);
+ }
+
+ pipeline->vp.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vp_cs);
+}
+
+static void
+tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
+ struct tu_pipeline *pipeline)
+{
+ const VkPipelineRasterizationStateCreateInfo *rast_info =
+ builder->create_info->pRasterizationState;
+
+ assert(!rast_info->depthClampEnable);
+ assert(rast_info->polygonMode == VK_POLYGON_MODE_FILL);
+
+ struct tu_cs rast_cs;
+ tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 20, &rast_cs);
+
+ /* move to hw ctx init? */
+ tu6_emit_gras_unknowns(&rast_cs);
+ tu6_emit_point_size(&rast_cs);
+
+ const uint32_t gras_su_cntl =
+ tu6_gras_su_cntl(rast_info, builder->samples);
+
+ if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH))
+ tu6_emit_gras_su_cntl(&rast_cs, gras_su_cntl, rast_info->lineWidth);
+
+ if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_DEPTH_BIAS)) {
+ tu6_emit_depth_bias(&rast_cs, rast_info->depthBiasConstantFactor,
+ rast_info->depthBiasClamp,
+ rast_info->depthBiasSlopeFactor);
+ }
+
+ pipeline->rast.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &rast_cs);
+
+ pipeline->rast.gras_su_cntl = gras_su_cntl;
+}
+
+static void
+tu_pipeline_finish(struct tu_pipeline *pipeline,
+ struct tu_device *dev,
+ const VkAllocationCallbacks *alloc)
+{
+ tu_cs_finish(dev, &pipeline->cs);
+}
+
+static VkResult
+tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
+ struct tu_pipeline **pipeline)
+{
+ VkResult result = tu_pipeline_builder_create_pipeline(builder, pipeline);
+ if (result != VK_SUCCESS)
+ return result;
+
+ tu_pipeline_builder_parse_dynamic(builder, *pipeline);
+ tu_pipeline_builder_parse_input_assembly(builder, *pipeline);
+ tu_pipeline_builder_parse_viewport(builder, *pipeline);
+ tu_pipeline_builder_parse_rasterization(builder, *pipeline);
+
+ /* we should have reserved enough space upfront such that the CS never
+ * grows
+ */
+ assert((*pipeline)->cs.bo_count == 1);
+
+ return VK_SUCCESS;
+}
+
+static void
+tu_pipeline_builder_init_graphics(
+ struct tu_pipeline_builder *builder,
+ struct tu_device *dev,
+ struct tu_pipeline_cache *cache,
+ const VkGraphicsPipelineCreateInfo *create_info,
+ const VkAllocationCallbacks *alloc)
+{
+ *builder = (struct tu_pipeline_builder) {
+ .device = dev,
+ .cache = cache,
+ .create_info = create_info,
+ .alloc = alloc,
+ };
+
+ builder->rasterizer_discard =
+ create_info->pRasterizationState->rasterizerDiscardEnable;
+
+ if (builder->rasterizer_discard)
+ builder->samples = VK_SAMPLE_COUNT_1_BIT;
+ else
+ builder->samples = create_info->pMultisampleState->rasterizationSamples;
+}
+
VkResult
-tu_CreateGraphicsPipelines(VkDevice _device,
+tu_CreateGraphicsPipelines(VkDevice device,
VkPipelineCache pipelineCache,
uint32_t count,
const VkGraphicsPipelineCreateInfo *pCreateInfos,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipelines)
{
- VkResult result = VK_SUCCESS;
- unsigned i = 0;
+ TU_FROM_HANDLE(tu_device, dev, device);
+ TU_FROM_HANDLE(tu_pipeline_cache, cache, pipelineCache);
- for (; i < count; i++) {
- VkResult r;
- r =
- tu_graphics_pipeline_create(_device, pipelineCache, &pCreateInfos[i],
- NULL, pAllocator, &pPipelines[i]);
- if (r != VK_SUCCESS) {
- result = r;
- pPipelines[i] = VK_NULL_HANDLE;
+ for (uint32_t i = 0; i < count; i++) {
+ struct tu_pipeline_builder builder;
+ tu_pipeline_builder_init_graphics(&builder, dev, cache,
+ &pCreateInfos[i], pAllocator);
+
+ struct tu_pipeline *pipeline;
+ VkResult result = tu_pipeline_builder_build(&builder, &pipeline);
+
+ if (result != VK_SUCCESS) {
+ for (uint32_t j = 0; j < i; j++) {
+ tu_DestroyPipeline(device, pPipelines[j], pAllocator);
+ pPipelines[j] = VK_NULL_HANDLE;
+ }
+
+ return result;
}
+
+ pPipelines[i] = tu_pipeline_to_handle(pipeline);
}
- return result;
+ return VK_SUCCESS;
}
static VkResult
return result;
}
+
+void
+tu_DestroyPipeline(VkDevice _device,
+ VkPipeline _pipeline,
+ const VkAllocationCallbacks *pAllocator)
+{
+ TU_FROM_HANDLE(tu_device, dev, _device);
+ TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline);
+
+ if (!_pipeline)
+ return;
+
+ tu_pipeline_finish(pipeline, dev, pAllocator);
+ vk_free2(&dev->alloc, pAllocator, pipeline);
+}