From: Vivek Pandya Date: Sun, 14 Mar 2021 06:11:26 +0000 (+0530) Subject: Implement RenderPass, CommandBuffers, Buffers, GPUState, X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=40a0d7c04353e5f6f255567506ef09133bb548d3;p=mesa.git Implement RenderPass, CommandBuffers, Buffers, GPUState, execute() and relevant stuffs. --- diff --git a/src/libre-soc/vulkan/libresoc_cmd_buffer.c b/src/libre-soc/vulkan/libresoc_cmd_buffer.c index 2782653eee2..ee9f24047d8 100644 --- a/src/libre-soc/vulkan/libresoc_cmd_buffer.c +++ b/src/libre-soc/vulkan/libresoc_cmd_buffer.c @@ -30,7 +30,46 @@ void libresoc_CmdEndRenderPass( VkCommandBuffer commandBuffer) -{} +{ + LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer); + struct libresoc_cmd *cmd; + cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + cmd->command.endRenderPass.commandId = EndRenderPassID; + list_addtail(&cmd->link, &cmd_buffer->cmd.link); +} + +void libresoc_CmdSetScissor( + VkCommandBuffer commandBuffer, + uint32_t firstScissor, + uint32_t scissorCount, + const VkRect2D* pScissors) +{ + LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer); + struct libresoc_cmd *cmd; + cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + cmd->command.setScissors.commandId = SetScissorsID; + list_addtail(&cmd->link, &cmd_buffer->cmd.link); +} + +void libresoc_CmdPushConstants(VkCommandBuffer commandBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t offset, + uint32_t size, + const void* pValues) +{ + LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer); + struct libresoc_cmd *cmd; + cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + cmd->command.pushConstants.commandId = PushConstantsID; + cmd->command.pushConstants.offset = offset; + 
cmd->command.pushConstants.size = size; + memcpy(cmd->command.pushConstants.values, pValues, size); + list_addtail(&cmd->link, &cmd_buffer->cmd.link); +} void libresoc_CmdDraw( VkCommandBuffer commandBuffer, @@ -39,6 +78,80 @@ void libresoc_CmdDraw( uint32_t firstVertex, uint32_t firstInstance) { + LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer); + struct libresoc_cmd *cmd; + cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + cmd->command.draw.commandId = DrawID; + cmd->command.draw.vertexCount = vertexCount; + cmd->command.draw.instanceCount = instanceCount; + cmd->command.draw.firstVertex = firstVertex; + cmd->command.draw.firstInstance = firstInstance; + list_addtail(&cmd->link, &cmd_buffer->cmd.link); +} + +void libresoc_CmdDrawIndexed( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer); + struct libresoc_cmd *cmd; + cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + cmd->command.drawIndexed.commandId = DrawIndexedID; + cmd->command.drawIndexed.indexCount = indexCount; + cmd->command.drawIndexed.instanceCount = instanceCount; + cmd->command.drawIndexed.firstIndex = firstIndex; + cmd->command.drawIndexed.vertexOffset = vertexOffset; + cmd->command.drawIndexed.firstInstance = firstInstance; + list_addtail(&cmd->link, &cmd_buffer->cmd.link); +} + +void libresoc_CmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer); + for(uint32_t r = 0; r < regionCount; r++) + { + struct libresoc_cmd *cmd; + cmd = 
vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + cmd->command.copyBuf2Img.commandId = CopyBuf2ImgID; + cmd->command.copyBuf2Img.srcBuffer = srcBuffer; + cmd->command.copyBuf2Img.dstImage = destImage; + cmd->command.copyBuf2Img.region = pRegions[r]; + list_addtail(&cmd->link, &cmd_buffer->cmd.link); + } +} + +void libresoc_CmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions) +{ + LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer); + for(uint32_t r = 0; r < regionCount; r++) + { + struct libresoc_cmd *cmd; + cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + cmd->command.copyBuf.commandId = CopyBufID; + cmd->command.copyBuf.srcBuffer = srcBuffer; + cmd->command.copyBuf.dstBuffer = destBuffer; + cmd->command.copyBuf.region = pRegions[r]; + list_addtail(&cmd->link, &cmd_buffer->cmd.link); + } } void libresoc_CmdBindPipeline( @@ -46,14 +159,93 @@ void libresoc_CmdBindPipeline( VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline) { + LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer); + struct libresoc_cmd *cmd; + cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + cmd->command.bindPipeline.commandId = BindPipelineID; + cmd->command.bindPipeline.pipeline = _pipeline; + list_addtail(&cmd->link, &cmd_buffer->cmd.link); } +void libresoc_CmdBindDescriptorSets( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t firstSet, + uint32_t descriptorSetCount, + const VkDescriptorSet* pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t* pDynamicOffsets) +{ + LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer); + for(int i=0; i < descriptorSetCount; ++i) + { + 
struct libresoc_cmd *cmd; + cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + cmd->command.bindDescriptorSets.commandId = BindDescriptorSetsID; + cmd->command.bindDescriptorSets.idx = firstSet + i; + cmd->command.bindDescriptorSets.set = pDescriptorSets[i]; + list_addtail(&cmd->link, &cmd_buffer->cmd.link); + } +} + +void libresoc_CmdBindVertexBuffers( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets) +{ + LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer); + for(int i=0; i < bindingCount; ++i) + { + struct libresoc_cmd *cmd; + cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + cmd->command.bindVB.commandId = BindVBID; + cmd->command.bindVB.slot = firstBinding + i; + cmd->command.bindVB.buffer = pBuffers[i]; + cmd->command.bindVB.offset = pOffsets[i]; + list_addtail(&cmd->link, &cmd_buffer->cmd.link); + } +} + +void libresoc_CmdBindIndexBuffer( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer); + struct libresoc_cmd *cmd; + cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + cmd->command.bindIB.commandId = BindIBID; + cmd->command.bindIB.buffer = buffer; + cmd->command.bindIB.offset = offset; + cmd->command.bindIB.indexType = indexType; + list_addtail(&cmd->link, &cmd_buffer->cmd.link); +} void libresoc_CmdBeginRenderPass( VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, VkSubpassContents contents) { + + LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer); + struct libresoc_cmd *cmd; + cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8, + 
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + cmd->command.beginRenderPass.commandId = BeginRenderPassID; + cmd->command.beginRenderPass.renderPass = pRenderPassBegin->renderPass; + cmd->command.beginRenderPass.framebuffer = pRenderPassBegin->framebuffer; + size_t count = (pRenderPassBegin->clearValueCount < 8U) ? pRenderPassBegin->clearValueCount : 8U; + memcpy(&cmd->command.beginRenderPass.clearval, pRenderPassBegin->pClearValues, + sizeof(VkClearValue) * count); + list_addtail(&cmd->link, &cmd_buffer->cmd.link); } void libresoc_FreeCommandBuffers( @@ -138,26 +330,11 @@ static VkResult libresoc_create_cmd_buffer( vk_object_base_init(&device->vk, &cmd_buffer->base, VK_OBJECT_TYPE_COMMAND_BUFFER); - - cmd_buffer->device = device; - cmd_buffer->pool = pool; - cmd_buffer->level = level; - - list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); - cmd_buffer->queue_family_index = pool->queue_family_index; - - // ring = libresoc_queue_family_to_ring(cmd_buffer->queue_family_index); - - // cmd_buffer->cs = device->ws->cs_create(device->ws, ring); - // if (!cmd_buffer->cs) { - // libresoc_destroy_cmd_buffer(cmd_buffer); - // return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - // } - + cmd_buffer->device = device; + cmd_buffer->size = 0; + list_inithead(&cmd_buffer->cmd.link); *pCommandBuffer = libresoc_cmd_buffer_to_handle(cmd_buffer); - list_inithead(&cmd_buffer->upload.list); - return VK_SUCCESS; } @@ -173,23 +350,10 @@ VkResult libresoc_AllocateCommandBuffers( uint32_t i; for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { - - if (!list_is_empty(&pool->free_cmd_buffers)) { - struct libresoc_cmd_buffer *cmd_buffer = list_first_entry(&pool->free_cmd_buffers, struct libresoc_cmd_buffer, pool_link); - - list_del(&cmd_buffer->pool_link); - list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); - - //result = libresoc_reset_cmd_buffer(cmd_buffer); - cmd_buffer->level = pAllocateInfo->level; - - pCommandBuffers[i] = 
libresoc_cmd_buffer_to_handle(cmd_buffer); - } else { - result = libresoc_create_cmd_buffer(device, pool, pAllocateInfo->level, - &pCommandBuffers[i]); - } - if (result != VK_SUCCESS) - break; + result = libresoc_create_cmd_buffer(device, pool, pAllocateInfo->level, + &pCommandBuffers[i]); + if (result != VK_SUCCESS) + break; } // if (result != VK_SUCCESS) { @@ -220,46 +384,6 @@ VkResult libresoc_BeginCommandBuffer( VkResult result = VK_SUCCESS; - // memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state)); - // cmd_buffer->state.last_primitive_reset_en = -1; - // cmd_buffer->state.last_index_type = -1; - // cmd_buffer->state.last_num_instances = -1; - // cmd_buffer->state.last_vertex_offset = -1; - // cmd_buffer->state.last_first_instance = -1; - // cmd_buffer->state.predication_type = -1; - // cmd_buffer->state.last_sx_ps_downconvert = -1; - // cmd_buffer->state.last_sx_blend_opt_epsilon = -1; - // cmd_buffer->state.last_sx_blend_opt_control = -1; - cmd_buffer->usage_flags = pBeginInfo->flags; - - // if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY && - // (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) { - // assert(pBeginInfo->pInheritanceInfo); - // cmd_buffer->state.framebuffer = libresoc_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer); - // cmd_buffer->state.pass = libresoc_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass); - - // struct libresoc_subpass *subpass = - // &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; - - // if (cmd_buffer->state.framebuffer) { - // result = libresoc_cmd_state_setup_attachments(cmd_buffer, cmd_buffer->state.pass, NULL); - // if (result != VK_SUCCESS) - // return result; - // } - - // cmd_buffer->state.inherited_pipeline_statistics = - // pBeginInfo->pInheritanceInfo->pipelineStatistics; - - // libresoc_cmd_buffer_set_subpass(cmd_buffer, subpass); - // } - - // if (unlikely(cmd_buffer->device->trace_bo)) - // 
libresoc_cmd_buffer_trace_emit(cmd_buffer); - -// libresoc_describe_begin_cmd_buffer(cmd_buffer); - - //cmd_buffer->status = LIBRESOC_CMD_BUFFER_STATUS_RECORDING; - return result; } @@ -276,17 +400,16 @@ void libresoc_CmdPipelineBarrier( const VkImageMemoryBarrier* pImageMemoryBarriers) { // LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer); - // struct libresoc_barrier_info info; - - // info.reason = RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER; - // info.eventCount = 0; - // info.pEvents = NULL; - // info.srcStageMask = srcStageMask; - // info.dstStageMask = destStageMask; - - // libresoc_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, - // bufferMemoryBarrierCount, pBufferMemoryBarriers, - // imageMemoryBarrierCount, pImageMemoryBarriers, &info); + // struct PipelineBarrier *cmd; + // cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct PipelineBarrier), 8, + // VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + // cmd->commandId = PipelineBarrierID; + // if (list_is_empty(&cmd_buffer->list)) { + // list_inithead(&cmd); + // } else { + // list_addtail(&cmd, &cmd_buffer->list); + // } + // cmd_buffer->size += sizeof(struct PipelineBarrier); } VkResult libresoc_EndCommandBuffer( @@ -330,5 +453,5 @@ VkResult libresoc_EndCommandBuffer( // cmd_buffer->status = LIBRESOC_CMD_BUFFER_STATUS_EXECUTABLE; - return cmd_buffer->record_result; + return VK_SUCCESS; } diff --git a/src/libre-soc/vulkan/libresoc_device.c b/src/libre-soc/vulkan/libresoc_device.c index fe63fe4f6ca..47c53193cd7 100644 --- a/src/libre-soc/vulkan/libresoc_device.c +++ b/src/libre-soc/vulkan/libresoc_device.c @@ -91,6 +91,11 @@ struct libresoc_queue_submission { uint32_t signal_value_count; }; +static uint32_t libresoc_surface_max_layer_count(struct libresoc_image_view *iview) +{ + return iview->type == VK_IMAGE_VIEW_TYPE_3D ? 
iview->extent.depth : (iview->base_layer + iview->layer_count); +} + void libresoc_free_memory(struct libresoc_device *device, const VkAllocationCallbacks* pAllocator, @@ -110,21 +115,21 @@ static VkResult libresoc_alloc_memory(struct libresoc_device *device, { struct libresoc_device_memory *mem; VkResult result; - uint32_t flags = 0; + // uint32_t flags = 0; assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); const VkImportMemoryFdInfoKHR *import_info = vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR); - const VkMemoryDedicatedAllocateInfo *dedicate_info = - vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO); - const VkExportMemoryAllocateInfo *export_info = - vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO); + // const VkMemoryDedicatedAllocateInfo *dedicate_info = + // vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO); + // const VkExportMemoryAllocateInfo *export_info = + // vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO); const VkImportMemoryHostPointerInfoEXT *host_ptr_info = vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT); - const struct wsi_memory_allocate_info *wsi_info = - vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA); + // const struct wsi_memory_allocate_info *wsi_info = + // vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA); mem = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, @@ -1389,7 +1394,7 @@ void libresoc_GetPhysicalDeviceProperties2( VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties2 *pProperties) { - LIBRESOC_FROM_HANDLE(libresoc_physical_device, pdevice, physicalDevice); + // LIBRESOC_FROM_HANDLE(libresoc_physical_device, pdevice, physicalDevice); libresoc_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties); //TODO: add more stuffs when required } @@ -1398,7 +1403,7 @@ void 
libresoc_GetPhysicalDeviceFeatures2( VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures2 *pFeatures) { - LIBRESOC_FROM_HANDLE(libresoc_physical_device, pdevice, physicalDevice); + // LIBRESOC_FROM_HANDLE(libresoc_physical_device, pdevice, physicalDevice); libresoc_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features); } @@ -1522,10 +1527,10 @@ VkResult libresoc_BindImageMemory( return libresoc_BindImageMemory2(device, 1, &info); } -static VkResult libresoc_queue_submit(struct libresoc_queue *queue, - const struct libresoc_queue_submission *submission) -{ - return VK_SUCCESS; +// static VkResult libresoc_queue_submit(struct libresoc_queue *queue, +// const struct libresoc_queue_submission *submission) +// { +// return VK_SUCCESS; // struct libresoc_deferred_queue_submission *deferred = NULL; // VkResult result = libresoc_create_deferred_submission(queue, submission, &deferred); @@ -1542,22 +1547,361 @@ static VkResult libresoc_queue_submit(struct libresoc_queue *queue, // return result; // } // return libresoc_process_submissions(&processing_list); -} +// } /* Signals fence as soon as all the work currently put on queue is done. 
*/ -static VkResult libresoc_signal_fence(struct libresoc_queue *queue, - VkFence fence) +// static VkResult libresoc_signal_fence(struct libresoc_queue *queue, +// VkFence fence) +// { +// return libresoc_queue_submit(queue, &(struct libresoc_queue_submission) { +// .fence = fence +// }); +// } + +// static bool libresoc_submit_has_effects(const VkSubmitInfo *info) +// { +// return info->commandBufferCount || +// info->waitSemaphoreCount || +// info->signalSemaphoreCount; +// } + +static int double_triarea(const struct int4 *a, const struct int4 *b, const struct int4 *c) +{ + return (b->x - a->x) * (c->y - a->y) - (b->y - a->y) * (c->x - a->x); +} + +static void MinMax(struct int4 *coords, struct int4 *minwin, struct int4 *maxwin) { - return libresoc_queue_submit(queue, &(struct libresoc_queue_submission) { - .fence = fence - }); + minwin->x = INT_MAX; + minwin->y = INT_MAX; + minwin->z = INT_MAX; + minwin->w = INT_MAX; + maxwin->x = INT_MIN; + maxwin->y = INT_MIN; + maxwin->z = INT_MIN; + maxwin->w = INT_MIN; + + for(int i = 0; i < 3; i++) + { + for(int c = 0; c < 4; c++) + { + minwin->v[c] = (minwin->v[c] < coords[i].v[c]) ? minwin->v[c] : coords[i].v[c]; + maxwin->v[c] = (maxwin->v[c] > coords[i].v[c]) ? maxwin->v[c] : coords[i].v[c]; + } + } } -static bool libresoc_submit_has_effects(const VkSubmitInfo *info) +static float clamp01(float in) { - return info->commandBufferCount || - info->waitSemaphoreCount || - info->signalSemaphoreCount; + return in > 1.0f ? 1.0f : (in < 0.0f ? 
0.0f : in); +} + +static void ProcessTriangles(struct TriangleWork *work) +{ + struct GPUState *state = work->state; + LIBRESOC_FROM_HANDLE(libresoc_image, img, state->col[0]); + LIBRESOC_FROM_HANDLE(libresoc_pipeline, pipeline, state->pipeline); + const uint32_t w = img->width; + const uint32_t h = img->height; + byte *bytes = img->bytes; + + for(int y = work->minwin.y; y < work->maxwin.y; y++) + { + for(int x = work->minwin.x; x < work->maxwin.x; x++) + { + const int PAx = work->tri[0].x - x; + const int PAy = work->tri[0].y - y; + + const int ux = (work->ACx * PAy) - (work->ACy * PAx); + const int uy = (PAx * work->ABy) - (PAy * work->ABx); + + struct int4 b; + b.x = (work->area2 - (ux + uy)); + b.y = ux; + b.z = uy; + b.w = 0; + + if (b.x >= 0 && b.y >= 0 && b.z >= 0) + { + struct float4 n; + n.x = (float)b.x; + n.y = (float)b.y; + n.z = (float)b.z; + n.w = 0.0f; + float pixdepth = n.x * work->depth.x + n.y * work->depth.y + n.z * work->depth.z; + + n.x *= work->invw.x; + n.y *= work->invw.y; + n.z *= work->invw.z; + + float invlen = 1.0f / (n.x + n.y + n.z); + n.x *= invlen; + n.y *= invlen; + n.z *= invlen; + + struct float4 pix; + const int bpp = 4; + pipeline->fs(state, pixdepth, &n, work->vsout, &pix); + bytes[(y * w + x) * bpp + 2] = (byte)(clamp01(pix.x) * 255.0f); + bytes[(y * w + x) * bpp + 1] = (byte)(clamp01(pix.y) * 255.0f); + bytes[(y * w + x) * bpp + 0] = (byte)(clamp01(pix.z) * 255.0f); + } + + } + } +} + +static uint32_t GetIndex(struct GPUState *state, uint32_t vertexIndex, bool indexed) +{ + LIBRESOC_FROM_HANDLE(libresoc_buffer, buf, state->ib.buffer); + if(!indexed) + return vertexIndex; + + const byte *ib = buf->bytes + state->ib.offset; + + if(state->ib.indexType == VK_INDEX_TYPE_UINT16) + { + uint16_t *i16 = (uint16_t *)ib; + i16 += vertexIndex; + return *i16; + } + else + { + uint32_t *i32 = (uint32_t *)ib; + i32 += vertexIndex; + return *i32; + } +} + +static void ClearTarget(VkImage target, const VkClearColorValue *col) +{ + + 
LIBRESOC_FROM_HANDLE(libresoc_image, img, target); + byte *bits = img->bytes; + const uint32_t w = img->width; + const uint32_t h = img->height; + const uint32_t bpp = 4; + + byte eval[4]; + eval[2] = (byte)(col->float32[0] * 255.0f); + eval[1] = (byte)(col->float32[1] * 255.0f); + eval[0] = (byte)(col->float32[2] * 255.0f); + eval[3] = (byte)(col->float32[3] * 255.0f); + + if(bpp == 1) + { + memset(bits, eval[2], w * h); + } + else if(bpp == 4) + { + for(uint32_t y = 0; y < h; y++) + { + for(uint32_t x = 0; x < w; x++) + { + memcpy(&bits[(y * w + x) * bpp], eval, 4); + } + } + } +} + +static void DrawTriangles(struct GPUState *state, int numVerts, uint32_t first, bool indexed) +{ + LIBRESOC_FROM_HANDLE(libresoc_image, img, state->col[0]); + LIBRESOC_FROM_HANDLE(libresoc_pipeline, pipeline, state->pipeline); + const uint32_t w = img->width; + const uint32_t h = img->height; + struct VertexCacheEntry *vertices; + vertices = (struct VertexCacheEntry *)malloc(sizeof(struct VertexCacheEntry) * numVerts); + + int lastVert = numVerts - 3; + uint32_t vertexIndex = first; + // Shed Triangles + struct VertexCacheEntry tri[4]; + // For now we support only VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST + for(int v = 0; v <= lastVert; v += 3) + { + pipeline->vs(state, GetIndex(state, vertexIndex, indexed), &tri[0]); + vertexIndex++; + pipeline->vs(state, GetIndex(state, vertexIndex, indexed), &tri[1]); + vertexIndex++; + pipeline->vs(state, GetIndex(state, vertexIndex, indexed), &tri[2]); + vertexIndex++; + vertices[v] = tri[0]; + vertices[v + 1] = tri[1]; + vertices[v + 2] = tri[2]; + } + + struct int4 *winCords; + winCords = (struct int4 *)malloc(sizeof(struct int4) * numVerts); + // ToWindow + for(int v =0; v <= lastVert; ++v) + { + struct int4 win; + + win.x = (int)((vertices[v].position.x / vertices[v].position.w + 1.0f) * 0.5f * w); + win.y = (int)((vertices[v].position.y * -1.0f / vertices[v].position.w + 1.0f) * 0.5f * h); + winCords[v] = win; + } + + + for(int i = 0; i <= 
lastVert; i += 3) + { + + // culling can be done here + int area2 = double_triarea(&winCords[i + 0], &winCords[i + 1], &winCords[i + 2]); + + + int area2_flipped = area2; + struct TriangleWork work; + + struct int4 minwin, maxwin; + MinMax(&winCords[i], &minwin, &maxwin); + + minwin.x = (0 > minwin.x) ? 0 : minwin.x; + minwin.y = (0 > minwin.y) ? 0 : minwin.y; + maxwin.x = ((int)(w - 1) < maxwin.x) ? (int)(w - 1) : maxwin.x; + maxwin.y = ((int)(h - 1) < maxwin.y) ? (int)(h - 1) : maxwin.y; + + work.state = state; + work.ABx = winCords[i + 1].x - winCords[i + 0].x; + work.ABy = winCords[i + 1].y - winCords[i + 0].y; + work.ACx = winCords[i + 2].x - winCords[i + 0].x; + work.ACy = winCords[i + 2].y - winCords[i + 0].y; + work.vsout = &vertices[i]; + work.tri = &winCords[i]; + // work.barymul = barymul; + work.area2 = area2; + work.invarea = 1.0f / (float)(area2_flipped); + work.invw.x = 1.0f / vertices[i + 0].position.w; + work.invw.y = 1.0f / vertices[i + 1].position.w; + work.invw.z = 1.0f / vertices[i + 2].position.w; + work.invw.w = 0.0f; + work.depth.x = vertices[i + 0].position.z * work.invw.x; + work.depth.y = vertices[i + 1].position.z * work.invw.y; + work.depth.z = vertices[i + 2].position.z * work.invw.z; + work.depth.w = 0.0f; + + const int blockSize = 32; + + int xblocks = 1 + (maxwin.x - minwin.x) / blockSize; + int yblocks = 1 + (maxwin.y - minwin.y) / blockSize; + + { + + for(int x = 0; x < xblocks; x++) + { + for(int y = 0; y < yblocks; y++) + { + work.minwin = minwin; + work.minwin.x += blockSize * x; + work.minwin.y += blockSize * y; + + work.maxwin.x = (maxwin.x < work.minwin.x + blockSize) ? maxwin.x : work.minwin.x + blockSize; + work.maxwin.y = (maxwin.y < work.minwin.y + blockSize) ? 
maxwin.y : work.minwin.y + blockSize; + } + } + } + // rasterize + ProcessTriangles(&work); + + } +} + +static void execute(VkCommandBuffer commandBuffer) +{ + LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer); + struct GPUState state = {0}; + list_for_each_entry(struct libresoc_cmd, lcmd, + &cmd_buffer->cmd.link, link) { + int *commandId = (int *)&lcmd->command; + switch (*commandId) { + case BeginRenderPassID: + { + LIBRESOC_FROM_HANDLE(libresoc_render_pass, render_pass, lcmd->command.beginRenderPass.renderPass); + LIBRESOC_FROM_HANDLE(libresoc_framebuffer, frame_buffer, lcmd->command.beginRenderPass.framebuffer); + LIBRESOC_FROM_HANDLE(libresoc_image, img, state.col[0]); + struct libresoc_subpass subpass = render_pass->subpasses[0]; + struct libresoc_subpass_attachment attachment = subpass.attachments[0]; + + state.col[0] = (struct VkImage_T *)frame_buffer->attachments[attachment.attachment]->image; + + int clearIdx = 0; + + if(attachment.clear) + { + ClearTarget(state.col[0], &lcmd->command.beginRenderPass.clearval[clearIdx++].color); + } + + // VkRenderPass_T::Attachment &depth = subpass.depthAttachment; + + // if(depth.idx >= 0) + // { + // state.depth = data.framebuffer->attachments[depth.idx]->image; + + // if(depth.clear) + // { + // ClearTarget(state.depth, data.clearval[clearIdx++].depthStencil); + // } + // } + break; + } + case EndRenderPassID: + { + state.col[0] = VK_NULL_HANDLE; + break; + } + case BindPipelineID: + { + state.pipeline = lcmd->command.bindPipeline.pipeline; + break; + } + case BindDescriptorSetsID: + { + state.sets[lcmd->command.bindDescriptorSets.idx] = lcmd->command.bindDescriptorSets.set; + break; + } + case BindVBID: + { + state.vbs[lcmd->command.bindVB.slot].buffer = lcmd->command.bindVB.buffer; + state.vbs[lcmd->command.bindVB.slot].offset = lcmd->command.bindVB.offset; + break; + } + case BindIBID: + { + state.ib.buffer = lcmd->command.bindIB.buffer; + state.ib.offset = lcmd->command.bindIB.offset; + 
state.ib.indexType = lcmd->command.bindIB.indexType; + break; + } + case SetViewportID: + { + state.view = lcmd->command.setViewport.view; + break; + } + case SetScissorsID: + break; + case PushConstantsID: + { + memcpy(state.pushconsts + lcmd->command.pushConstants.offset, + lcmd->command.pushConstants.values, + lcmd->command.pushConstants.size); + break; + } + case DrawID: + { + DrawTriangles(&state, lcmd->command.draw.vertexCount, lcmd->command.draw.firstVertex, false); + break; + } + case DrawIndexedID: + break; + case CopyBuf2ImgID: + break; + case CopyBufID: + break; + case PipelineBarrierID: + break; + } + } } VkResult libresoc_QueueSubmit( @@ -1566,58 +1910,15 @@ VkResult libresoc_QueueSubmit( const VkSubmitInfo* pSubmits, VkFence fence) { - LIBRESOC_FROM_HANDLE(libresoc_queue, queue, _queue); - VkResult result; - uint32_t fence_idx = 0; - bool flushed_caches = false; - - if (fence != VK_NULL_HANDLE) { - for (uint32_t i = 0; i < submitCount; ++i) - if (libresoc_submit_has_effects(pSubmits + i)) - fence_idx = i; - } else - fence_idx = UINT32_MAX; - - for (uint32_t i = 0; i < submitCount; i++) { - if (!libresoc_submit_has_effects(pSubmits + i) && fence_idx != i) - continue; - - VkPipelineStageFlags wait_dst_stage_mask = 0; - for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) { - wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j]; - } - - const VkTimelineSemaphoreSubmitInfo *timeline_info = - vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO); - - result = libresoc_queue_submit(queue, &(struct libresoc_queue_submission) { - .cmd_buffers = pSubmits[i].pCommandBuffers, - .cmd_buffer_count = pSubmits[i].commandBufferCount, - .wait_dst_stage_mask = wait_dst_stage_mask, - .flush_caches = !flushed_caches, - .wait_semaphores = pSubmits[i].pWaitSemaphores, - .wait_semaphore_count = pSubmits[i].waitSemaphoreCount, - .signal_semaphores = pSubmits[i].pSignalSemaphores, - .signal_semaphore_count = pSubmits[i].signalSemaphoreCount, - 
.fence = i == fence_idx ? fence : VK_NULL_HANDLE, - .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL, - .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0, - .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL, - .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0, - }); - if (result != VK_SUCCESS) - return result; - - flushed_caches = true; - } - - if (fence != VK_NULL_HANDLE && !submitCount) { - result = libresoc_signal_fence(queue, fence); - if (result != VK_SUCCESS) - return result; - } - - return VK_SUCCESS; + // LIBRESOC_FROM_HANDLE(libresoc_queue, queue, _queue); + for(uint32_t i = 0; i < submitCount; i++) + { + for(uint32_t c = 0; c < pSubmits[i].commandBufferCount; c++) + { + execute(pSubmits[i].pCommandBuffers[c]); + } + } + return VK_SUCCESS; } VkResult libresoc_CreateFence( @@ -1627,10 +1928,10 @@ VkResult libresoc_CreateFence( VkFence* pFence) { LIBRESOC_FROM_HANDLE(libresoc_device, device, _device); - const VkExportFenceCreateInfo *export = - vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO); - VkExternalFenceHandleTypeFlags handleTypes = - export ? export->handleTypes : 0; + // const VkExportFenceCreateInfo *export = + // vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO); + // VkExternalFenceHandleTypeFlags handleTypes = + // export ? 
export->handleTypes : 0; struct libresoc_fence *fence; fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8, @@ -1705,7 +2006,7 @@ void libresoc_UnmapMemory( VkDevice _device, VkDeviceMemory _memory) { - LIBRESOC_FROM_HANDLE(libresoc_device, device, _device); + // LIBRESOC_FROM_HANDLE(libresoc_device, device, _device); LIBRESOC_FROM_HANDLE(libresoc_device_memory, mem, _memory); if (mem == NULL) @@ -1741,15 +2042,108 @@ VkResult libresoc_CreateFramebuffer( const VkAllocationCallbacks* pAllocator, VkFramebuffer* pFramebuffer) { - //TODO: stub + LIBRESOC_FROM_HANDLE(libresoc_device, device, _device); + struct libresoc_framebuffer *framebuffer; + const VkFramebufferAttachmentsCreateInfo *imageless_create_info = + vk_find_struct_const(pCreateInfo->pNext, + FRAMEBUFFER_ATTACHMENTS_CREATE_INFO); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); + + size_t size = sizeof(*framebuffer); + if (!imageless_create_info) + size += sizeof(struct libresoc_image_view*) * pCreateInfo->attachmentCount; + framebuffer = vk_alloc2(&device->vk.alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (framebuffer == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &framebuffer->base, + VK_OBJECT_TYPE_FRAMEBUFFER); + + framebuffer->attachment_count = pCreateInfo->attachmentCount; + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + if (imageless_create_info) { + for (unsigned i = 0; i < imageless_create_info->attachmentImageInfoCount; ++i) { + const VkFramebufferAttachmentImageInfo *attachment = + imageless_create_info->pAttachmentImageInfos + i; + framebuffer->width = MIN2(framebuffer->width, attachment->width); + framebuffer->height = MIN2(framebuffer->height, attachment->height); + framebuffer->layers = MIN2(framebuffer->layers, attachment->layerCount); + } + } else { + for (uint32_t i = 0; i 
< pCreateInfo->attachmentCount; i++) { + VkImageView _iview = pCreateInfo->pAttachments[i]; + struct libresoc_image_view *iview = libresoc_image_view_from_handle(_iview); + framebuffer->attachments[i] = iview; + framebuffer->width = MIN2(framebuffer->width, iview->extent.width); + framebuffer->height = MIN2(framebuffer->height, iview->extent.height); + framebuffer->layers = MIN2(framebuffer->layers, libresoc_surface_max_layer_count(iview)); + } + } + + *pFramebuffer = libresoc_framebuffer_to_handle(framebuffer); + return VK_SUCCESS; +} + +VkResult libresoc_CreateBuffer( + VkDevice _device, + const VkBufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkBuffer* pBuffer) +{ + LIBRESOC_FROM_HANDLE(libresoc_device, device, _device); + struct libresoc_buffer *buffer; + + buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (buffer == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &buffer->base, VK_OBJECT_TYPE_BUFFER); + + buffer->size = pCreateInfo->size; + buffer->bytes = NULL; + *pBuffer = libresoc_buffer_to_handle(buffer); + return VK_SUCCESS; } + void libresoc_DestroyBuffer( VkDevice _device, VkBuffer _buffer, const VkAllocationCallbacks* pAllocator) {} +void libresoc_GetBufferMemoryRequirements( + VkDevice _device, + VkBuffer _buffer, + VkMemoryRequirements* pMemoryRequirements) +{ + LIBRESOC_FROM_HANDLE(libresoc_device, device, _device); + LIBRESOC_FROM_HANDLE(libresoc_buffer, buffer, _buffer); + + pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1; + + pMemoryRequirements->alignment = 16; + + pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment); +} + +VkResult libresoc_BindBufferMemory( + VkDevice device, + VkBuffer _buffer, + VkDeviceMemory _memory, + VkDeviceSize memoryOffset) +{ + LIBRESOC_FROM_HANDLE(libresoc_buffer, buffer, 
_buffer); + LIBRESOC_FROM_HANDLE(libresoc_device_memory, memory, _memory); + buffer->bytes = memory->bytes + memoryOffset; + return VK_SUCCESS; +} + void libresoc_DestroyFence( VkDevice _device, VkFence _fence, diff --git a/src/libre-soc/vulkan/libresoc_image.c b/src/libre-soc/vulkan/libresoc_image.c index cd8fe7cd023..9e6b508ab3f 100644 --- a/src/libre-soc/vulkan/libresoc_image.c +++ b/src/libre-soc/vulkan/libresoc_image.c @@ -150,6 +150,7 @@ libresoc_CreateImage(VkDevice device, pAllocator, pImage); } + void libresoc_GetImageSubresourceLayout( VkDevice _device, VkImage _image, @@ -159,7 +160,7 @@ void libresoc_GetImageSubresourceLayout( LIBRESOC_FROM_HANDLE(libresoc_image, image, _image); pLayout->size = image->size; - pLayout->rowPitch = image->width; + pLayout->rowPitch = image->width * 4; } VkResult @@ -168,24 +169,28 @@ libresoc_CreateImageView(VkDevice _device, const VkAllocationCallbacks *pAllocator, VkImageView *pView) { - //TODO: stub - return VK_SUCCESS; - // LIBRESOC_FROM_HANDLE(libresoc_device, device, _device); - // struct libresoc_image_view *view; + LIBRESOC_FROM_HANDLE(libresoc_device, device, _device); + LIBRESOC_FROM_HANDLE(libresoc_image, image, pCreateInfo->image); + struct libresoc_image_view *view; - // view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, - // VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - // if (view == NULL) - // return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (view == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &view->base, + VK_OBJECT_TYPE_IMAGE_VIEW); - // vk_object_base_init(&device->vk, &view->base, - // VK_OBJECT_TYPE_IMAGE_VIEW); - // libresoc_image_view_init(view, device, pCreateInfo, NULL); + view->image = image; + view->type = pCreateInfo->viewType; + view->vk_format = pCreateInfo->format; + view->extent.width = 
image->width; + view->extent.height = image->height; - // *pView = libresoc_image_view_to_handle(view); + *pView = libresoc_image_view_to_handle(view); - // return VK_SUCCESS; + return VK_SUCCESS; } void libresoc_DestroyImage(VkDevice _device, VkImage _image, diff --git a/src/libre-soc/vulkan/libresoc_llvm.c b/src/libre-soc/vulkan/libresoc_llvm.c index 06a62424300..054a85c7dba 100644 --- a/src/libre-soc/vulkan/libresoc_llvm.c +++ b/src/libre-soc/vulkan/libresoc_llvm.c @@ -2689,7 +2689,7 @@ LLVMModuleRef libresoc_nir_translate(struct libresoc_llvm *llvm_ref, struct nir_ //TODO: this is zero argument function and returns void LLVMTypeRef main_function_type = LLVMFunctionType(ret_type, arg_types, ctx.args.arg_count, 0); - LLVMValueRef main_function = LLVMAddFunction(mod, "main_function", main_function_type); + LLVMValueRef main_function = LLVMAddFunction(mod, gl_shader_stage_name(nir->info.stage), main_function_type); LLVMBasicBlockRef main_function_body = LLVMAppendBasicBlockInContext(ctx.lc.context, main_function, "main_body"); LLVMPositionBuilderAtEnd(ctx.lc.builder, main_function_body); @@ -2732,52 +2732,60 @@ LLVMModuleRef libresoc_nir_translate(struct libresoc_llvm *llvm_ref, struct nir_ 0, NULL, NULL); if (disasm) { -LLVMOrcTargetAddress MainAddr; -LLVMOrcGetSymbolAddress(llvm_ref->orc_ref, &MainAddr ,"main_function"); - const uint8_t *bytes = (const uint8_t *)MainAddr; - char outline[1024]; -uint64_t pc; - pc = 0; -uint64_t extent = 200; - while (pc < extent) { - size_t Size; - - /* - * Print address. We use addresses relative to the start of the function, - * so that between runs. - */ - - - Size = LLVMDisasmInstruction(disasm, (uint8_t *)bytes + pc, extent - pc, 0, outline, - sizeof outline); - - /* - * Print the instruction. - */ - printf("\t%s \n", outline); - - - /* - * Stop disassembling on return statements, if there is no record of a - * jump to a successive address. 
- * - * XXX: This currently assumes x86 - */ - - if (Size == 1 && bytes[pc] == 0xc3) { - break; - } + LLVMOrcTargetAddress MainAddr; + LLVMOrcGetSymbolAddress(llvm_ref->orc_ref, &MainAddr ,gl_shader_stage_name(nir->info.stage)); + // if(nir->info.stage == MESA_SHADER_VERTEX) + // { + // pipeline->vs = (VertexShader)MainAddr; + // } + // else if(nir->info.stage == MESA_SHADER_FRAGMENT) + // { + // pipeline->fs = (FragmentShader)MainAddr; + // } + const uint8_t *bytes = (const uint8_t *)MainAddr; + char outline[1024]; + uint64_t pc; + pc = 0; + uint64_t extent = 200; + while (pc < extent) { + size_t Size; + + /* + * Print address. We use addresses relative to the start of the function, + * so that between runs. + */ + + + Size = LLVMDisasmInstruction(disasm, (uint8_t *)bytes + pc, extent - pc, 0, outline, + sizeof outline); + + /* + * Print the instruction. + */ + printf("\t%s \n", outline); + + + /* + * Stop disassembling on return statements, if there is no record of a + * jump to a successive address. + * + * XXX: This currently assumes x86 + */ + + if (Size == 1 && bytes[pc] == 0xc3) { + break; + } - /* - * Advance. - */ + /* + * Advance. 
+ */ - pc += Size; + pc += Size; - if (pc >= extent) { - break; - } - } + if (pc >= extent) { + break; + } + } } return mod; // LLVMModuleRef mod = LLVMModuleCreateWithName("libresoc_mod"); @@ -2800,3 +2808,9 @@ uint64_t extent = 200; // orc_sym_resolver, // (void *)(llvm_ref->orc_ref)); } + +Shader GetFuncPointer(struct libresoc_llvm *llvm_ref, const char *name) { + LLVMOrcTargetAddress MainAddr; + LLVMOrcGetSymbolAddress(llvm_ref->orc_ref, &MainAddr , name); + return (Shader)MainAddr; +} diff --git a/src/libre-soc/vulkan/libresoc_llvm.h b/src/libre-soc/vulkan/libresoc_llvm.h index 6aeae357aee..160d207b0f0 100644 --- a/src/libre-soc/vulkan/libresoc_llvm.h +++ b/src/libre-soc/vulkan/libresoc_llvm.h @@ -12,6 +12,7 @@ #include "nir/nir_deref.h" #include +typedef void (*Shader)(); enum { ADDR_SPACE_FLAT = 0, @@ -173,5 +174,6 @@ void handle_shader_output_decl(struct libresoc_nir_tran_ctx *ctx, struct nir_shader *nir, struct nir_variable *variable, gl_shader_stage stage); LLVMModuleRef libresoc_nir_translate(struct libresoc_llvm *llvm_ref, struct nir_shader *nir); +Shader GetFuncPointer(struct libresoc_llvm *llvm_ref, const char *name); #endif diff --git a/src/libre-soc/vulkan/libresoc_meta_clear.c b/src/libre-soc/vulkan/libresoc_meta_clear.c index 80ed9904080..4868269d18d 100644 --- a/src/libre-soc/vulkan/libresoc_meta_clear.c +++ b/src/libre-soc/vulkan/libresoc_meta_clear.c @@ -25,27 +25,38 @@ #include "libresoc_private.h" void libresoc_CmdClearColorImage( - VkCommandBuffer commandBuffer, - VkImage image_h, - VkImageLayout imageLayout, - const VkClearColorValue* pColor, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges) + VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) { - LIBRESOC_FROM_HANDLE(libresoc_image, image, image_h); - uint32_t r = pColor->uint32[0]; - uint32_t g = pColor->uint32[1]; - uint32_t b = pColor->uint32[2]; 
- uint32_t a = pColor->uint32[3]; - for (int i=0; i < image->size / 4; ) { - image->bytes[i] = r; - i += 1; - image->bytes[i] = g; - i += 1; - image->bytes[i] = b; - i += 1; - image->bytes[i] = a; - i += 1; + LIBRESOC_FROM_HANDLE(libresoc_image, image, image_h); + byte *bits = image->bytes; + const uint32_t w = image->width; + const uint32_t h = image->height; + const uint32_t bpp = 4; + + byte eval[4]; + eval[2] = (byte)(pColor->float32[0] * 255.0f); + eval[1] = (byte)(pColor->float32[1] * 255.0f); + eval[0] = (byte)(pColor->float32[2] * 255.0f); + eval[3] = (byte)(pColor->float32[3] * 255.0f); + + if(bpp == 1) + { + memset(bits, eval[2], w * h); + } + else if(bpp == 4) + { + for(uint32_t y = 0; y < h; y++) + { + for(uint32_t x = 0; x < w; x++) + { + memcpy(&bits[(y * w + x) * bpp], eval, 4); + } + } } } diff --git a/src/libre-soc/vulkan/libresoc_pass.c b/src/libre-soc/vulkan/libresoc_pass.c index c05f60f6634..330d11df7b7 100644 --- a/src/libre-soc/vulkan/libresoc_pass.c +++ b/src/libre-soc/vulkan/libresoc_pass.c @@ -29,12 +29,469 @@ #include "vk_util.h" +static void +libresoc_render_pass_add_subpass_dep(struct libresoc_render_pass *pass, + const VkSubpassDependency2 *dep) +{ + uint32_t src = dep->srcSubpass; + uint32_t dst = dep->dstSubpass; + + /* Ignore subpass self-dependencies as they allow the app to call + * vkCmdPipelineBarrier() inside the render pass and the driver should + * only do the barrier when called, not when starting the render pass. + */ + if (src == dst) + return; + + /* Accumulate all ingoing external dependencies to the first subpass. 
*/ + if (src == VK_SUBPASS_EXTERNAL) + dst = 0; + + if (dst == VK_SUBPASS_EXTERNAL) { + if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT) + pass->end_barrier.src_stage_mask |= dep->srcStageMask; + pass->end_barrier.src_access_mask |= dep->srcAccessMask; + pass->end_barrier.dst_access_mask |= dep->dstAccessMask; + } else { + if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT) + pass->subpasses[dst].start_barrier.src_stage_mask |= dep->srcStageMask; + pass->subpasses[dst].start_barrier.src_access_mask |= dep->srcAccessMask; + pass->subpasses[dst].start_barrier.dst_access_mask |= dep->dstAccessMask; + } +} + +static bool +libresoc_pass_has_layout_transitions(const struct libresoc_render_pass *pass) +{ + for (unsigned i = 0; i < pass->subpass_count; i++) { + const struct libresoc_subpass *subpass = &pass->subpasses[i]; + for (unsigned j = 0; j < subpass->attachment_count; j++) { + const uint32_t a = subpass->attachments[j].attachment; + if (a == VK_ATTACHMENT_UNUSED) + continue; + + uint32_t initial_layout = pass->attachments[a].initial_layout; + uint32_t stencil_initial_layout = pass->attachments[a].stencil_initial_layout; + uint32_t final_layout = pass->attachments[a].final_layout; + uint32_t stencil_final_layout = pass->attachments[a].stencil_final_layout; + + if (subpass->attachments[j].layout != initial_layout || + subpass->attachments[j].layout != stencil_initial_layout || + subpass->attachments[j].layout != final_layout || + subpass->attachments[j].layout != stencil_final_layout) + return true; + } + } + + return false; +} +static void +libresoc_render_pass_add_implicit_deps(struct libresoc_render_pass *pass, + bool has_ingoing_dep, bool has_outgoing_dep) +{ + /* From the Vulkan 1.0.39 spec: + * + * If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the + * first subpass that uses an attachment, then an implicit subpass + * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is + * used in. 
The implicit subpass dependency only exists if there + * exists an automatic layout transition away from initialLayout. + * The subpass dependency operates as if defined with the + * following parameters: + * + * VkSubpassDependency implicitDependency = { + * .srcSubpass = VK_SUBPASS_EXTERNAL; + * .dstSubpass = firstSubpass; // First subpass attachment is used in + * .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + * .srcAccessMask = 0; + * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | + * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + * .dependencyFlags = 0; + * }; + * + * Similarly, if there is no subpass dependency from the last subpass + * that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit + * subpass dependency exists from the last subpass it is used in to + * VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists + * if there exists an automatic layout transition into finalLayout. + * The subpass dependency operates as if defined with the following + * parameters: + * + * VkSubpassDependency implicitDependency = { + * .srcSubpass = lastSubpass; // Last subpass attachment is used in + * .dstSubpass = VK_SUBPASS_EXTERNAL; + * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + * .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + * .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | + * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + * .dstAccessMask = 0; + * .dependencyFlags = 0; + * }; + */ + + /* Implicit subpass dependencies only make sense if automatic layout + * transitions are performed. 
+ */ + if (!libresoc_pass_has_layout_transitions(pass)) + return; + + if (!has_ingoing_dep) { + const VkSubpassDependency2KHR implicit_ingoing_dep = { + .srcSubpass = VK_SUBPASS_EXTERNAL, + .dstSubpass = 0, + .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + .dependencyFlags = 0, + }; + + libresoc_render_pass_add_subpass_dep(pass, &implicit_ingoing_dep); + } + + if (!has_outgoing_dep) { + const VkSubpassDependency2KHR implicit_outgoing_dep = { + .srcSubpass = 0, + .dstSubpass = VK_SUBPASS_EXTERNAL, + .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + .dstAccessMask = 0, + .dependencyFlags = 0, + }; + + libresoc_render_pass_add_subpass_dep(pass, &implicit_outgoing_dep); + } +} + +static void +libresoc_render_pass_compile(struct libresoc_render_pass *pass) +{ + for (uint32_t i = 0; i < pass->subpass_count; i++) { + struct libresoc_subpass *subpass = &pass->subpasses[i]; + + for (uint32_t j = 0; j < subpass->attachment_count; j++) { + struct libresoc_subpass_attachment *subpass_att = + &subpass->attachments[j]; + if (subpass_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + struct libresoc_render_pass_attachment *pass_att = + &pass->attachments[subpass_att->attachment]; + + pass_att->first_subpass_idx = UINT32_MAX; + } + } + + for (uint32_t i = 0; i < pass->subpass_count; i++) { + struct libresoc_subpass *subpass = &pass->subpasses[i]; + uint32_t color_sample_count = 
1, depth_sample_count = 1; + + /* We don't allow depth_stencil_attachment to be non-NULL and + * be VK_ATTACHMENT_UNUSED. This way something can just check + * for NULL and be guaranteed that they have a valid + * attachment. + */ + if (subpass->depth_stencil_attachment && + subpass->depth_stencil_attachment->attachment == VK_ATTACHMENT_UNUSED) + subpass->depth_stencil_attachment = NULL; + + if (subpass->ds_resolve_attachment && + subpass->ds_resolve_attachment->attachment == VK_ATTACHMENT_UNUSED) + subpass->ds_resolve_attachment = NULL; + + for (uint32_t j = 0; j < subpass->attachment_count; j++) { + struct libresoc_subpass_attachment *subpass_att = + &subpass->attachments[j]; + if (subpass_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + struct libresoc_render_pass_attachment *pass_att = + &pass->attachments[subpass_att->attachment]; + + if (i < pass_att->first_subpass_idx) + pass_att->first_subpass_idx = i; + pass_att->last_subpass_idx = i; + } + + subpass->has_color_att = false; + for (uint32_t j = 0; j < subpass->color_count; j++) { + struct libresoc_subpass_attachment *subpass_att = + &subpass->color_attachments[j]; + if (subpass_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + subpass->has_color_att = true; + + struct libresoc_render_pass_attachment *pass_att = + &pass->attachments[subpass_att->attachment]; + + color_sample_count = pass_att->samples; + } + + if (subpass->depth_stencil_attachment) { + const uint32_t a = + subpass->depth_stencil_attachment->attachment; + struct libresoc_render_pass_attachment *pass_att = + &pass->attachments[a]; + depth_sample_count = pass_att->samples; + } + + subpass->max_sample_count = MAX2(color_sample_count, + depth_sample_count); + subpass->color_sample_count = color_sample_count; + subpass->depth_sample_count = depth_sample_count; + + /* We have to handle resolve attachments specially */ + subpass->has_color_resolve = false; + if (subpass->resolve_attachments) { + for (uint32_t j = 0; j < 
subpass->color_count; j++) { + struct libresoc_subpass_attachment *resolve_att = + &subpass->resolve_attachments[j]; + + if (resolve_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + subpass->has_color_resolve = true; + } + } + + for (uint32_t j = 0; j < subpass->input_count; ++j) { + if (subpass->input_attachments[j].attachment == VK_ATTACHMENT_UNUSED) + continue; + + for (uint32_t k = 0; k < subpass->color_count; ++k) { + if (subpass->color_attachments[k].attachment == subpass->input_attachments[j].attachment) { + subpass->input_attachments[j].in_render_loop = true; + subpass->color_attachments[k].in_render_loop = true; + } + } + + if (subpass->depth_stencil_attachment && + subpass->depth_stencil_attachment->attachment == subpass->input_attachments[j].attachment) { + subpass->input_attachments[j].in_render_loop = true; + subpass->depth_stencil_attachment->in_render_loop = true; + } + } + } +} + +static unsigned +libresoc_num_subpass_attachments(const VkSubpassDescription *desc) +{ + return desc->inputAttachmentCount + + desc->colorAttachmentCount + + (desc->pResolveAttachments ? 
desc->colorAttachmentCount : 0) + + (desc->pDepthStencilAttachment != NULL); +} + +static void +libresoc_destroy_render_pass(struct libresoc_device *device, + const VkAllocationCallbacks *pAllocator, + struct libresoc_render_pass *pass) +{ + vk_object_base_finish(&pass->base); + vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments); + vk_free2(&device->vk.alloc, pAllocator, pass); +} VkResult libresoc_CreateRenderPass( VkDevice _device, const VkRenderPassCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass) { + LIBRESOC_FROM_HANDLE(libresoc_device, device, _device); + struct libresoc_render_pass *pass; + size_t size; + size_t attachments_offset; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); + + size = sizeof(*pass); + size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); + attachments_offset = size; + size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); + + pass = vk_alloc2(&device->vk.alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + memset(pass, 0, size); + + vk_object_base_init(&device->vk, &pass->base, + VK_OBJECT_TYPE_RENDER_PASS); + + pass->attachment_count = pCreateInfo->attachmentCount; + pass->subpass_count = pCreateInfo->subpassCount; + pass->attachments = (void *) pass + attachments_offset; + + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + struct libresoc_render_pass_attachment *att = &pass->attachments[i]; + + att->format = pCreateInfo->pAttachments[i].format; + att->samples = pCreateInfo->pAttachments[i].samples; + att->load_op = pCreateInfo->pAttachments[i].loadOp; + att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; + att->initial_layout = pCreateInfo->pAttachments[i].initialLayout; + att->final_layout = pCreateInfo->pAttachments[i].finalLayout; + att->stencil_initial_layout = 
pCreateInfo->pAttachments[i].initialLayout; + att->stencil_final_layout = pCreateInfo->pAttachments[i].finalLayout; + // att->store_op = pCreateInfo->pAttachments[i].storeOp; + // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + } + uint32_t subpass_attachment_count = 0; + struct libresoc_subpass_attachment *p; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + subpass_attachment_count += + libresoc_num_subpass_attachments(&pCreateInfo->pSubpasses[i]); + } + + if (subpass_attachment_count) { + pass->subpass_attachments = + vk_alloc2(&device->vk.alloc, pAllocator, + subpass_attachment_count * sizeof(struct libresoc_subpass_attachment), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass->subpass_attachments == NULL) { + libresoc_destroy_render_pass(device, pAllocator, pass); + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + } + } else + pass->subpass_attachments = NULL; + + p = pass->subpass_attachments; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + struct libresoc_subpass *subpass = &pass->subpasses[i]; + + subpass->input_count = desc->inputAttachmentCount; + subpass->color_count = desc->colorAttachmentCount; + subpass->attachment_count = libresoc_num_subpass_attachments(desc); + subpass->attachments = p; + + for (uint32_t j = 0; j < subpass->attachment_count; j++) { + subpass->attachments[j].clear = (pCreateInfo->pAttachments[i].loadOp == + VK_ATTACHMENT_LOAD_OP_CLEAR); + } + + if (desc->inputAttachmentCount > 0) { + subpass->input_attachments = p; + p += desc->inputAttachmentCount; + + for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { + subpass->input_attachments[j] = (struct libresoc_subpass_attachment) { + .attachment = desc->pInputAttachments[j].attachment, + .layout = desc->pInputAttachments[j].layout, + .stencil_layout = desc->pInputAttachments[j].layout, + }; + } + } + + if (desc->colorAttachmentCount > 0) { + 
subpass->color_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + subpass->color_attachments[j] = (struct libresoc_subpass_attachment) { + .attachment = desc->pColorAttachments[j].attachment, + .layout = desc->pColorAttachments[j].layout, + }; + } + } + + if (desc->pResolveAttachments) { + subpass->resolve_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + subpass->resolve_attachments[j] = (struct libresoc_subpass_attachment) { + .attachment = desc->pResolveAttachments[j].attachment, + .layout = desc->pResolveAttachments[j].layout, + .stencil_layout = desc->pResolveAttachments[j].layout, + }; + } + } + + if (desc->pDepthStencilAttachment) { + subpass->depth_stencil_attachment = p++; + + *subpass->depth_stencil_attachment = (struct libresoc_subpass_attachment) { + .attachment = desc->pDepthStencilAttachment->attachment, + .layout = desc->pDepthStencilAttachment->layout, + .stencil_layout = desc->pDepthStencilAttachment->layout, + }; + } + } + + bool has_ingoing_dep = false; + bool has_outgoing_dep = false; + + for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { + /* Convert to a Dependency2 */ + struct VkSubpassDependency2 dep2 = { + .srcSubpass = pCreateInfo->pDependencies[i].srcSubpass, + .dstSubpass = pCreateInfo->pDependencies[i].dstSubpass, + .srcStageMask = pCreateInfo->pDependencies[i].srcStageMask, + .dstStageMask = pCreateInfo->pDependencies[i].dstStageMask, + .srcAccessMask = pCreateInfo->pDependencies[i].srcAccessMask, + .dstAccessMask = pCreateInfo->pDependencies[i].dstAccessMask, + .dependencyFlags = pCreateInfo->pDependencies[i].dependencyFlags, + }; + libresoc_render_pass_add_subpass_dep(pass, &dep2); + + /* Determine if the subpass has explicit dependencies from/to + * VK_SUBPASS_EXTERNAL. 
+ */ + if (pCreateInfo->pDependencies[i].srcSubpass == VK_SUBPASS_EXTERNAL) + has_ingoing_dep = true; + if (pCreateInfo->pDependencies[i].dstSubpass == VK_SUBPASS_EXTERNAL) + has_outgoing_dep = true; + } + + libresoc_render_pass_add_implicit_deps(pass, + has_ingoing_dep, has_outgoing_dep); + + libresoc_render_pass_compile(pass); + + *pRenderPass = libresoc_render_pass_to_handle(pass); //TODO: stub return VK_SUCCESS; } + +void libresoc_DestroyRenderPass( + VkDevice _device, + VkRenderPass _pass, + const VkAllocationCallbacks* pAllocator) +{ + LIBRESOC_FROM_HANDLE(libresoc_device, device, _device); + LIBRESOC_FROM_HANDLE(libresoc_render_pass, pass, _pass); + + if (!_pass) + return; + + libresoc_destroy_render_pass(device, pAllocator, pass); +} + +void libresoc_GetRenderAreaGranularity( + VkDevice device, + VkRenderPass renderPass, + VkExtent2D* pGranularity) +{ + pGranularity->width = 1; + pGranularity->height = 1; +} diff --git a/src/libre-soc/vulkan/libresoc_pipeline.c b/src/libre-soc/vulkan/libresoc_pipeline.c index 097f37b6721..e80d48e1f1c 100644 --- a/src/libre-soc/vulkan/libresoc_pipeline.c +++ b/src/libre-soc/vulkan/libresoc_pipeline.c @@ -62,6 +62,14 @@ VkResult libresoc_create_shaders(struct libresoc_pipeline *pipeline, subgroup_size, ballot_bit_size); modules[i]->llvm_module = libresoc_nir_translate(&device->instance->llvm_ref, nir[i]); + if(nir[i]->info.stage == MESA_SHADER_VERTEX) + { + pipeline->vs = (VertexShader)GetFuncPointer(&device->instance->llvm_ref, gl_shader_stage_name(nir[i]->info.stage)); + } + else if(nir[i]->info.stage == MESA_SHADER_FRAGMENT) + { + pipeline->fs = (FragmentShader)GetFuncPointer(&device->instance->llvm_ref, gl_shader_stage_name(nir[i]->info.stage)); + } /* We don't want to alter meta shaders IR directly so clone it * first. 
*/ diff --git a/src/libre-soc/vulkan/libresoc_private.h b/src/libre-soc/vulkan/libresoc_private.h index 84eb55c70d4..cbfff1661e2 100644 --- a/src/libre-soc/vulkan/libresoc_private.h +++ b/src/libre-soc/vulkan/libresoc_private.h @@ -52,6 +52,7 @@ typedef unsigned char byte; + static inline gl_shader_stage vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) { @@ -144,6 +145,216 @@ libresoc_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) } } +struct GPUState +{ + struct + { + VkBuffer buffer; + VkDeviceSize offset; + VkIndexType indexType; + } ib; + struct + { + VkBuffer buffer; + VkDeviceSize offset; + VkDeviceSize stride; + } vbs[4]; + VkViewport view; + VkImage col[8]; + VkImage depth; + VkPipeline pipeline; + VkDescriptorSet sets[8]; + byte pushconsts[128]; +}; + +struct int4 +{ + union + { + struct + { + int x, y, z, w; + }; + int v[4]; + }; +}; + +struct float4 +{ + union + { + struct + { + float x, y, z, w; + }; + float v[4]; + }; +}; + +struct VertexCacheEntry +{ + struct float4 position; + struct float4 interps[10]; +}; + +struct TriangleWork +{ + struct GPUState *state; + + int ABx; + int ABy; + int ACx; + int ACy; + + struct VertexCacheEntry *vsout; + struct int4 *tri; + + int barymul; + int area2; + + float invarea; + + struct float4 invw; + struct float4 depth; + + struct int4 minwin, maxwin; +}; + +enum Command { + BeginRenderPassID = 200, + EndRenderPassID = 201, + BindPipelineID = 202, + BindDescriptorSetsID = 203, + BindVBID = 204, + BindIBID = 205, + SetViewportID = 206, + SetScissorsID = 207, + PushConstantsID = 208, + DrawID = 209, + DrawIndexedID = 210, + CopyBuf2ImgID = 211, + CopyBufID = 212, + PipelineBarrierID = 213, +}; + +struct PipelineBarrier +{ + unsigned int commandId; +}; + +struct BeginRenderPass +{ + unsigned int commandId; + VkRenderPass renderPass; + VkFramebuffer framebuffer; + VkClearValue clearval[8]; +}; + +struct EndRenderPass +{ + unsigned int commandId; +}; + +struct BindPipeline +{ + unsigned int commandId; + 
VkPipeline pipeline; +}; + +struct BindDescriptorSets +{ + unsigned int commandId; + uint32_t idx; + VkDescriptorSet set; +}; + +struct BindVB +{ + unsigned int commandId; + uint32_t slot; + VkBuffer buffer; + VkDeviceSize offset; +}; + +struct BindIB +{ + unsigned int commandId; + VkBuffer buffer; + VkDeviceSize offset; + VkIndexType indexType; +}; + +struct SetViewport +{ + unsigned int commandId; + VkViewport view; +}; + +struct SetScissors +{ + unsigned int commandId; +}; + +struct PushConstants +{ + unsigned int commandId; + uint32_t offset, size; + byte values[128]; +}; + +struct Draw +{ + unsigned int commandId; + uint32_t vertexCount, instanceCount, firstVertex, firstInstance; +}; + +struct DrawIndexed +{ + unsigned int commandId; + uint32_t indexCount, instanceCount, firstIndex, firstInstance; + int32_t vertexOffset; +}; + +struct CopyBuf2Img +{ + unsigned int commandId; + VkBuffer srcBuffer; + VkImage dstImage; + VkBufferImageCopy region; +}; + +struct CopyBuf +{ + unsigned int commandId; + VkBuffer srcBuffer; + VkBuffer dstBuffer; + VkBufferCopy region; +}; + +struct libresoc_cmd { + struct list_head link; + union { + struct PipelineBarrier pipelineBarrier; + struct EndRenderPass endRenderPass; + struct BeginRenderPass beginRenderPass; + struct BindPipeline bindPipeline; + struct BindDescriptorSets bindDescriptorSets; + struct BindVB bindVB; + struct BindIB bindIB; + struct SetViewport setViewport; + struct SetScissors setScissors; + struct PushConstants pushConstants; + struct Draw draw; + struct DrawIndexed drawIndexed; + struct CopyBuf2Img copyBuf2Img; + struct CopyBuf copyBuf; + } command; +}; + +typedef void (*Shader)(); +typedef void (*VertexShader)(struct GPUState *state, uint32_t vertexIndex, struct VertexCacheEntry *out); +typedef void (*FragmentShader)(struct GPUState *state, float pixdepth, struct float4 *bary, + struct VertexCacheEntry tri[3], struct float4 *out); struct libresoc_fence { struct vk_object_base base; }; @@ -224,6 +435,8 @@ 
struct libresoc_pipeline { VkShaderStageFlags active_stages; + /* Vertex and fragment shader members added by this change; the VertexShader and FragmentShader types are declared elsewhere. */ VertexShader vs; + FragmentShader fs; }; void libresoc_pipeline_cache_init(struct libresoc_pipeline_cache *cache, @@ -369,41 +582,10 @@ enum libresoc_cmd_buffer_status { }; struct libresoc_cmd_buffer { - struct vk_object_base base; - - struct libresoc_device * device; - - struct libresoc_cmd_pool * pool; - struct list_head pool_link; - - VkCommandBufferUsageFlags usage_flags; - VkCommandBufferLevel level; - enum libresoc_cmd_buffer_status status; - //struct radeon_cmdbuf *cs; - // struct libresoc_cmd_state state; - // struct libresoc_vertex_binding vertex_bindings[MAX_VBS]; - // struct libresoc_streamout_binding streamout_bindings[MAX_SO_BUFFERS]; - uint32_t queue_family_index; - - uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE]; - VkShaderStageFlags push_constant_stages; - // struct libresoc_descriptor_set meta_push_descriptors; - - // struct libresoc_descriptor_state descriptors[MAX_BIND_POINTS]; - - struct libresoc_cmd_buffer_upload upload; - - uint32_t scratch_size_per_wave_needed; - uint32_t scratch_waves_wanted; - uint32_t compute_scratch_size_per_wave_needed; - uint32_t compute_scratch_waves_wanted; - uint32_t esgs_ring_size_needed; - uint32_t gsvs_ring_size_needed; - bool tess_rings_needed; - bool sample_positions_needed; - - VkResult record_result; - + /* Replacement layout: object base, device pointer, a size, and an embedded libresoc_cmd. libresoc_cmd_buffer.c appends each recorded command to cmd.link with list_addtail(). NOTE(review): the meaning and units of size are not visible here; confirm. */ struct vk_object_base base; + struct libresoc_device *device; + uint64_t size; + struct libresoc_cmd cmd; }; struct libresoc_device_memory { @@ -419,6 +601,12 @@ struct libresoc_device_memory { byte *bytes; }; +/* Buffer object: object base, a VkDeviceSize size, and a byte pointer. NOTE(review): presumably bytes refers to the backing storage of the buffer; confirm against the create and bind code. */ struct libresoc_buffer { + struct vk_object_base base; + VkDeviceSize size; + byte *bytes; +}; + void libresoc_free_memory(struct libresoc_device *device, const VkAllocationCallbacks* pAllocator, struct libresoc_device_memory *mem); @@ -514,6 +702,124 @@ libresoc_graphics_pipeline_create(VkDevice device, const VkAllocationCallbacks *alloc, VkPipeline *pPipeline); +/* Two overlapping layouts of the same storage: a plane0 descriptor followed by an fmask descriptor (8 uint32_t words each), or three plane descriptors of 8 uint32_t words each. */ union libresoc_descriptor { + struct { + uint32_t plane0_descriptor[8]; +
uint32_t fmask_descriptor[8]; + }; + struct { + uint32_t plane_descriptors[3][8]; + }; +}; + +/* Image view object: the viewed image, view type, aspect mask and format, plane information, layer and mip ranges, extent, and one descriptor each for sampled and storage use. */ struct libresoc_image_view { + struct vk_object_base base; + struct libresoc_image *image; /**< VkImageViewCreateInfo::image */ + + VkImageViewType type; + VkImageAspectFlags aspect_mask; + VkFormat vk_format; + unsigned plane_id; + bool multiple_planes; + uint32_t base_layer; + uint32_t layer_count; + uint32_t base_mip; + uint32_t level_count; + VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */ + + union libresoc_descriptor descriptor; + + /* Descriptor for use as a storage image as opposed to a sampled image. + * This has a few differences for cube maps (e.g. type). + */ + union libresoc_descriptor storage_descriptor; +}; + +/* Framebuffer object: width, height, layer count, and attachment_count followed by a zero-length trailing array of image view pointers. NOTE(review): presumably allocated with attachment_count pointers past the end of the struct; confirm in the create path. */ struct libresoc_framebuffer { + struct vk_object_base base; + uint32_t width; + uint32_t height; + uint32_t layers; + + uint32_t attachment_count; + struct libresoc_image_view *attachments[0]; +}; + +/* Source stage mask plus source and destination access masks describing a subpass barrier. */ struct libresoc_subpass_barrier { + VkPipelineStageFlags src_stage_mask; + VkAccessFlags src_access_mask; + VkAccessFlags dst_access_mask; +}; + +/* Function sharing its name with struct libresoc_subpass_barrier; this is legal C because struct tags live in a separate namespace. Only the declaration appears here. */ void libresoc_subpass_barrier(struct libresoc_cmd_buffer *cmd_buffer, + const struct libresoc_subpass_barrier *barrier); + +/* Attachment reference used by a subpass: an attachment number, its image and stencil layouts, and two flags. NOTE(review): attachment presumably indexes the attachments array of the render pass; confirm. */ struct libresoc_subpass_attachment { + uint32_t attachment; + VkImageLayout layout; + VkImageLayout stencil_layout; + bool in_render_loop; + bool clear; +}; + +/* One subpass: counts and pointers for its input, color, resolve and depth/stencil attachment references, resolve modes, summary flags, a start barrier, a view mask, and sample counts. */ struct libresoc_subpass { + uint32_t attachment_count; + struct libresoc_subpass_attachment * attachments; + + uint32_t input_count; + uint32_t color_count; + struct libresoc_subpass_attachment * input_attachments; + struct libresoc_subpass_attachment * color_attachments; + struct libresoc_subpass_attachment * resolve_attachments; + struct libresoc_subpass_attachment * depth_stencil_attachment; + struct libresoc_subpass_attachment * ds_resolve_attachment; + VkResolveModeFlagBits depth_resolve_mode; + VkResolveModeFlagBits stencil_resolve_mode; + + /** Subpass has at least one color resolve attachment */ +
bool has_color_resolve; + + /** Subpass has at least one color attachment */ + bool has_color_att; + + struct libresoc_subpass_barrier start_barrier; + + uint32_t view_mask; + + VkSampleCountFlagBits color_sample_count; + VkSampleCountFlagBits depth_sample_count; + VkSampleCountFlagBits max_sample_count; +}; + +uint32_t +libresoc_get_subpass_id(struct libresoc_cmd_buffer *cmd_buffer); + +/* Per-attachment render pass description: format, sample count, color and stencil load ops, initial and final layouts (with separate stencil layouts), and the first and last subpass index using the attachment. */ struct libresoc_render_pass_attachment { + VkFormat format; + uint32_t samples; + VkAttachmentLoadOp load_op; + VkAttachmentLoadOp stencil_load_op; + VkImageLayout initial_layout; + VkImageLayout final_layout; + VkImageLayout stencil_initial_layout; + VkImageLayout stencil_final_layout; + + /* The subpass id in which the attachment will be used first/last. */ + uint32_t first_subpass_idx; + uint32_t last_subpass_idx; +}; + +/* Render pass object: attachment and subpass counts, pointers to the subpass attachment references and attachment descriptions, an end barrier, and a zero-length trailing array of subpasses. NOTE(review): presumably allocated with subpass_count entries past the end of the struct; confirm in the create path. */ struct libresoc_render_pass { + struct vk_object_base base; + uint32_t attachment_count; + uint32_t subpass_count; + struct libresoc_subpass_attachment * subpass_attachments; + struct libresoc_render_pass_attachment * attachments; + struct libresoc_subpass_barrier end_barrier; + struct libresoc_subpass subpasses[0]; +}; + + #define libresoc_printflike(a, b) __attribute__((__format__(__printf__, a, b))) VkResult __vk_errorf(struct libresoc_instance *instance, VkResult error, @@ -564,7 +870,7 @@ LIBRESOC_DEFINE_HANDLE_CASTS(libresoc_physical_device, VkPhysicalDevice) LIBRESOC_DEFINE_HANDLE_CASTS(libresoc_queue, VkQueue) LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_cmd_pool, VkCommandPool) -//LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_buffer, VkBuffer) +/* Handle casts for VkBuffer are enabled by this change, which also adds struct libresoc_buffer. */ LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_buffer, VkBuffer) //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_buffer_view, VkBufferView) //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_descriptor_pool, VkDescriptorPool) //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_descriptor_set, VkDescriptorSet) @@ -573,14 +879,14 @@ LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_cmd_pool, VkCommandPool)
LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_device_memory, VkDeviceMemory) LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_fence, VkFence) //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_event, VkEvent) -//LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_framebuffer, VkFramebuffer) +LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_framebuffer, VkFramebuffer) LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_image, VkImage) -//LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_image_view, VkImageView); +/* NOTE(review): this invocation keeps a trailing semicolon that the sibling invocations do not have. */ LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_image_view, VkImageView); LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_pipeline_cache, VkPipelineCache) LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_pipeline, VkPipeline) //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_pipeline_layout, VkPipelineLayout) //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_query_pool, VkQueryPool) -//LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_render_pass, VkRenderPass) +LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_render_pass, VkRenderPass) //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_sampler, VkSampler) //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_sampler_ycbcr_conversion, VkSamplerYcbcrConversion) LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_shader_module, VkShaderModule)