Implement RenderPass, CommandBuffers, Buffers, GPUState,
authorVivek Pandya <vivekvpandya@gmail.com>
Sun, 14 Mar 2021 06:11:26 +0000 (11:41 +0530)
committerVivek Pandya <vivekvpandya@gmail.com>
Sun, 14 Mar 2021 06:11:26 +0000 (11:41 +0530)
execute() and related pieces.

src/libre-soc/vulkan/libresoc_cmd_buffer.c
src/libre-soc/vulkan/libresoc_device.c
src/libre-soc/vulkan/libresoc_image.c
src/libre-soc/vulkan/libresoc_llvm.c
src/libre-soc/vulkan/libresoc_llvm.h
src/libre-soc/vulkan/libresoc_meta_clear.c
src/libre-soc/vulkan/libresoc_pass.c
src/libre-soc/vulkan/libresoc_pipeline.c
src/libre-soc/vulkan/libresoc_private.h

index 2782653eee2a8d4aad0a087e671d01ab1fd27d9a..ee9f24047d85f48166ac682b1203d9ca3e5191c8 100644 (file)
 
 void libresoc_CmdEndRenderPass(
        VkCommandBuffer                             commandBuffer)
-{}
+{
+    LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer);
+        struct libresoc_cmd *cmd;
+        cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+    cmd->command.endRenderPass.commandId = EndRenderPassID;
+    list_addtail(&cmd->link, &cmd_buffer->cmd.link); 
+}
+
+/*
+ * Record a SetScissors command in the command buffer's command list.
+ * Only the command id is stored; firstScissor, scissorCount and pScissors
+ * are not captured here.
+ */
+void libresoc_CmdSetScissor(
+       VkCommandBuffer                             commandBuffer,
+       uint32_t                                    firstScissor,
+       uint32_t                                    scissorCount,
+       const VkRect2D*                             pScissors)
+{
+    LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer);
+    struct libresoc_cmd *cmd =
+        vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(*cmd), 8,
+                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+    if (cmd == NULL)
+        return; /* allocation failed: drop the command rather than dereference NULL */
+    cmd->command.setScissors.commandId = SetScissorsID;
+    list_addtail(&cmd->link, &cmd_buffer->cmd.link);
+}
+
+/*
+ * Record a PushConstants command: stores offset, size and a copy of the
+ * size bytes at pValues, then appends the command to the command list.
+ * layout and stageFlags are not recorded.
+ */
+void libresoc_CmdPushConstants(VkCommandBuffer commandBuffer,
+                          VkPipelineLayout layout,
+                          VkShaderStageFlags stageFlags,
+                          uint32_t offset,
+                          uint32_t size,
+                          const void* pValues)
+{
+    LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer);
+    struct libresoc_cmd *cmd =
+        vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(*cmd), 8,
+                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+    if (cmd == NULL)
+        return; /* allocation failed: drop the command rather than dereference NULL */
+    cmd->command.pushConstants.commandId = PushConstantsID;
+    cmd->command.pushConstants.offset = offset;
+    cmd->command.pushConstants.size = size;
+    /* NOTE(review): size is not checked against the capacity of
+     * pushConstants.values - confirm the storage is large enough for the
+     * device's push-constant limit. */
+    memcpy(cmd->command.pushConstants.values, pValues, size);
+    list_addtail(&cmd->link, &cmd_buffer->cmd.link);
+}
 
 void libresoc_CmdDraw(
        VkCommandBuffer                             commandBuffer,
@@ -39,6 +78,80 @@ void libresoc_CmdDraw(
        uint32_t                                    firstVertex,
        uint32_t                                    firstInstance)
 {
+    LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer);
+        struct libresoc_cmd *cmd;
+        cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+    cmd->command.draw.commandId = DrawID;
+    cmd->command.draw.vertexCount = vertexCount;
+    cmd->command.draw.instanceCount = instanceCount;
+    cmd->command.draw.firstVertex = firstVertex;
+    cmd->command.draw.firstInstance = firstInstance;
+    list_addtail(&cmd->link, &cmd_buffer->cmd.link); 
+}
+
+/*
+ * Record a DrawIndexed command carrying all five draw parameters and
+ * append it to the command buffer's command list.
+ */
+void libresoc_CmdDrawIndexed(
+       VkCommandBuffer                             commandBuffer,
+       uint32_t                                    indexCount,
+       uint32_t                                    instanceCount,
+       uint32_t                                    firstIndex,
+       int32_t                                     vertexOffset,
+       uint32_t                                    firstInstance)
+{
+    LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer);
+    struct libresoc_cmd *cmd =
+        vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(*cmd), 8,
+                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+    if (cmd == NULL)
+        return; /* allocation failed: drop the command rather than dereference NULL */
+    cmd->command.drawIndexed.commandId = DrawIndexedID;
+    cmd->command.drawIndexed.indexCount = indexCount;
+    cmd->command.drawIndexed.instanceCount = instanceCount;
+    cmd->command.drawIndexed.firstIndex = firstIndex;
+    cmd->command.drawIndexed.vertexOffset = vertexOffset;
+    cmd->command.drawIndexed.firstInstance = firstInstance;
+    list_addtail(&cmd->link, &cmd_buffer->cmd.link);
+}
+
+/*
+ * Record one CopyBuf2Img command per entry of pRegions, each holding the
+ * source buffer, destination image and a copy of that region.
+ * destImageLayout is not recorded.
+ */
+void libresoc_CmdCopyBufferToImage(
+       VkCommandBuffer                             commandBuffer,
+       VkBuffer                                    srcBuffer,
+       VkImage                                     destImage,
+       VkImageLayout                               destImageLayout,
+       uint32_t                                    regionCount,
+       const VkBufferImageCopy*                    pRegions)
+{
+    LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer);
+    for (uint32_t r = 0; r < regionCount; r++)
+    {
+        struct libresoc_cmd *cmd =
+            vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(*cmd), 8,
+                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+        if (cmd == NULL)
+            return; /* allocation failed: stop recording rather than dereference NULL */
+        cmd->command.copyBuf2Img.commandId = CopyBuf2ImgID;
+        cmd->command.copyBuf2Img.srcBuffer = srcBuffer;
+        cmd->command.copyBuf2Img.dstImage = destImage;
+        cmd->command.copyBuf2Img.region = pRegions[r];
+        list_addtail(&cmd->link, &cmd_buffer->cmd.link);
+    }
+}
+
+/*
+ * Record one CopyBuf command per entry of pRegions, each holding the
+ * source buffer, destination buffer and a copy of that region.
+ */
+void libresoc_CmdCopyBuffer(
+       VkCommandBuffer                             commandBuffer,
+       VkBuffer                                    srcBuffer,
+       VkBuffer                                    destBuffer,
+       uint32_t                                    regionCount,
+       const VkBufferCopy*                         pRegions)
+{
+    LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer);
+    for (uint32_t r = 0; r < regionCount; r++)
+    {
+        struct libresoc_cmd *cmd =
+            vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(*cmd), 8,
+                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+        if (cmd == NULL)
+            return; /* allocation failed: stop recording rather than dereference NULL */
+        cmd->command.copyBuf.commandId = CopyBufID;
+        cmd->command.copyBuf.srcBuffer = srcBuffer;
+        cmd->command.copyBuf.dstBuffer = destBuffer;
+        cmd->command.copyBuf.region = pRegions[r];
+        list_addtail(&cmd->link, &cmd_buffer->cmd.link);
+    }
 }
 
 void libresoc_CmdBindPipeline(
@@ -46,14 +159,93 @@ void libresoc_CmdBindPipeline(
        VkPipelineBindPoint                         pipelineBindPoint,
        VkPipeline                                  _pipeline)
 {
+    LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer);
+    struct libresoc_cmd *cmd;
+    cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8,
+            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+    cmd->command.bindPipeline.commandId = BindPipelineID;
+    cmd->command.bindPipeline.pipeline = _pipeline;
+    list_addtail(&cmd->link, &cmd_buffer->cmd.link); 
 }
 
+/*
+ * Record one BindDescriptorSets command per descriptor set, storing the
+ * set handle and its slot index (firstSet + i). pipelineBindPoint, _layout
+ * and the dynamic offsets are not recorded.
+ */
+void libresoc_CmdBindDescriptorSets(
+       VkCommandBuffer                             commandBuffer,
+       VkPipelineBindPoint                         pipelineBindPoint,
+       VkPipelineLayout                            _layout,
+       uint32_t                                    firstSet,
+       uint32_t                                    descriptorSetCount,
+       const VkDescriptorSet*                      pDescriptorSets,
+       uint32_t                                    dynamicOffsetCount,
+       const uint32_t*                             pDynamicOffsets)
+{
+    LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer);
+    /* Unsigned index: avoids a signed/unsigned comparison with the count. */
+    for (uint32_t i = 0; i < descriptorSetCount; ++i)
+    {
+        struct libresoc_cmd *cmd =
+            vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(*cmd), 8,
+                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+        if (cmd == NULL)
+            return; /* allocation failed: stop recording rather than dereference NULL */
+        cmd->command.bindDescriptorSets.commandId = BindDescriptorSetsID;
+        cmd->command.bindDescriptorSets.idx = firstSet + i;
+        cmd->command.bindDescriptorSets.set = pDescriptorSets[i];
+        list_addtail(&cmd->link, &cmd_buffer->cmd.link);
+    }
+}
+
+/*
+ * Record one BindVB command per binding, storing the buffer handle, its
+ * byte offset and the slot index (firstBinding + i).
+ */
+void libresoc_CmdBindVertexBuffers(
+        VkCommandBuffer                             commandBuffer,
+        uint32_t                                    firstBinding,
+        uint32_t                                    bindingCount,
+        const VkBuffer*                             pBuffers,
+        const VkDeviceSize*                         pOffsets)
+{
+    LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer);
+    /* Unsigned index: avoids a signed/unsigned comparison with the count. */
+    for (uint32_t i = 0; i < bindingCount; ++i)
+    {
+        struct libresoc_cmd *cmd =
+            vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(*cmd), 8,
+                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+        if (cmd == NULL)
+            return; /* allocation failed: stop recording rather than dereference NULL */
+        cmd->command.bindVB.commandId = BindVBID;
+        cmd->command.bindVB.slot = firstBinding + i;
+        cmd->command.bindVB.buffer = pBuffers[i];
+        cmd->command.bindVB.offset = pOffsets[i];
+        list_addtail(&cmd->link, &cmd_buffer->cmd.link);
+    }
+}
+
+/*
+ * Record a BindIB command holding the index buffer handle, byte offset and
+ * index type, and append it to the command buffer's command list.
+ */
+void libresoc_CmdBindIndexBuffer(
+       VkCommandBuffer                             commandBuffer,
+       VkBuffer buffer,
+       VkDeviceSize offset,
+       VkIndexType indexType)
+{
+    LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer);
+    struct libresoc_cmd *cmd =
+        vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(*cmd), 8,
+                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+    if (cmd == NULL)
+        return; /* allocation failed: drop the command rather than dereference NULL */
+    cmd->command.bindIB.commandId = BindIBID;
+    cmd->command.bindIB.buffer = buffer;
+    cmd->command.bindIB.offset = offset;
+    cmd->command.bindIB.indexType = indexType;
+    list_addtail(&cmd->link, &cmd_buffer->cmd.link);
+}
 
 void libresoc_CmdBeginRenderPass(
        VkCommandBuffer                             commandBuffer,
        const VkRenderPassBeginInfo*                pRenderPassBegin,
        VkSubpassContents                           contents)
 {
+
+       LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer);
+    struct libresoc_cmd *cmd;
+       cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct libresoc_cmd), 8,
+                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+    cmd->command.beginRenderPass.commandId = BeginRenderPassID;
+    cmd->command.beginRenderPass.renderPass = pRenderPassBegin->renderPass;
+    cmd->command.beginRenderPass.framebuffer = pRenderPassBegin->framebuffer;
+    size_t count = (pRenderPassBegin->clearValueCount < 8U) ? pRenderPassBegin->clearValueCount : 8U;
+    memcpy(&cmd->command.beginRenderPass.clearval, pRenderPassBegin->pClearValues,
+            sizeof(VkClearValue) * count);
+    list_addtail(&cmd->link, &cmd_buffer->cmd.link); 
 }
 
 void libresoc_FreeCommandBuffers(
@@ -138,26 +330,11 @@ static VkResult libresoc_create_cmd_buffer(
 
        vk_object_base_init(&device->vk, &cmd_buffer->base,
                            VK_OBJECT_TYPE_COMMAND_BUFFER);
-
-       cmd_buffer->device = device;
-       cmd_buffer->pool = pool;
-       cmd_buffer->level = level;
-
-       list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
-       cmd_buffer->queue_family_index = pool->queue_family_index;
-
-       // ring = libresoc_queue_family_to_ring(cmd_buffer->queue_family_index);
-
-       // cmd_buffer->cs = device->ws->cs_create(device->ws, ring);
-       // if (!cmd_buffer->cs) {
-       //      libresoc_destroy_cmd_buffer(cmd_buffer);
-       //      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-       // }
-
+    cmd_buffer->device = device;
+    cmd_buffer->size = 0;
+    list_inithead(&cmd_buffer->cmd.link);
        *pCommandBuffer = libresoc_cmd_buffer_to_handle(cmd_buffer);
 
-       list_inithead(&cmd_buffer->upload.list);
-
        return VK_SUCCESS;
 }
 
@@ -173,23 +350,10 @@ VkResult libresoc_AllocateCommandBuffers(
        uint32_t i;
 
        for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
-
-               if (!list_is_empty(&pool->free_cmd_buffers)) {
-                       struct libresoc_cmd_buffer *cmd_buffer = list_first_entry(&pool->free_cmd_buffers, struct libresoc_cmd_buffer, pool_link);
-
-                       list_del(&cmd_buffer->pool_link);
-                       list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
-
-                       //result = libresoc_reset_cmd_buffer(cmd_buffer);
-                       cmd_buffer->level = pAllocateInfo->level;
-
-                       pCommandBuffers[i] = libresoc_cmd_buffer_to_handle(cmd_buffer);
-               } else {
-                       result = libresoc_create_cmd_buffer(device, pool, pAllocateInfo->level,
-                                                       &pCommandBuffers[i]);
-               }
-               if (result != VK_SUCCESS)
-                       break;
+        result = libresoc_create_cmd_buffer(device, pool, pAllocateInfo->level,
+                &pCommandBuffers[i]);
+        if (result != VK_SUCCESS)
+            break;
        }
 
        // if (result != VK_SUCCESS) {
@@ -220,46 +384,6 @@ VkResult libresoc_BeginCommandBuffer(
        VkResult result = VK_SUCCESS;
 
 
-       // memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
-       // cmd_buffer->state.last_primitive_reset_en = -1;
-       // cmd_buffer->state.last_index_type = -1;
-       // cmd_buffer->state.last_num_instances = -1;
-       // cmd_buffer->state.last_vertex_offset = -1;
-       // cmd_buffer->state.last_first_instance = -1;
-       // cmd_buffer->state.predication_type = -1;
-       // cmd_buffer->state.last_sx_ps_downconvert = -1;
-       // cmd_buffer->state.last_sx_blend_opt_epsilon = -1;
-       // cmd_buffer->state.last_sx_blend_opt_control = -1;
-       cmd_buffer->usage_flags = pBeginInfo->flags;
-
-       // if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
-       //     (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) {
-       //      assert(pBeginInfo->pInheritanceInfo);
-       //      cmd_buffer->state.framebuffer = libresoc_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer);
-       //      cmd_buffer->state.pass = libresoc_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
-
-       //      struct libresoc_subpass *subpass =
-       //              &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
-
-       //      if (cmd_buffer->state.framebuffer) {
-       //              result = libresoc_cmd_state_setup_attachments(cmd_buffer, cmd_buffer->state.pass, NULL);
-       //              if (result != VK_SUCCESS)
-       //                      return result;
-       //      }
-
-       //      cmd_buffer->state.inherited_pipeline_statistics =
-       //              pBeginInfo->pInheritanceInfo->pipelineStatistics;
-
-       //      libresoc_cmd_buffer_set_subpass(cmd_buffer, subpass);
-       // }
-
-       // if (unlikely(cmd_buffer->device->trace_bo))
-       //      libresoc_cmd_buffer_trace_emit(cmd_buffer);
-
-//     libresoc_describe_begin_cmd_buffer(cmd_buffer);
-
-       //cmd_buffer->status = LIBRESOC_CMD_BUFFER_STATUS_RECORDING;
-
        return result;
 }
 
@@ -276,17 +400,16 @@ void libresoc_CmdPipelineBarrier(
        const VkImageMemoryBarrier*                 pImageMemoryBarriers)
 {
        // LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer);
-       // struct libresoc_barrier_info info;
-
-       // info.reason = RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER;
-       // info.eventCount = 0;
-       // info.pEvents = NULL;
-       // info.srcStageMask = srcStageMask;
-       // info.dstStageMask = destStageMask;
-
-       // libresoc_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
-       //           bufferMemoryBarrierCount, pBufferMemoryBarriers,
-       //           imageMemoryBarrierCount, pImageMemoryBarriers, &info);
+    // struct PipelineBarrier *cmd;
+       // cmd = vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct PipelineBarrier), 8,
+       //                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+    // cmd->commandId = PipelineBarrierID;
+    // if (list_is_empty(&cmd_buffer->list)) {
+        // list_inithead(&cmd);
+    // } else {
+        // list_addtail(&cmd, &cmd_buffer->list); 
+    // }
+    // cmd_buffer->size += sizeof(struct PipelineBarrier);
 }
 
 VkResult libresoc_EndCommandBuffer(
@@ -330,5 +453,5 @@ VkResult libresoc_EndCommandBuffer(
 
        // cmd_buffer->status = LIBRESOC_CMD_BUFFER_STATUS_EXECUTABLE;
 
-       return cmd_buffer->record_result;
+       return VK_SUCCESS;
 }
index fe63fe4f6ca6a3705bebe8f0fe554f587f6a97b8..47c53193cd7b8b407b189abfe5eac7bedb6314b8 100644 (file)
@@ -91,6 +91,11 @@ struct libresoc_queue_submission {
        uint32_t signal_value_count;
 };
 
+/*
+ * Layer count for an image view: the extent depth for 3D views, otherwise
+ * base_layer + layer_count.
+ */
+static uint32_t libresoc_surface_max_layer_count(struct libresoc_image_view *iview)
+{
+       if (iview->type == VK_IMAGE_VIEW_TYPE_3D)
+               return iview->extent.depth;
+
+       return iview->base_layer + iview->layer_count;
+}
+
 void
 libresoc_free_memory(struct libresoc_device *device,
                 const VkAllocationCallbacks* pAllocator,
@@ -110,21 +115,21 @@ static VkResult libresoc_alloc_memory(struct libresoc_device *device,
 {
        struct libresoc_device_memory *mem;
        VkResult result;
-       uint32_t flags = 0;
+       // uint32_t flags = 0;
 
        assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
 
        const VkImportMemoryFdInfoKHR *import_info =
                vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
-       const VkMemoryDedicatedAllocateInfo *dedicate_info =
-               vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
-       const VkExportMemoryAllocateInfo *export_info =
-               vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
+       // const VkMemoryDedicatedAllocateInfo *dedicate_info =
+       //      vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
+       // const VkExportMemoryAllocateInfo *export_info =
+       //      vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
        const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
                vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
 
-       const struct wsi_memory_allocate_info *wsi_info =
-               vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
+       // const struct wsi_memory_allocate_info *wsi_info =
+       //      vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
 
 
        mem = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8,
@@ -1389,7 +1394,7 @@ void libresoc_GetPhysicalDeviceProperties2(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceProperties2                *pProperties)
 {
-       LIBRESOC_FROM_HANDLE(libresoc_physical_device, pdevice, physicalDevice);
+       // LIBRESOC_FROM_HANDLE(libresoc_physical_device, pdevice, physicalDevice);
        libresoc_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
        //TODO: add more stuffs when required
 }
@@ -1398,7 +1403,7 @@ void libresoc_GetPhysicalDeviceFeatures2(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceFeatures2                  *pFeatures)
 {
-       LIBRESOC_FROM_HANDLE(libresoc_physical_device, pdevice, physicalDevice);
+       // LIBRESOC_FROM_HANDLE(libresoc_physical_device, pdevice, physicalDevice);
        libresoc_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
 }
 
@@ -1522,10 +1527,10 @@ VkResult libresoc_BindImageMemory(
        return libresoc_BindImageMemory2(device, 1, &info);
 }
 
-static VkResult libresoc_queue_submit(struct libresoc_queue *queue,
-                                  const struct libresoc_queue_submission *submission)
-{
-       return VK_SUCCESS;
+// static VkResult libresoc_queue_submit(struct libresoc_queue *queue,
+//                                   const struct libresoc_queue_submission *submission)
+// {
+//     return VK_SUCCESS;
        // struct libresoc_deferred_queue_submission *deferred = NULL;
 
        // VkResult result = libresoc_create_deferred_submission(queue, submission, &deferred);
@@ -1542,22 +1547,361 @@ static VkResult libresoc_queue_submit(struct libresoc_queue *queue,
        //      return result;
        // }
        // return libresoc_process_submissions(&processing_list);
-}
+// }
 
 /* Signals fence as soon as all the work currently put on queue is done. */
-static VkResult libresoc_signal_fence(struct libresoc_queue *queue,
-                              VkFence fence)
+// static VkResult libresoc_signal_fence(struct libresoc_queue *queue,
+//                               VkFence fence)
+// {
+//     return libresoc_queue_submit(queue, &(struct libresoc_queue_submission) {
+//                     .fence = fence
+//             });
+// }
+
+// static bool libresoc_submit_has_effects(const VkSubmitInfo *info)
+// {
+//     return info->commandBufferCount ||
+//            info->waitSemaphoreCount ||
+//            info->signalSemaphoreCount;
+// }
+
+/*
+ * 2D cross product of edges a->b and a->c (twice the signed area of the
+ * triangle a, b, c); only the x and y components are used.
+ */
+static int double_triarea(const struct int4 *a, const struct int4 *b, const struct int4 *c)
+{
+  const int abx = b->x - a->x;
+  const int aby = b->y - a->y;
+  const int acx = c->x - a->x;
+  const int acy = c->y - a->y;
+
+  return abx * acy - aby * acx;
+}
+
+static void MinMax(struct int4 *coords, struct int4 *minwin, struct int4 *maxwin)
 {
-       return libresoc_queue_submit(queue, &(struct libresoc_queue_submission) {
-                       .fence = fence
-               });
+  /* Component-wise bounding box of the three points coords[0..2]:
+   * start from an empty box, then widen it with every point. */
+  minwin->x = minwin->y = minwin->z = minwin->w = INT_MAX;
+  maxwin->x = maxwin->y = maxwin->z = maxwin->w = INT_MIN;
+
+  for(int comp = 0; comp < 4; comp++)
+  {
+    for(int pt = 0; pt < 3; pt++)
+    {
+      const int value = coords[pt].v[comp];
+
+      if(value <= minwin->v[comp])
+        minwin->v[comp] = value;
+      if(value >= maxwin->v[comp])
+        maxwin->v[comp] = value;
+    }
+  }
 }
 
-static bool libresoc_submit_has_effects(const VkSubmitInfo *info)
+static float clamp01(float in)
 {
-       return info->commandBufferCount ||
-              info->waitSemaphoreCount ||
-              info->signalSemaphoreCount;
+  /* Clamp to [0, 1]. The lower bound is tested as a negated comparison so
+   * that NaN (for which every comparison is false) maps to 0 instead of
+   * passing through to a float-to-byte conversion in the callers. */
+  if (!(in > 0.0f))
+    return 0.0f;
+  return in > 1.0f ? 1.0f : in;
+}
+
+/*
+ * Rasterize one triangle over the window rectangle
+ * [work->minwin, work->maxwin) (upper bounds exclusive). For every covered
+ * pixel the fragment shader pipeline->fs is invoked and its x/y/z output is
+ * written to byte offsets 2/1/0 of the 4-byte pixel in state->col[0];
+ * byte 3 is left untouched.
+ */
+static void ProcessTriangles(struct TriangleWork *work)
+{
+    struct GPUState *state = work->state;
+    LIBRESOC_FROM_HANDLE(libresoc_image, img, state->col[0]);
+    LIBRESOC_FROM_HANDLE(libresoc_pipeline, pipeline, state->pipeline);
+    const uint32_t w = img->width;
+    const int bpp = 4;
+    byte *bytes = img->bytes;
+
+    for(int y = work->minwin.y; y < work->maxwin.y; y++)
+    {
+        for(int x = work->minwin.x; x < work->maxwin.x; x++)
+        {
+            const int PAx = work->tri[0].x - x;
+            const int PAy = work->tri[0].y - y;
+
+            /* Unnormalized barycentric weights of the pixel. */
+            const int ux = (work->ACx * PAy) - (work->ACy * PAx);
+            const int uy = (PAx * work->ABy) - (PAy * work->ABx);
+            const int b0 = work->area2 - (ux + uy);
+
+            /* Any negative weight means the pixel is outside the triangle. */
+            if (b0 < 0 || ux < 0 || uy < 0)
+                continue;
+
+            struct float4 n;
+            n.x = (float)b0;
+            n.y = (float)ux;
+            n.z = (float)uy;
+            n.w = 0.0f;
+            float pixdepth = n.x * work->depth.x + n.y * work->depth.y + n.z * work->depth.z;
+
+            /* Weight by 1/w per vertex, then renormalize to sum to 1. */
+            n.x *= work->invw.x;
+            n.y *= work->invw.y;
+            n.z *= work->invw.z;
+
+            float invlen = 1.0f / (n.x + n.y + n.z);
+            n.x *= invlen;
+            n.y *= invlen;
+            n.z *= invlen;
+
+            struct float4 pix;
+            pipeline->fs(state, pixdepth, &n, work->vsout, &pix);
+
+            byte *px = &bytes[(y * w + x) * bpp];
+            px[2] = (byte)(clamp01(pix.x) * 255.0f);
+            px[1] = (byte)(clamp01(pix.y) * 255.0f);
+            px[0] = (byte)(clamp01(pix.z) * 255.0f);
+        }
+    }
+}
+
+/*
+ * Translate a vertex number into the index handed to the vertex shader.
+ * Non-indexed draws return vertexIndex unchanged. Indexed draws read entry
+ * vertexIndex from the bound index buffer (state->ib) as a 16-bit value for
+ * VK_INDEX_TYPE_UINT16 and as a 32-bit value for every other index type.
+ */
+static uint32_t GetIndex(struct GPUState *state, uint32_t vertexIndex, bool indexed)
+{
+  if(!indexed)
+    return vertexIndex;
+
+  LIBRESOC_FROM_HANDLE(libresoc_buffer, buf, state->ib.buffer);
+  const byte *ib = buf->bytes + state->ib.offset;
+
+  /* Read through memcpy: ib + offset is not guaranteed to be aligned for
+   * uint16_t/uint32_t, and casting the byte pointer would also break the
+   * aliasing rules. */
+  if(state->ib.indexType == VK_INDEX_TYPE_UINT16)
+  {
+    uint16_t i16;
+    memcpy(&i16, ib + (size_t)vertexIndex * sizeof(i16), sizeof(i16));
+    return i16;
+  }
+
+  uint32_t i32;
+  memcpy(&i32, ib + (size_t)vertexIndex * sizeof(i32), sizeof(i32));
+  return i32;
+}
+
+/*
+ * Fill every pixel of the target image with the clear colour. Pixels are
+ * written as 4 bytes with float32[0], [1], [2], [3] stored at byte offsets
+ * 2, 1, 0, 3 respectively.
+ */
+static void ClearTarget(VkImage target, const VkClearColorValue *col)
+{
+  LIBRESOC_FROM_HANDLE(libresoc_image, img, target);
+  byte *bits = img->bytes;
+  const size_t pixels = (size_t)img->width * img->height;
+  const size_t bpp = 4;
+
+  /* Clamp before scaling: converting a float outside the byte range to an
+   * integer type is undefined behaviour. */
+  byte eval[4];
+  eval[2] = (byte)(clamp01(col->float32[0]) * 255.0f);
+  eval[1] = (byte)(clamp01(col->float32[1]) * 255.0f);
+  eval[0] = (byte)(clamp01(col->float32[2]) * 255.0f);
+  eval[3] = (byte)(clamp01(col->float32[3]) * 255.0f);
+
+  for(size_t p = 0; p < pixels; p++)
+  {
+    memcpy(&bits[p * bpp], eval, sizeof(eval));
+  }
+}
+
+/*
+ * Draw numVerts vertices as a triangle list into state->col[0].
+ *
+ * Steps: run the vertex shader for each vertex (indices come from GetIndex,
+ * starting at `first`), convert clip-space positions to window coordinates,
+ * then rasterize each triangle in 32x32 tiles via ProcessTriangles.
+ * Only VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST is handled; vertices that do not
+ * complete a triangle are ignored.
+ */
+static void DrawTriangles(struct GPUState *state, int numVerts, uint32_t first, bool indexed)
+{
+    LIBRESOC_FROM_HANDLE(libresoc_image, img, state->col[0]);
+    LIBRESOC_FROM_HANDLE(libresoc_pipeline, pipeline, state->pipeline);
+    const uint32_t w = img->width;
+    const uint32_t h = img->height;
+    const int blockSize = 32;
+
+    /* Fewer than three vertices cannot form a triangle. */
+    if (numVerts < 3)
+        return;
+    const int usedVerts = numVerts - (numVerts % 3);
+
+    struct VertexCacheEntry *vertices = malloc(sizeof(*vertices) * (size_t)usedVerts);
+    struct int4 *winCords = malloc(sizeof(*winCords) * (size_t)usedVerts);
+    if (vertices == NULL || winCords == NULL)
+    {
+        free(vertices);
+        free(winCords);
+        return;
+    }
+
+    /* Shade vertices in submission order. */
+    for (int v = 0; v < usedVerts; v++)
+    {
+        pipeline->vs(state, GetIndex(state, first + (uint32_t)v, indexed), &vertices[v]);
+    }
+
+    /* To window coordinates. Every shaded vertex must be converted: the
+     * rasterizer below reads winCords[i .. i + 2] for each triangle. */
+    for (int v = 0; v < usedVerts; ++v)
+    {
+        struct int4 win;
+
+        win.x = (int)((vertices[v].position.x / vertices[v].position.w + 1.0f) * 0.5f * w);
+        win.y = (int)((vertices[v].position.y * -1.0f / vertices[v].position.w + 1.0f) * 0.5f * h);
+        /* z/w are unused by the rasterizer but are read by MinMax. */
+        win.z = 0;
+        win.w = 0;
+        winCords[v] = win;
+    }
+
+    for (int i = 0; i < usedVerts; i += 3)
+    {
+        /* culling can be done here */
+        const int area2 = double_triarea(&winCords[i + 0], &winCords[i + 1], &winCords[i + 2]);
+
+        /* Degenerate triangle: covers no area and would divide by zero. */
+        if (area2 == 0)
+            continue;
+
+        struct int4 minwin, maxwin;
+        MinMax(&winCords[i], &minwin, &maxwin);
+
+        /* Clip the bounding box to the render target. */
+        minwin.x = (0 > minwin.x) ? 0 : minwin.x;
+        minwin.y = (0 > minwin.y) ? 0 : minwin.y;
+        maxwin.x = ((int)(w - 1) < maxwin.x) ? (int)(w - 1) : maxwin.x;
+        maxwin.y = ((int)(h - 1) < maxwin.y) ? (int)(h - 1) : maxwin.y;
+
+        struct TriangleWork work;
+        work.state = state;
+        work.ABx = winCords[i + 1].x - winCords[i + 0].x;
+        work.ABy = winCords[i + 1].y - winCords[i + 0].y;
+        work.ACx = winCords[i + 2].x - winCords[i + 0].x;
+        work.ACy = winCords[i + 2].y - winCords[i + 0].y;
+        work.vsout = &vertices[i];
+        work.tri = &winCords[i];
+        work.area2 = area2;
+        work.invarea = 1.0f / (float)area2;
+        work.invw.x = 1.0f / vertices[i + 0].position.w;
+        work.invw.y = 1.0f / vertices[i + 1].position.w;
+        work.invw.z = 1.0f / vertices[i + 2].position.w;
+        work.invw.w = 0.0f;
+        work.depth.x = vertices[i + 0].position.z * work.invw.x;
+        work.depth.y = vertices[i + 1].position.z * work.invw.y;
+        work.depth.z = vertices[i + 2].position.z * work.invw.z;
+        work.depth.w = 0.0f;
+
+        const int xblocks = 1 + (maxwin.x - minwin.x) / blockSize;
+        const int yblocks = 1 + (maxwin.y - minwin.y) / blockSize;
+
+        for (int bx = 0; bx < xblocks; bx++)
+        {
+            for (int by = 0; by < yblocks; by++)
+            {
+                work.minwin = minwin;
+                work.minwin.x += blockSize * bx;
+                work.minwin.y += blockSize * by;
+
+                work.maxwin = maxwin;
+                work.maxwin.x = (maxwin.x < work.minwin.x + blockSize) ? maxwin.x : work.minwin.x + blockSize;
+                work.maxwin.y = (maxwin.y < work.minwin.y + blockSize) ? maxwin.y : work.minwin.y + blockSize;
+
+                /* Rasterize this tile; calling it once after the loops
+                 * would draw only the last tile. */
+                ProcessTriangles(&work);
+            }
+        }
+    }
+
+    free(winCords);
+    free(vertices);
+}
+
+/*
+ * Replay the commands recorded in a command buffer, in list order, against
+ * a freshly zeroed GPUState. State-setting commands update the GPUState;
+ * Draw rasterizes through DrawTriangles. SetScissors, DrawIndexed,
+ * CopyBuf2Img, CopyBuf and PipelineBarrier are currently no-ops.
+ */
+static void execute(VkCommandBuffer commandBuffer)
+{
+    LIBRESOC_FROM_HANDLE(libresoc_cmd_buffer, cmd_buffer, commandBuffer);
+    struct GPUState state = {0};
+    list_for_each_entry(struct libresoc_cmd, lcmd,
+            &cmd_buffer->cmd.link, link) {
+        /* The command id is read from the start of the command storage. */
+        int *commandId = (int *)&lcmd->command;
+        switch (*commandId) {
+            case BeginRenderPassID:
+            {
+                LIBRESOC_FROM_HANDLE(libresoc_render_pass, render_pass, lcmd->command.beginRenderPass.renderPass);
+                LIBRESOC_FROM_HANDLE(libresoc_framebuffer, frame_buffer, lcmd->command.beginRenderPass.framebuffer);
+                /* Only subpass 0 and its first attachment are used. */
+                const struct libresoc_subpass *subpass = &render_pass->subpasses[0];
+                const struct libresoc_subpass_attachment *attachment = &subpass->attachments[0];
+
+                state.col[0] = (struct VkImage_T *)frame_buffer->attachments[attachment->attachment]->image;
+
+                int clearIdx = 0;
+
+                if(attachment->clear)
+                {
+                    ClearTarget(state.col[0], &lcmd->command.beginRenderPass.clearval[clearIdx++].color);
+                }
+
+                /* TODO: bind and clear the depth attachment once depth
+                 * targets are supported. */
+                break;
+            }
+            case EndRenderPassID:
+            {
+                state.col[0] = VK_NULL_HANDLE;
+                break;
+            }
+            case BindPipelineID:
+            {
+                state.pipeline = lcmd->command.bindPipeline.pipeline;
+                break;
+            }
+            case BindDescriptorSetsID:
+            {
+                state.sets[lcmd->command.bindDescriptorSets.idx] = lcmd->command.bindDescriptorSets.set;
+                break;
+            }
+            case BindVBID:
+            {
+                state.vbs[lcmd->command.bindVB.slot].buffer = lcmd->command.bindVB.buffer;
+                state.vbs[lcmd->command.bindVB.slot].offset = lcmd->command.bindVB.offset;
+                break;
+            }
+            case BindIBID:
+            {
+                state.ib.buffer = lcmd->command.bindIB.buffer;
+                state.ib.offset = lcmd->command.bindIB.offset;
+                state.ib.indexType = lcmd->command.bindIB.indexType;
+                break;
+            }
+            case SetViewportID:
+            {
+                state.view = lcmd->command.setViewport.view;
+                break;
+            }
+            case SetScissorsID:
+                break;
+            case PushConstantsID:
+            {
+                memcpy(state.pushconsts + lcmd->command.pushConstants.offset,
+                        lcmd->command.pushConstants.values,
+                        lcmd->command.pushConstants.size);
+                break;
+            }
+            case DrawID:
+            {
+                DrawTriangles(&state, lcmd->command.draw.vertexCount, lcmd->command.draw.firstVertex, false);
+                break;
+            }
+            case DrawIndexedID:
+                break;
+            case CopyBuf2ImgID:
+                break;
+            case CopyBufID:
+                break;
+            case PipelineBarrierID:
+                break;
+            default:
+                /* Unknown command id: ignore. */
+                break;
+        }
+    }
 }
 
 VkResult libresoc_QueueSubmit(
@@ -1566,58 +1910,15 @@ VkResult libresoc_QueueSubmit(
        const VkSubmitInfo*                         pSubmits,
        VkFence                                     fence)
 {
-       LIBRESOC_FROM_HANDLE(libresoc_queue, queue, _queue);
-       VkResult result;
-       uint32_t fence_idx = 0;
-       bool flushed_caches = false;
-
-       if (fence != VK_NULL_HANDLE) {
-               for (uint32_t i = 0; i < submitCount; ++i)
-                       if (libresoc_submit_has_effects(pSubmits + i))
-                               fence_idx = i;
-       } else
-               fence_idx = UINT32_MAX;
-
-       for (uint32_t i = 0; i < submitCount; i++) {
-               if (!libresoc_submit_has_effects(pSubmits + i) && fence_idx != i)
-                       continue;
-
-               VkPipelineStageFlags wait_dst_stage_mask = 0;
-               for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) {
-                       wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];
-               }
-
-               const VkTimelineSemaphoreSubmitInfo *timeline_info =
-                       vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
-
-               result = libresoc_queue_submit(queue, &(struct libresoc_queue_submission) {
-                               .cmd_buffers = pSubmits[i].pCommandBuffers,
-                               .cmd_buffer_count = pSubmits[i].commandBufferCount,
-                               .wait_dst_stage_mask = wait_dst_stage_mask,
-                               .flush_caches = !flushed_caches,
-                               .wait_semaphores = pSubmits[i].pWaitSemaphores,
-                               .wait_semaphore_count = pSubmits[i].waitSemaphoreCount,
-                               .signal_semaphores = pSubmits[i].pSignalSemaphores,
-                               .signal_semaphore_count = pSubmits[i].signalSemaphoreCount,
-                               .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
-                               .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
-                               .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
-                               .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
-                               .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
-                       });
-               if (result != VK_SUCCESS)
-                       return result;
-
-               flushed_caches  = true;
-       }
-
-       if (fence != VK_NULL_HANDLE && !submitCount) {
-               result = libresoc_signal_fence(queue, fence);
-               if (result != VK_SUCCESS)
-                       return result;
-       }
-
-       return VK_SUCCESS;
+    /* Walk every submission and hand each of its command buffers, in
+     * array order, to execute().  The queue handle, the fence and the
+     * semaphores carried by pSubmits are not referenced here.
+     */
+    for (uint32_t submit_idx = 0; submit_idx < submitCount; ++submit_idx) {
+        const VkSubmitInfo *submit = &pSubmits[submit_idx];
+
+        for (uint32_t cb_idx = 0; cb_idx < submit->commandBufferCount; ++cb_idx)
+            execute(submit->pCommandBuffers[cb_idx]);
+    }
+    return VK_SUCCESS;
 }
 
 VkResult libresoc_CreateFence(
@@ -1627,10 +1928,10 @@ VkResult libresoc_CreateFence(
        VkFence*                                    pFence)
 {
        LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
-       const VkExportFenceCreateInfo *export =
-               vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO);
-       VkExternalFenceHandleTypeFlags handleTypes =
-               export ? export->handleTypes : 0;
+       // const VkExportFenceCreateInfo *export =
+       //      vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO);
+       // VkExternalFenceHandleTypeFlags handleTypes =
+       //      export ? export->handleTypes : 0;
        struct libresoc_fence *fence;
 
        fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,
@@ -1705,7 +2006,7 @@ void libresoc_UnmapMemory(
        VkDevice                                    _device,
        VkDeviceMemory                              _memory)
 {
-       LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
+       // LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
        LIBRESOC_FROM_HANDLE(libresoc_device_memory, mem, _memory);
 
        if (mem == NULL)
@@ -1741,15 +2042,108 @@ VkResult libresoc_CreateFramebuffer(
        const VkAllocationCallbacks*                pAllocator,
        VkFramebuffer*                              pFramebuffer)
 {
-       //TODO: stub
+       LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
+       const VkFramebufferAttachmentsCreateInfo *imageless_info =
+               vk_find_struct_const(pCreateInfo->pNext,
+                       FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);
+       struct libresoc_framebuffer *fb;
+       size_t alloc_size = sizeof(*fb);
+
+       assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
+
+       /* Room for the image-view pointers is appended to the object only
+        * when the views are supplied through pAttachments.
+        */
+       if (imageless_info == NULL)
+               alloc_size += sizeof(struct libresoc_image_view*) * pCreateInfo->attachmentCount;
+
+       fb = vk_alloc2(&device->vk.alloc, pAllocator, alloc_size, 8,
+                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+       if (!fb)
+               return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+       vk_object_base_init(&device->vk, &fb->base,
+                           VK_OBJECT_TYPE_FRAMEBUFFER);
+
+       /* Start from the requested dimensions; the loops below shrink them
+        * to the smallest attachment encountered.
+        */
+       fb->attachment_count = pCreateInfo->attachmentCount;
+       fb->width = pCreateInfo->width;
+       fb->height = pCreateInfo->height;
+       fb->layers = pCreateInfo->layers;
+
+       if (imageless_info == NULL) {
+               /* Record each view and clamp against its extent. */
+               for (uint32_t idx = 0; idx < pCreateInfo->attachmentCount; idx++) {
+                       struct libresoc_image_view *view =
+                               libresoc_image_view_from_handle(pCreateInfo->pAttachments[idx]);
+
+                       fb->attachments[idx] = view;
+                       fb->width = MIN2(fb->width, view->extent.width);
+                       fb->height = MIN2(fb->height, view->extent.height);
+                       fb->layers = MIN2(fb->layers, libresoc_surface_max_layer_count(view));
+               }
+       } else {
+               /* Imageless: only the declared image infos are available. */
+               for (unsigned idx = 0; idx < imageless_info->attachmentImageInfoCount; ++idx) {
+                       const VkFramebufferAttachmentImageInfo *info =
+                               &imageless_info->pAttachmentImageInfos[idx];
+
+                       fb->width = MIN2(fb->width, info->width);
+                       fb->height = MIN2(fb->height, info->height);
+                       fb->layers = MIN2(fb->layers, info->layerCount);
+               }
+       }
+
+       *pFramebuffer = libresoc_framebuffer_to_handle(fb);
+       return VK_SUCCESS;
+}
+
+/* Create a buffer object that records pCreateInfo->size.  No storage is
+ * attached here: the bytes pointer starts out NULL.
+ */
+VkResult libresoc_CreateBuffer(
+       VkDevice                                    _device,
+       const VkBufferCreateInfo*                   pCreateInfo,
+       const VkAllocationCallbacks*                pAllocator,
+       VkBuffer*                                   pBuffer)
+{
+       LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
+       struct libresoc_buffer *buf =
+               vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buf), 8,
+                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+       if (!buf)
+               return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+       vk_object_base_init(&device->vk, &buf->base, VK_OBJECT_TYPE_BUFFER);
+
+       buf->bytes = NULL;
+       buf->size = pCreateInfo->size;
+
+       *pBuffer = libresoc_buffer_to_handle(buf);
+
        return VK_SUCCESS;
 }
+
 void libresoc_DestroyBuffer(
        VkDevice                                    _device,
        VkBuffer                                    _buffer,
        const VkAllocationCallbacks*                pAllocator)
 {}
 
+/* Fill pMemoryRequirements for a buffer: every memory type of the physical
+ * device is reported as usable, the alignment is fixed at 16 bytes and the
+ * size is the buffer size rounded up to that alignment.
+ */
+void libresoc_GetBufferMemoryRequirements(
+       VkDevice                                    _device,
+       VkBuffer                                    _buffer,
+       VkMemoryRequirements*                       pMemoryRequirements)
+{
+    LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
+    LIBRESOC_FROM_HANDLE(libresoc_buffer, buffer, _buffer);
+    const uint32_t type_count =
+        device->physical_device->memory_properties.memoryTypeCount;
+
+    /* One bit per memory type.  Shifting a 32-bit value by 32 or more is
+     * undefined in C, and Vulkan allows up to 32 memory types, so the
+     * "all bits set" case is produced without a shift.
+     */
+    pMemoryRequirements->memoryTypeBits =
+        type_count >= 32 ? ~0u : (1u << type_count) - 1;
+
+    pMemoryRequirements->alignment = 16;
+
+    pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
+}
+
+/* Attach storage to a buffer: its bytes pointer is set to the memory
+ * object's bytes advanced by memoryOffset.  Always reports VK_SUCCESS.
+ */
+VkResult libresoc_BindBufferMemory(
+       VkDevice                                    device,
+       VkBuffer                                    _buffer,
+       VkDeviceMemory                              _memory,
+       VkDeviceSize                                memoryOffset)
+{
+       LIBRESOC_FROM_HANDLE(libresoc_device_memory, memory, _memory);
+       LIBRESOC_FROM_HANDLE(libresoc_buffer, buffer, _buffer);
+
+       buffer->bytes = memory->bytes + memoryOffset;
+
+       return VK_SUCCESS;
+}
+
 void libresoc_DestroyFence(
        VkDevice                                    _device,
        VkFence                                     _fence,
index cd8fe7cd02337534d263778d20592cfd4c4b1178..9e6b508ab3ffa26a7d3cb64eb5f80a2d175e10f9 100644 (file)
@@ -150,6 +150,7 @@ libresoc_CreateImage(VkDevice device,
                                 pAllocator,
                                 pImage);
 }
+
 void libresoc_GetImageSubresourceLayout(
        VkDevice                                    _device,
        VkImage                                     _image,
@@ -159,7 +160,7 @@ void libresoc_GetImageSubresourceLayout(
        LIBRESOC_FROM_HANDLE(libresoc_image, image, _image);
 
     pLayout->size = image->size;
-    pLayout->rowPitch = image->width;
+    pLayout->rowPitch = image->width * 4;
 }
 
 VkResult
@@ -168,24 +169,28 @@ libresoc_CreateImageView(VkDevice _device,
                     const VkAllocationCallbacks *pAllocator,
                     VkImageView *pView)
 {
-       //TODO: stub
-       return VK_SUCCESS;
-       // LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
-       // struct libresoc_image_view *view;
+       LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
+       LIBRESOC_FROM_HANDLE(libresoc_image, image, pCreateInfo->image);
+       struct libresoc_image_view *view;
 
-       // view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
-       //                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-       // if (view == NULL)
-       //      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+       view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
+                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+       if (view == NULL)
+               return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+       vk_object_base_init(&device->vk, &view->base,
+                           VK_OBJECT_TYPE_IMAGE_VIEW);
 
-       // vk_object_base_init(&device->vk, &view->base,
-       //                  VK_OBJECT_TYPE_IMAGE_VIEW);
 
-       // libresoc_image_view_init(view, device, pCreateInfo, NULL);
+       view->image = image;
+       view->type = pCreateInfo->viewType;
+       view->vk_format = pCreateInfo->format;
+    view->extent.width  = image->width;
+    view->extent.height = image->height;
 
-       // *pView = libresoc_image_view_to_handle(view);
+       *pView = libresoc_image_view_to_handle(view);
 
-       // return VK_SUCCESS;
+       return VK_SUCCESS;
 }
 void
 libresoc_DestroyImage(VkDevice _device, VkImage _image,
index 06a62424300eef00c4c9c69a06151e5be7e1daba..054a85c7dbac71a4b3faf7025402bd78c6b94da1 100644 (file)
@@ -2689,7 +2689,7 @@ LLVMModuleRef libresoc_nir_translate(struct libresoc_llvm *llvm_ref, struct nir_
    //TODO: this is zero argument function and returns void
    LLVMTypeRef main_function_type = LLVMFunctionType(ret_type, arg_types, ctx.args.arg_count, 0);
 
-   LLVMValueRef main_function = LLVMAddFunction(mod, "main_function", main_function_type);
+   LLVMValueRef main_function = LLVMAddFunction(mod, gl_shader_stage_name(nir->info.stage), main_function_type);
    LLVMBasicBlockRef main_function_body =
       LLVMAppendBasicBlockInContext(ctx.lc.context, main_function, "main_body");
    LLVMPositionBuilderAtEnd(ctx.lc.builder, main_function_body);
@@ -2732,52 +2732,60 @@ LLVMModuleRef libresoc_nir_translate(struct libresoc_llvm *llvm_ref, struct nir_
                                       0, NULL,
                                      NULL);
     if (disasm) {
-LLVMOrcTargetAddress MainAddr;
-LLVMOrcGetSymbolAddress(llvm_ref->orc_ref, &MainAddr ,"main_function");
-   const uint8_t *bytes = (const uint8_t *)MainAddr; 
-   char outline[1024];
-uint64_t pc;
-   pc = 0;
-uint64_t extent = 200;
-   while (pc < extent) {
-      size_t Size;
-
-      /*
-       * Print address.  We use addresses relative to the start of the function,
-       * so that between runs.
-       */
-
-
-      Size = LLVMDisasmInstruction(disasm, (uint8_t *)bytes + pc, extent - pc, 0, outline,
-                                   sizeof outline);
-
-      /*
-       * Print the instruction.
-       */
-      printf("\t%s \n", outline);
-
-
-      /*
-       * Stop disassembling on return statements, if there is no record of a
-       * jump to a successive address.
-       *
-       * XXX: This currently assumes x86
-       */
-
-      if (Size == 1 && bytes[pc] == 0xc3) {
-         break;
-      }
+        LLVMOrcTargetAddress MainAddr;
+        LLVMOrcGetSymbolAddress(llvm_ref->orc_ref, &MainAddr ,gl_shader_stage_name(nir->info.stage));
+        // if(nir->info.stage == MESA_SHADER_VERTEX)
+        // {
+        //     pipeline->vs = (VertexShader)MainAddr;
+        // }
+        // else if(nir->info.stage == MESA_SHADER_FRAGMENT)
+        // {
+        //     pipeline->fs = (FragmentShader)MainAddr;
+        // }
+        const uint8_t *bytes = (const uint8_t *)MainAddr; 
+        char outline[1024];
+        uint64_t pc;
+        pc = 0;
+        uint64_t extent = 200;
+        while (pc < extent) {
+            size_t Size;
+
+            /*
+             * Print address.  We use addresses relative to the start of the function,
+             * so that between runs.
+             */
+
+
+            Size = LLVMDisasmInstruction(disasm, (uint8_t *)bytes + pc, extent - pc, 0, outline,
+                    sizeof outline);
+
+            /*
+             * Print the instruction.
+             */
+            printf("\t%s \n", outline);
+
+
+            /*
+             * Stop disassembling on return statements, if there is no record of a
+             * jump to a successive address.
+             *
+             * XXX: This currently assumes x86
+             */
+
+            if (Size == 1 && bytes[pc] == 0xc3) {
+                break;
+            }
 
-      /*
-       * Advance.
-       */
+            /*
+             * Advance.
+             */
 
-      pc += Size;
+            pc += Size;
 
-      if (pc >= extent) {
-         break;
-      }
-   }    
+            if (pc >= extent) {
+                break;
+            }
+        }    
     }
     return mod;
     // LLVMModuleRef mod = LLVMModuleCreateWithName("libresoc_mod");
@@ -2800,3 +2808,9 @@ uint64_t extent = 200;
     //         orc_sym_resolver,
     //         (void *)(llvm_ref->orc_ref));
 }
+
+/* Look up the symbol `name` in the ORC JIT stack held by llvm_ref and return
+ * its address as a callable Shader pointer.  Returns NULL when the lookup
+ * does not produce an address.
+ */
+Shader GetFuncPointer(struct libresoc_llvm *llvm_ref, const char *name) {
+    /* Start from 0: the lookup's result is not inspected here, so if it
+     * fails without writing the output the caller gets NULL instead of an
+     * indeterminate stack value.
+     * NOTE(review): the value returned by LLVMOrcGetSymbolAddress is still
+     * dropped; its type depends on the LLVM version -- confirm and handle.
+     */
+    LLVMOrcTargetAddress addr = 0;
+    LLVMOrcGetSymbolAddress(llvm_ref->orc_ref, &addr, name);
+    /* Go through uintptr_t so the integer-to-pointer cast has matching width. */
+    return (Shader)(uintptr_t)addr;
+}
index 6aeae357aee545d6373c91f99d04ae861b704af0..160d207b0f017cbd4b56baa25a722069b22f9b2e 100644 (file)
@@ -12,6 +12,7 @@
 #include "nir/nir_deref.h"
 #include <float.h>
 
+typedef void (*Shader)();
 enum
 {
    ADDR_SPACE_FLAT = 0, 
@@ -173,5 +174,6 @@ void handle_shader_output_decl(struct libresoc_nir_tran_ctx *ctx,
                                   struct nir_shader *nir, struct nir_variable *variable,
                                   gl_shader_stage stage);
 LLVMModuleRef libresoc_nir_translate(struct libresoc_llvm *llvm_ref, struct nir_shader *nir);
+Shader GetFuncPointer(struct libresoc_llvm *llvm_ref, const char *name);
 #endif
 
index 80ed9904080fb948bce271d77fa53d58ea7ddb3c..4868269d18decbf6c8a1c29a332f25649b11e625 100644 (file)
 #include "libresoc_private.h"
 
 void libresoc_CmdClearColorImage(
-       VkCommandBuffer                             commandBuffer,
-       VkImage                                     image_h,
-       VkImageLayout                               imageLayout,
-       const VkClearColorValue*                    pColor,
-       uint32_t                                    rangeCount,
-       const VkImageSubresourceRange*              pRanges)
+        VkCommandBuffer                             commandBuffer,
+        VkImage                                     image_h,
+        VkImageLayout                               imageLayout,
+        const VkClearColorValue*                    pColor,
+        uint32_t                                    rangeCount,
+        const VkImageSubresourceRange*              pRanges)
 {
 
-       LIBRESOC_FROM_HANDLE(libresoc_image, image, image_h);
-    uint32_t r = pColor->uint32[0];
-    uint32_t g = pColor->uint32[1];
-    uint32_t b = pColor->uint32[2];
-    uint32_t a = pColor->uint32[3];
-    for (int i=0; i < image->size / 4; ) {
-        image->bytes[i] = r;
-        i += 1;
-        image->bytes[i] = g;
-        i += 1;
-        image->bytes[i] = b;
-        i += 1;
-        image->bytes[i] = a;
-        i += 1;
+    /* Fill every pixel of the image with the clear colour, written as four
+     * bytes per pixel in B, G, R, A order.  imageLayout, rangeCount and
+     * pRanges are not consulted: the whole width x height area is cleared.
+     */
+    LIBRESOC_FROM_HANDLE(libresoc_image, image, image_h);
+    byte *bits = image->bytes;
+    const uint32_t w = image->width;
+    const uint32_t h = image->height;
+    const uint32_t bpp = 4;
+    /* Destination byte, within a pixel, of float32[0..3] (R, G, B, A). */
+    static const uint32_t channel_slot[4] = { 2, 1, 0, 3 };
+
+    byte eval[4];
+    for (uint32_t c = 0; c < 4; c++) {
+        float v = pColor->float32[c];
+
+        /* Clamp to [0, 1] before scaling: converting an out-of-range float
+         * to an integer type is undefined.  The negated comparison also
+         * sends NaN to 0.
+         */
+        if (!(v > 0.0f))
+            v = 0.0f;
+        else if (v > 1.0f)
+            v = 1.0f;
+
+        eval[channel_slot[c]] = (byte)(v * 255.0f);
+    }
+
+    for (uint32_t y = 0; y < h; y++)
+    {
+        for (uint32_t x = 0; x < w; x++)
+        {
+            /* Offset computed in size_t so large images cannot wrap 32 bits. */
+            memcpy(&bits[((size_t)y * w + x) * bpp], eval, 4);
+        }
     }
 }
index c05f60f6634d52f3eebcaf8b6eefc4e3b375f021..330d11df7b7999cb41e88895de69e3b947fa511c 100644 (file)
 
 #include "vk_util.h"
 
+/* Fold one subpass dependency into the pass: either into the start barrier
+ * of the destination subpass or, for an outgoing external dependency, into
+ * the pass-wide end barrier.
+ */
+static void
+libresoc_render_pass_add_subpass_dep(struct libresoc_render_pass *pass,
+                                const VkSubpassDependency2 *dep)
+{
+       /* Self-dependencies only permit vkCmdPipelineBarrier() inside the
+        * render pass; the barrier is issued when that call happens, not
+        * when the render pass starts, so nothing is recorded for them.
+        */
+       if (dep->srcSubpass == dep->dstSubpass)
+               return;
+
+       /* Every ingoing external dependency is accumulated on subpass 0. */
+       const uint32_t target =
+               dep->srcSubpass == VK_SUBPASS_EXTERNAL ? 0 : dep->dstSubpass;
+       const bool add_stage_mask =
+               dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+
+       if (target != VK_SUBPASS_EXTERNAL) {
+               if (add_stage_mask)
+                       pass->subpasses[target].start_barrier.src_stage_mask |= dep->srcStageMask;
+               pass->subpasses[target].start_barrier.src_access_mask |= dep->srcAccessMask;
+               pass->subpasses[target].start_barrier.dst_access_mask |= dep->dstAccessMask;
+               return;
+       }
+
+       if (add_stage_mask)
+               pass->end_barrier.src_stage_mask |= dep->srcStageMask;
+       pass->end_barrier.src_access_mask |= dep->srcAccessMask;
+       pass->end_barrier.dst_access_mask |= dep->dstAccessMask;
+}
+
+/* Return true when some used subpass attachment is referenced with a layout
+ * that differs from any of its attachment's initial, stencil-initial, final
+ * or stencil-final layouts.
+ */
+static bool
+libresoc_pass_has_layout_transitions(const struct libresoc_render_pass *pass)
+{
+       for (unsigned s = 0; s < pass->subpass_count; s++) {
+               const struct libresoc_subpass *subpass = &pass->subpasses[s];
+
+               for (unsigned r = 0; r < subpass->attachment_count; r++) {
+                       const uint32_t att_idx = subpass->attachments[r].attachment;
+                       const uint32_t used_layout = subpass->attachments[r].layout;
+
+                       if (att_idx == VK_ATTACHMENT_UNUSED)
+                               continue;
+
+                       const bool unchanged =
+                               used_layout == pass->attachments[att_idx].initial_layout &&
+                               used_layout == pass->attachments[att_idx].stencil_initial_layout &&
+                               used_layout == pass->attachments[att_idx].final_layout &&
+                               used_layout == pass->attachments[att_idx].stencil_final_layout;
+
+                       if (!unchanged)
+                               return true;
+               }
+       }
+
+       return false;
+}
+/* Add the implicit external subpass dependencies for a pass that performs
+ * layout transitions and for which the application supplied no explicit
+ * ingoing and/or outgoing external dependency.
+ */
+static void
+libresoc_render_pass_add_implicit_deps(struct libresoc_render_pass *pass,
+                                  bool has_ingoing_dep, bool has_outgoing_dep)
+{
+       /* The Vulkan specification (text as of 1.0.39) defines two implicit
+        * dependencies:
+        *
+        *  - If no dependency from VK_SUBPASS_EXTERNAL to the first subpass
+        *    using an attachment exists, and there is an automatic layout
+        *    transition away from initialLayout, one is assumed with
+        *    srcStageMask = TOP_OF_PIPE, dstStageMask = ALL_COMMANDS,
+        *    srcAccessMask = 0 and dstAccessMask = input-attachment read |
+        *    color-attachment read/write | depth-stencil-attachment
+        *    read/write.
+        *
+        *  - Likewise, if no dependency from the last subpass using an
+        *    attachment to VK_SUBPASS_EXTERNAL exists, and there is an
+        *    automatic layout transition into finalLayout, one is assumed
+        *    with srcStageMask = ALL_COMMANDS, dstStageMask =
+        *    BOTTOM_OF_PIPE, srcAccessMask = the same attachment access
+        *    set and dstAccessMask = 0.
+        *
+        * Both use dependencyFlags = 0.
+        */
+       const VkAccessFlags attachment_access =
+               VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
+               VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+               VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+               VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+               VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+
+       /* Without automatic layout transitions there is nothing implicit
+        * to add.
+        */
+       if (!libresoc_pass_has_layout_transitions(pass))
+               return;
+
+       if (!has_ingoing_dep) {
+               const VkSubpassDependency2KHR external_to_first = {
+                       .srcSubpass = VK_SUBPASS_EXTERNAL,
+                       .dstSubpass = 0,
+                       .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                       .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                       .srcAccessMask = 0,
+                       .dstAccessMask = attachment_access,
+                       .dependencyFlags = 0,
+               };
+
+               libresoc_render_pass_add_subpass_dep(pass, &external_to_first);
+       }
+
+       if (!has_outgoing_dep) {
+               const VkSubpassDependency2KHR first_to_external = {
+                       .srcSubpass = 0,
+                       .dstSubpass = VK_SUBPASS_EXTERNAL,
+                       .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                       .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                       .srcAccessMask = attachment_access,
+                       .dstAccessMask = 0,
+                       .dependencyFlags = 0,
+               };
+
+               libresoc_render_pass_add_subpass_dep(pass, &first_to_external);
+       }
+}
+
+/* Derive the bookkeeping fields of a render pass from its subpass and
+ * attachment arrays: the first/last subpass index of every referenced
+ * attachment, the per-subpass sample counts, the has_color_att and
+ * has_color_resolve flags, and the in_render_loop markers.
+ */
+static void
+libresoc_render_pass_compile(struct libresoc_render_pass *pass)
+{
+       /* Seed first_subpass_idx of every referenced attachment with the
+        * largest value so the "i < first_subpass_idx" test in the next loop
+        * picks up the first use.
+        */
+       for (uint32_t i = 0; i < pass->subpass_count; i++) {
+               struct libresoc_subpass *subpass = &pass->subpasses[i];
+
+               for (uint32_t j = 0; j < subpass->attachment_count; j++) {
+                       struct libresoc_subpass_attachment *subpass_att =
+                               &subpass->attachments[j];
+                       if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
+                               continue;
+
+                       struct libresoc_render_pass_attachment *pass_att =
+                               &pass->attachments[subpass_att->attachment];
+
+                       pass_att->first_subpass_idx = UINT32_MAX;
+               }
+       }
+
+       for (uint32_t i = 0; i < pass->subpass_count; i++) {
+               struct libresoc_subpass *subpass = &pass->subpasses[i];
+               uint32_t color_sample_count = 1, depth_sample_count = 1;
+
+               /* We don't allow depth_stencil_attachment to be non-NULL and
+                * be VK_ATTACHMENT_UNUSED.  This way something can just check
+                * for NULL and be guaranteed that they have a valid
+                * attachment.
+                */
+               if (subpass->depth_stencil_attachment &&
+                   subpass->depth_stencil_attachment->attachment == VK_ATTACHMENT_UNUSED)
+                       subpass->depth_stencil_attachment = NULL;
+
+               /* Same normalisation for the depth/stencil resolve attachment. */
+               if (subpass->ds_resolve_attachment &&
+                   subpass->ds_resolve_attachment->attachment == VK_ATTACHMENT_UNUSED)
+                       subpass->ds_resolve_attachment = NULL;
+
+               /* Record the first and last subpass that reference each
+                * attachment; subpasses are visited in increasing order, so the
+                * last assignment to last_subpass_idx wins.
+                */
+               for (uint32_t j = 0; j < subpass->attachment_count; j++) {
+                       struct libresoc_subpass_attachment *subpass_att =
+                               &subpass->attachments[j];
+                       if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
+                               continue;
+
+                       struct libresoc_render_pass_attachment *pass_att =
+                               &pass->attachments[subpass_att->attachment];
+
+                       if (i < pass_att->first_subpass_idx)
+                               pass_att->first_subpass_idx = i;
+                       pass_att->last_subpass_idx = i;
+               }
+
+               /* color_sample_count ends up as the sample count of the last
+                * used color attachment (it stays 1 when there is none).
+                */
+               subpass->has_color_att = false;
+               for (uint32_t j = 0; j < subpass->color_count; j++) {
+                       struct libresoc_subpass_attachment *subpass_att =
+                               &subpass->color_attachments[j];
+                       if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
+                               continue;
+
+                       subpass->has_color_att = true;
+
+                       struct libresoc_render_pass_attachment *pass_att =
+                               &pass->attachments[subpass_att->attachment];
+
+                       color_sample_count = pass_att->samples;
+               }
+
+               if (subpass->depth_stencil_attachment) {
+                       const uint32_t a =
+                               subpass->depth_stencil_attachment->attachment;
+                       struct libresoc_render_pass_attachment *pass_att =
+                               &pass->attachments[a];
+                       depth_sample_count = pass_att->samples;
+               }
+
+               subpass->max_sample_count = MAX2(color_sample_count,
+                                                depth_sample_count);
+               subpass->color_sample_count = color_sample_count;
+               subpass->depth_sample_count = depth_sample_count;
+
+               /* We have to handle resolve attachments specially */
+               subpass->has_color_resolve = false;
+               if (subpass->resolve_attachments) {
+                       for (uint32_t j = 0; j < subpass->color_count; j++) {
+                               struct libresoc_subpass_attachment *resolve_att =
+                                       &subpass->resolve_attachments[j];
+
+                               if (resolve_att->attachment == VK_ATTACHMENT_UNUSED)
+                                       continue;
+
+                               subpass->has_color_resolve = true;
+                       }
+               }
+
+               /* An attachment used both as an input attachment and as a color
+                * or depth/stencil attachment of this subpass is flagged
+                * in_render_loop on both references.
+                */
+               for (uint32_t j = 0; j < subpass->input_count; ++j) {
+                       if (subpass->input_attachments[j].attachment == VK_ATTACHMENT_UNUSED)
+                               continue;
+
+                       for (uint32_t k = 0; k < subpass->color_count; ++k) {
+                               if (subpass->color_attachments[k].attachment == subpass->input_attachments[j].attachment) {
+                                       subpass->input_attachments[j].in_render_loop = true;
+                                       subpass->color_attachments[k].in_render_loop = true;
+                               }
+                       }
+
+                       if (subpass->depth_stencil_attachment &&
+                           subpass->depth_stencil_attachment->attachment == subpass->input_attachments[j].attachment) {
+                               subpass->input_attachments[j].in_render_loop = true;
+                               subpass->depth_stencil_attachment->in_render_loop = true;
+                       }
+               }
+       }
+}
+
+/**
+ * Count the attachment references carried by one subpass description:
+ * the input and color references, one resolve reference per color
+ * attachment when pResolveAttachments is set, and one more when
+ * pDepthStencilAttachment is set.
+ */
+static unsigned
+libresoc_num_subpass_attachments(const VkSubpassDescription *desc)
+{
+       unsigned total = desc->inputAttachmentCount + desc->colorAttachmentCount;
+
+       if (desc->pResolveAttachments)
+               total += desc->colorAttachmentCount;
+       if (desc->pDepthStencilAttachment != NULL)
+               total += 1;
+
+       return total;
+}
+
+/**
+ * Tear down a render pass object: finish its vk_object_base, then free the
+ * separately allocated subpass_attachments array and finally the pass
+ * allocation itself, both through vk_free2 with the device allocator as
+ * parent and pAllocator as the caller-supplied allocator.
+ */
+static void
+libresoc_destroy_render_pass(struct libresoc_device *device,
+                        const VkAllocationCallbacks *pAllocator,
+                        struct libresoc_render_pass *pass)
+{
+       const VkAllocationCallbacks *device_alloc = &device->vk.alloc;
+       struct libresoc_subpass_attachment *refs = pass->subpass_attachments;
+
+       vk_object_base_finish(&pass->base);
+       vk_free2(device_alloc, pAllocator, refs);
+       vk_free2(device_alloc, pAllocator, pass);
+}
 VkResult libresoc_CreateRenderPass(
        VkDevice                                    _device,
        const VkRenderPassCreateInfo*               pCreateInfo,
        const VkAllocationCallbacks*                pAllocator,
        VkRenderPass*                               pRenderPass)
 {
+       LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
+       struct libresoc_render_pass *pass;
+       size_t size;
+       size_t attachments_offset;
+
+       assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
+
+       size = sizeof(*pass);
+       size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
+       attachments_offset = size;
+       size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
+
+       pass = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
+                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+       if (pass == NULL)
+               return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+       memset(pass, 0, size);
+
+       vk_object_base_init(&device->vk, &pass->base,
+                           VK_OBJECT_TYPE_RENDER_PASS);
+
+       pass->attachment_count = pCreateInfo->attachmentCount;
+       pass->subpass_count = pCreateInfo->subpassCount;
+       pass->attachments = (void *) pass + attachments_offset;
+
+       for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
+               struct libresoc_render_pass_attachment *att = &pass->attachments[i];
+
+               att->format = pCreateInfo->pAttachments[i].format;
+               att->samples = pCreateInfo->pAttachments[i].samples;
+               att->load_op = pCreateInfo->pAttachments[i].loadOp;
+               att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
+               att->initial_layout =  pCreateInfo->pAttachments[i].initialLayout;
+               att->final_layout =  pCreateInfo->pAttachments[i].finalLayout;
+               att->stencil_initial_layout = pCreateInfo->pAttachments[i].initialLayout;
+               att->stencil_final_layout = pCreateInfo->pAttachments[i].finalLayout;
+               // att->store_op = pCreateInfo->pAttachments[i].storeOp;
+               // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
+       }
+       uint32_t subpass_attachment_count = 0;
+       struct libresoc_subpass_attachment *p;
+       for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
+               subpass_attachment_count +=
+                       libresoc_num_subpass_attachments(&pCreateInfo->pSubpasses[i]);
+       }
+
+       if (subpass_attachment_count) {
+               pass->subpass_attachments =
+                       vk_alloc2(&device->vk.alloc, pAllocator,
+                                   subpass_attachment_count * sizeof(struct libresoc_subpass_attachment), 8,
+                                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+               if (pass->subpass_attachments == NULL) {
+                       libresoc_destroy_render_pass(device, pAllocator, pass);
+                       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+               }
+       } else
+               pass->subpass_attachments = NULL;
+
+       p = pass->subpass_attachments;
+       for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
+               const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
+               struct libresoc_subpass *subpass = &pass->subpasses[i];
+
+               subpass->input_count = desc->inputAttachmentCount;
+               subpass->color_count = desc->colorAttachmentCount;
+               subpass->attachment_count = libresoc_num_subpass_attachments(desc);
+               subpass->attachments = p;
+
+               for (uint32_t j = 0; j < subpass->attachment_count; j++) {
+            subpass->attachments[j].clear = (pCreateInfo->pAttachments[i].loadOp ==
+                                     VK_ATTACHMENT_LOAD_OP_CLEAR); 
+        }
+
+               if (desc->inputAttachmentCount > 0) {
+                       subpass->input_attachments = p;
+                       p += desc->inputAttachmentCount;
+
+                       for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
+                               subpass->input_attachments[j] = (struct libresoc_subpass_attachment) {
+                                       .attachment = desc->pInputAttachments[j].attachment,
+                                       .layout = desc->pInputAttachments[j].layout,
+                                       .stencil_layout = desc->pInputAttachments[j].layout,
+                               };
+                       }
+               }
+
+               if (desc->colorAttachmentCount > 0) {
+                       subpass->color_attachments = p;
+                       p += desc->colorAttachmentCount;
+
+                       for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
+                               subpass->color_attachments[j] = (struct libresoc_subpass_attachment) {
+                                       .attachment = desc->pColorAttachments[j].attachment,
+                                       .layout = desc->pColorAttachments[j].layout,
+                               };
+                       }
+               }
+
+               if (desc->pResolveAttachments) {
+                       subpass->resolve_attachments = p;
+                       p += desc->colorAttachmentCount;
+
+                       for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
+                               subpass->resolve_attachments[j] = (struct libresoc_subpass_attachment) {
+                                       .attachment = desc->pResolveAttachments[j].attachment,
+                                       .layout = desc->pResolveAttachments[j].layout,
+                                       .stencil_layout = desc->pResolveAttachments[j].layout,
+                               };
+                       }
+               }
+
+               if (desc->pDepthStencilAttachment) {
+                       subpass->depth_stencil_attachment = p++;
+
+                       *subpass->depth_stencil_attachment = (struct libresoc_subpass_attachment) {
+                               .attachment = desc->pDepthStencilAttachment->attachment,
+                               .layout = desc->pDepthStencilAttachment->layout,
+                               .stencil_layout = desc->pDepthStencilAttachment->layout,
+                       };
+               }
+       }
+
+       bool has_ingoing_dep = false;
+       bool has_outgoing_dep = false;
+
+       for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
+               /* Convert to a Dependency2 */
+               struct VkSubpassDependency2 dep2 = {
+                       .srcSubpass       = pCreateInfo->pDependencies[i].srcSubpass,
+                       .dstSubpass       = pCreateInfo->pDependencies[i].dstSubpass,
+                       .srcStageMask     = pCreateInfo->pDependencies[i].srcStageMask,
+                       .dstStageMask     = pCreateInfo->pDependencies[i].dstStageMask,
+                       .srcAccessMask    = pCreateInfo->pDependencies[i].srcAccessMask,
+                       .dstAccessMask    = pCreateInfo->pDependencies[i].dstAccessMask,
+                       .dependencyFlags  = pCreateInfo->pDependencies[i].dependencyFlags,
+               };
+               libresoc_render_pass_add_subpass_dep(pass, &dep2);
+
+               /* Determine if the subpass has explicit dependencies from/to
+                * VK_SUBPASS_EXTERNAL.
+                */
+               if (pCreateInfo->pDependencies[i].srcSubpass == VK_SUBPASS_EXTERNAL)
+                       has_ingoing_dep = true;
+               if (pCreateInfo->pDependencies[i].dstSubpass == VK_SUBPASS_EXTERNAL)
+                       has_outgoing_dep = true;
+       }
+
+       libresoc_render_pass_add_implicit_deps(pass,
+                                          has_ingoing_dep, has_outgoing_dep);
+
+       libresoc_render_pass_compile(pass);
+
+       *pRenderPass = libresoc_render_pass_to_handle(pass);
        //TODO: stub
        return VK_SUCCESS;
 }
+
+/**
+ * vkDestroyRenderPass entry point: resolves both handles and releases the
+ * render pass through libresoc_destroy_render_pass.  A null _pass handle
+ * is a no-op.
+ */
+void libresoc_DestroyRenderPass(
+       VkDevice                                    _device,
+       VkRenderPass                                _pass,
+       const VkAllocationCallbacks*                pAllocator)
+{
+       LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
+       LIBRESOC_FROM_HANDLE(libresoc_render_pass, pass, _pass);
+
+       /* Only a non-null handle has anything to release. */
+       if (_pass)
+               libresoc_destroy_render_pass(device, pAllocator, pass);
+}
+
+/**
+ * vkGetRenderAreaGranularity entry point: always reports a granularity of
+ * 1x1, independent of the device and render pass arguments (neither is
+ * read).
+ */
+void libresoc_GetRenderAreaGranularity(
+    VkDevice                                    device,
+    VkRenderPass                                renderPass,
+    VkExtent2D*                                 pGranularity)
+{
+       *pGranularity = (VkExtent2D) { .width = 1, .height = 1 };
+}
index 097f37b6721a078d495625418bf0a1bc9356e13b..e80d48e1f1c080b4ac25844c6c4b17b4e30c5919 100644 (file)
@@ -62,6 +62,14 @@ VkResult libresoc_create_shaders(struct libresoc_pipeline *pipeline,
                                                    subgroup_size, ballot_bit_size);
 
         modules[i]->llvm_module = libresoc_nir_translate(&device->instance->llvm_ref, nir[i]);
+        if(nir[i]->info.stage == MESA_SHADER_VERTEX)
+        {
+            pipeline->vs = (VertexShader)GetFuncPointer(&device->instance->llvm_ref, gl_shader_stage_name(nir[i]->info.stage));
+        }
+        else if(nir[i]->info.stage == MESA_SHADER_FRAGMENT)
+        {
+            pipeline->fs = (FragmentShader)GetFuncPointer(&device->instance->llvm_ref, gl_shader_stage_name(nir[i]->info.stage));
+        }
                /* We don't want to alter meta shaders IR directly so clone it
                 * first.
                 */
index 84eb55c70d4d44f10836f1ad977a09c36c3cebda..cbfff1661e284704c383fde78b879968fcb1b3ac 100644 (file)
@@ -52,6 +52,7 @@
 
 typedef unsigned char byte;
 
+
 static inline gl_shader_stage
 vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
 {
@@ -144,6 +145,216 @@ libresoc_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
        }
 }
 
+struct GPUState /* Bundle of handles and values; the type of the `state` parameter of the VertexShader and FragmentShader function pointers. */
+{
+  struct
+  {
+    VkBuffer buffer;
+    VkDeviceSize offset;
+    VkIndexType indexType;
+  } ib; /* same three fields as the BindIB command payload */
+  struct
+  {
+    VkBuffer buffer;
+    VkDeviceSize offset;
+    VkDeviceSize stride;
+  } vbs[4]; /* four buffer/offset/stride slots; NOTE(review): presumably vertex-buffer bindings fed by BindVB -- confirm */
+  VkViewport view;
+  VkImage col[8]; /* NOTE(review): presumably the color targets -- confirm against the code that fills this struct */
+  VkImage depth;
+  VkPipeline pipeline;
+  VkDescriptorSet sets[8];
+  byte pushconsts[128]; /* same capacity as PushConstants.values */
+};
+
+struct int4 /* Four ints, addressable either by name (x, y, z, w) or by index (v[0..3]). */
+{
+  union
+  {
+    struct
+    {
+      int x, y, z, w;
+    }; /* anonymous struct: members are accessed directly on int4 */
+    int v[4]; /* array view overlapping x, y, z, w */
+  };
+};
+
+struct float4 /* Four floats, addressable either by name (x, y, z, w) or by index (v[0..3]). */
+{
+  union
+  {
+    struct
+    {
+      float x, y, z, w;
+    }; /* anonymous struct: members are accessed directly on float4 */
+    float v[4]; /* array view overlapping x, y, z, w */
+  };
+};
+
+struct VertexCacheEntry /* Per-vertex record: the `out` parameter type of VertexShader and the element type of FragmentShader's tri[3]. */
+{
+  struct float4 position;
+  struct float4 interps[10]; /* ten float4 slots; NOTE(review): presumably interpolated varyings -- confirm */
+};
+
+struct TriangleWork /* Per-triangle work record. NOTE(review): nothing in this header shows how the fields are computed; the names suggest edge vectors (AB, AC), a doubled area and a window bounding box -- confirm against the code that uses it. */
+{
+  struct GPUState *state;
+
+  int ABx;
+  int ABy;
+  int ACx;
+  int ACy;
+
+  struct VertexCacheEntry *vsout;
+  struct int4 *tri;
+
+  int barymul;
+  int area2;
+
+  float invarea;
+
+  struct float4 invw;
+  struct float4 depth;
+
+  struct int4 minwin, maxwin;
+};
+
+enum Command { /* Identifiers stored in the commandId field of the command payload structs below; values run from 200 to 213. */
+    BeginRenderPassID = 200,
+    EndRenderPassID = 201,
+    BindPipelineID = 202,
+    BindDescriptorSetsID = 203,
+    BindVBID = 204,
+    BindIBID = 205,
+    SetViewportID = 206,
+    SetScissorsID = 207,
+    PushConstantsID = 208,
+    DrawID = 209,
+    DrawIndexedID = 210,
+    CopyBuf2ImgID = 211,
+    CopyBufID = 212,
+    PipelineBarrierID = 213,
+};
+
+struct PipelineBarrier /* Command payload that carries only its identifier. */
+{
+  unsigned int commandId; /* first member of every command payload struct */
+};
+
+struct BeginRenderPass /* Command payload: render pass and framebuffer handles plus up to eight clear values. */
+{
+  unsigned int commandId; /* first member of every command payload struct */
+  VkRenderPass renderPass;
+  VkFramebuffer framebuffer;
+  VkClearValue clearval[8]; /* fixed capacity of eight entries */
+};
+
+struct EndRenderPass /* Command payload that carries only its identifier; libresoc_CmdEndRenderPass sets it to EndRenderPassID. */
+{
+  unsigned int commandId; /* first member of every command payload struct */
+};
+
+struct BindPipeline /* Command payload: the pipeline handle. */
+{
+  unsigned int commandId; /* first member of every command payload struct */
+  VkPipeline pipeline;
+};
+
+struct BindDescriptorSets /* Command payload: a single descriptor set handle together with an index. */
+{
+  unsigned int commandId; /* first member of every command payload struct */
+  uint32_t idx; /* NOTE(review): presumably the slot in GPUState.sets -- confirm */
+  VkDescriptorSet set;
+};
+
+struct BindVB /* Command payload: one buffer handle and offset for a numbered slot. */
+{
+  unsigned int commandId; /* first member of every command payload struct */
+  uint32_t slot; /* NOTE(review): presumably an index into GPUState.vbs -- confirm */
+  VkBuffer buffer;
+  VkDeviceSize offset;
+};
+
+struct BindIB /* Command payload: buffer handle, offset and index type (same fields as GPUState.ib). */
+{
+  unsigned int commandId; /* first member of every command payload struct */
+  VkBuffer buffer;
+  VkDeviceSize offset;
+  VkIndexType indexType;
+};
+
+struct SetViewport /* Command payload: a single VkViewport. */
+{
+  unsigned int commandId; /* first member of every command payload struct */
+  VkViewport view;
+};
+
+struct SetScissors /* Command payload that carries only its identifier; no scissor rectangles are stored (libresoc_CmdSetScissor records just SetScissorsID). */
+{
+  unsigned int commandId; /* first member of every command payload struct */
+};
+
+struct PushConstants /* Command payload: an offset/size pair and an inline 128-byte value buffer. */
+{
+  unsigned int commandId; /* first member of every command payload struct */
+  uint32_t offset, size;
+  byte values[128]; /* same capacity as GPUState.pushconsts */
+};
+
+struct Draw /* Command payload: the four counters of a non-indexed draw. */
+{
+  unsigned int commandId; /* first member of every command payload struct */
+  uint32_t vertexCount, instanceCount, firstVertex, firstInstance;
+};
+
+struct DrawIndexed /* Command payload: the counters of an indexed draw plus a signed vertex offset. */
+{
+  unsigned int commandId; /* first member of every command payload struct */
+  uint32_t indexCount, instanceCount, firstIndex, firstInstance;
+  int32_t vertexOffset; /* signed, unlike the other fields */
+};
+
+struct CopyBuf2Img /* Command payload: source buffer, destination image and exactly one VkBufferImageCopy region. */
+{
+  unsigned int commandId; /* first member of every command payload struct */
+  VkBuffer srcBuffer;
+  VkImage dstImage;
+  VkBufferImageCopy region; /* a single region is stored by value */
+};
+
+struct CopyBuf /* Command payload: source and destination buffers and exactly one VkBufferCopy region. */
+{
+  unsigned int commandId; /* first member of every command payload struct */
+  VkBuffer srcBuffer;
+  VkBuffer dstBuffer;
+  VkBufferCopy region; /* a single region is stored by value */
+};
+
+struct libresoc_cmd { /* One recorded command: a list node plus a union of the command payload structs. */
+  struct list_head link; /* the Cmd* entry points append this node with list_addtail(&cmd->link, &cmd_buffer->cmd.link) */
+  union { /* every member begins with `unsigned int commandId`, which holds an enum Command value */
+        struct PipelineBarrier pipelineBarrier; 
+        struct EndRenderPass endRenderPass;
+        struct BeginRenderPass beginRenderPass;
+        struct BindPipeline bindPipeline;
+        struct BindDescriptorSets bindDescriptorSets;
+        struct BindVB bindVB;
+        struct BindIB bindIB;
+        struct SetViewport setViewport;
+        struct SetScissors setScissors;
+        struct PushConstants pushConstants;
+        struct Draw draw;
+        struct DrawIndexed drawIndexed;
+        struct CopyBuf2Img copyBuf2Img;
+        struct CopyBuf copyBuf;
+   } command;
+};
+
+typedef void (*Shader)(); /* function pointer declared without a parameter list */
+typedef void (*VertexShader)(struct GPUState *state, uint32_t vertexIndex, struct VertexCacheEntry *out); /* libresoc_create_shaders casts the GetFuncPointer result to this type and stores it in pipeline->vs */
+typedef void (*FragmentShader)(struct GPUState *state, float pixdepth, struct float4 *bary,
+                               struct VertexCacheEntry tri[3], struct float4 *out); /* libresoc_create_shaders casts the GetFuncPointer result to this type and stores it in pipeline->fs */
 struct libresoc_fence {
        struct vk_object_base base;
 };
@@ -224,6 +435,8 @@ struct libresoc_pipeline {
 
        VkShaderStageFlags                           active_stages;
 
+    VertexShader vs;
+    FragmentShader fs;
 };
 void
 libresoc_pipeline_cache_init(struct libresoc_pipeline_cache *cache,
@@ -369,41 +582,10 @@ enum libresoc_cmd_buffer_status {
 };
 
 struct libresoc_cmd_buffer {
-       struct vk_object_base                         base;
-
-       struct libresoc_device *                          device;
-
-       struct libresoc_cmd_pool *                        pool;
-       struct list_head                             pool_link;
-
-       VkCommandBufferUsageFlags                    usage_flags;
-       VkCommandBufferLevel                         level;
-       enum libresoc_cmd_buffer_status status;
-       //struct radeon_cmdbuf *cs;
-       // struct libresoc_cmd_state state;
-       // struct libresoc_vertex_binding                   vertex_bindings[MAX_VBS];
-       // struct libresoc_streamout_binding                streamout_bindings[MAX_SO_BUFFERS];
-       uint32_t queue_family_index;
-
-       uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
-       VkShaderStageFlags push_constant_stages;
-       // struct libresoc_descriptor_set meta_push_descriptors;
-
-       // struct libresoc_descriptor_state descriptors[MAX_BIND_POINTS];
-
-       struct libresoc_cmd_buffer_upload upload;
-
-       uint32_t scratch_size_per_wave_needed;
-       uint32_t scratch_waves_wanted;
-       uint32_t compute_scratch_size_per_wave_needed;
-       uint32_t compute_scratch_waves_wanted;
-       uint32_t esgs_ring_size_needed;
-       uint32_t gsvs_ring_size_needed;
-       bool tess_rings_needed;
-       bool sample_positions_needed;
-
-       VkResult record_result;
-
+       struct vk_object_base    base; /* after this change the command buffer keeps only the four members below */
+    struct libresoc_device *device; /* the Cmd* entry points allocate commands from device->vk.alloc */
+    uint64_t size; /* NOTE(review): meaning not shown in this chunk -- confirm where it is written */
+    struct libresoc_cmd cmd; /* cmd.link is the list that recorded commands are appended to with list_addtail */
 };
 
 struct libresoc_device_memory {
@@ -419,6 +601,12 @@ struct libresoc_device_memory {
     byte *bytes;
 };
 
+struct libresoc_buffer { /* Driver-side object behind a VkBuffer handle (see the LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS line for libresoc_buffer). */
+  struct vk_object_base                        base;
+  VkDeviceSize size;
+  byte *bytes; /* NOTE(review): presumably the buffer's backing storage, like libresoc_device_memory.bytes -- confirm where it is assigned */
+};
+
 void libresoc_free_memory(struct libresoc_device *device,
                      const VkAllocationCallbacks* pAllocator,
                      struct libresoc_device_memory *mem);
@@ -514,6 +702,124 @@ libresoc_graphics_pipeline_create(VkDevice device,
                              const VkAllocationCallbacks *alloc,
                              VkPipeline *pPipeline);
 
+union libresoc_descriptor { /* 24 dwords viewed either as plane0 + fmask descriptors (2 x 8 dwords) or as three 8-dword plane descriptors. */
+       struct {
+               uint32_t plane0_descriptor[8];
+               uint32_t fmask_descriptor[8]; /* overlaps plane_descriptors[1] */
+       };
+       struct {
+               uint32_t plane_descriptors[3][8];
+       };
+};
+
+struct libresoc_image_view { /* Driver-side object behind a VkImageView handle; libresoc_framebuffer stores pointers to these. */
+       struct vk_object_base base;
+       struct libresoc_image *image; /**< VkImageViewCreateInfo::image */
+
+       VkImageViewType type;
+       VkImageAspectFlags aspect_mask;
+       VkFormat vk_format;
+       unsigned plane_id;
+       bool multiple_planes;
+       uint32_t base_layer; /* NOTE(review): this and the three fields below presumably mirror VkImageViewCreateInfo::subresourceRange -- confirm in the view init code */
+       uint32_t layer_count;
+       uint32_t base_mip;
+       uint32_t level_count;
+       VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */
+
+       union libresoc_descriptor descriptor;
+
+       /* Descriptor for use as a storage image as opposed to a sampled image.
+        * This has a few differences for cube maps (e.g. type).
+        */
+       union libresoc_descriptor storage_descriptor;
+};
+
+struct libresoc_framebuffer { /* Driver-side object behind a VkFramebuffer handle: dimensions plus a trailing array of image-view pointers. */
+       struct vk_object_base                        base;
+       uint32_t                                     width;
+       uint32_t                                     height;
+       uint32_t                                     layers;
+
+       uint32_t                                     attachment_count;
+       struct libresoc_image_view                       *attachments[0]; /* zero-length trailing array; NOTE(review): presumably attachment_count pointers are allocated right after the struct -- confirm in the framebuffer creation code */
+};
+
+struct libresoc_subpass_barrier { /* Stage/access masks of a barrier; embedded as libresoc_subpass.start_barrier and libresoc_render_pass.end_barrier. */
+       VkPipelineStageFlags src_stage_mask;
+       VkAccessFlags        src_access_mask;
+       VkAccessFlags        dst_access_mask;
+};
+
+void libresoc_subpass_barrier(struct libresoc_cmd_buffer *cmd_buffer,
+                         const struct libresoc_subpass_barrier *barrier); /* prototype only; shares its name with struct libresoc_subpass_barrier (separate namespaces in C). NOTE(review): definition not in this chunk */
+
+struct libresoc_subpass_attachment { /* One attachment reference of a subpass, filled in by libresoc_CreateRenderPass. */
+       uint32_t         attachment; /* attachment number from the Vulkan reference; may be VK_ATTACHMENT_UNUSED */
+       VkImageLayout    layout;
+       VkImageLayout    stencil_layout; /* libresoc_CreateRenderPass copies the reference's layout here, except for color references where it is left zero */
+       bool             in_render_loop; /* set when the same attachment is both an input attachment and a color or depth/stencil attachment of one subpass */
+    bool clear; /* assigned in libresoc_CreateRenderPass from a loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR comparison */
+};
+
+struct libresoc_subpass { /* Per-subpass state of a render pass; stored in the trailing subpasses array of libresoc_render_pass. */
+       uint32_t                                     attachment_count; /* libresoc_num_subpass_attachments() of the subpass description */
+       struct libresoc_subpass_attachment *             attachments; /* start of this subpass's slice of libresoc_render_pass.subpass_attachments */
+
+       uint32_t                                     input_count;
+       uint32_t                                     color_count;
+       struct libresoc_subpass_attachment *             input_attachments; /* these four pointers point into the `attachments` slice; each is left NULL when the description has no such references */
+       struct libresoc_subpass_attachment *             color_attachments;
+       struct libresoc_subpass_attachment *             resolve_attachments; /* color_count entries when set */
+       struct libresoc_subpass_attachment *             depth_stencil_attachment;
+       struct libresoc_subpass_attachment *             ds_resolve_attachment; /* not assigned by libresoc_CreateRenderPass itself */
+       VkResolveModeFlagBits                        depth_resolve_mode;
+       VkResolveModeFlagBits                        stencil_resolve_mode;
+
+       /** Subpass has at least one color resolve attachment */
+       bool                                         has_color_resolve;
+
+       /** Subpass has at least one color attachment */
+       bool                                         has_color_att;
+
+       struct libresoc_subpass_barrier                  start_barrier;
+
+       uint32_t                                     view_mask;
+
+       VkSampleCountFlagBits                        color_sample_count;
+       VkSampleCountFlagBits                        depth_sample_count;
+       VkSampleCountFlagBits                        max_sample_count; /* MAX2(color_sample_count, depth_sample_count) */
+};
+
+uint32_t
+libresoc_get_subpass_id(struct libresoc_cmd_buffer *cmd_buffer); /* prototype only. NOTE(review): definition not in this chunk; presumably returns the index of the command buffer's current subpass -- confirm */
+
+struct libresoc_render_pass_attachment { /* Per-attachment description copied from VkRenderPassCreateInfo::pAttachments by libresoc_CreateRenderPass; store ops are not kept. */
+       VkFormat                                     format;
+       uint32_t                                     samples;
+       VkAttachmentLoadOp                           load_op;
+       VkAttachmentLoadOp                           stencil_load_op;
+       VkImageLayout                                initial_layout;
+       VkImageLayout                                final_layout;
+       VkImageLayout                                stencil_initial_layout; /* libresoc_CreateRenderPass copies initialLayout here */
+       VkImageLayout                                stencil_final_layout; /* libresoc_CreateRenderPass copies finalLayout here */
+
+       /* The subpass id in which the attachment will be used first/last. */
+       uint32_t                                     first_subpass_idx;
+       uint32_t                                     last_subpass_idx;
+};
+
+struct libresoc_render_pass { /* Driver-side object behind a VkRenderPass handle; allocated together with its subpasses and attachments arrays. */
+       struct vk_object_base                        base;
+       uint32_t                                     attachment_count;
+       uint32_t                                     subpass_count;
+       struct libresoc_subpass_attachment *             subpass_attachments; /* separate allocation shared by all subpasses (NULL when there are no references); freed in libresoc_destroy_render_pass */
+       struct libresoc_render_pass_attachment *         attachments; /* points into the same allocation, just past subpasses[subpass_count] */
+       struct libresoc_subpass_barrier                  end_barrier;
+       struct libresoc_subpass                          subpasses[0]; /* zero-length trailing array holding subpass_count entries */
+};
+
+
 #define libresoc_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
 
 VkResult __vk_errorf(struct libresoc_instance *instance, VkResult error,
@@ -564,7 +870,7 @@ LIBRESOC_DEFINE_HANDLE_CASTS(libresoc_physical_device, VkPhysicalDevice)
 LIBRESOC_DEFINE_HANDLE_CASTS(libresoc_queue, VkQueue)
 
 LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_cmd_pool, VkCommandPool)
-//LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_buffer, VkBuffer)
+LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_buffer, VkBuffer)
 //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_buffer_view, VkBufferView)
 //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_descriptor_pool, VkDescriptorPool)
 //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_descriptor_set, VkDescriptorSet)
@@ -573,14 +879,14 @@ LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_cmd_pool, VkCommandPool)
 LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_device_memory, VkDeviceMemory)
 LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_fence, VkFence)
 //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_event, VkEvent)
-//LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_framebuffer, VkFramebuffer)
+LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_framebuffer, VkFramebuffer)
 LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_image, VkImage)
-//LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_image_view, VkImageView);
+LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_image_view, VkImageView);
 LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_pipeline_cache, VkPipelineCache)
 LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_pipeline, VkPipeline)
 //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_pipeline_layout, VkPipelineLayout)
 //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_query_pool, VkQueryPool)
-//LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_render_pass, VkRenderPass)
+LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_render_pass, VkRenderPass)
 //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_sampler, VkSampler)
 //LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_sampler_ycbcr_conversion, VkSamplerYcbcrConversion)
 LIBRESOC_DEFINE_NONDISP_HANDLE_CASTS(libresoc_shader_module, VkShaderModule)