X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fanv_meta_blit2d.c;h=6b2222c73b5da72cefba59b325d91e28a3718430;hb=a0f5c496e348b918a556dd275289d4dda63b94c9;hp=144a62481b8a5140a4ceb1bfe79b955899cbfcc0;hpb=9553fd2c97bda18b997845610be365d6adf0fd4c;p=mesa.git diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 144a62481b8..6b2222c73b5 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -24,6 +24,44 @@ #include "anv_meta.h" #include "nir/nir_builder.h" +enum blit2d_src_type { + /* We can make a "normal" image view of this source and just texture + * from it like you would in any other shader. + */ + BLIT2D_SRC_TYPE_NORMAL, + + /* The source is W-tiled and we need to detile manually in the shader. + * This will work on any platform but is needed for all W-tiled sources + * prior to Broadwell. + */ + BLIT2D_SRC_TYPE_W_DETILE, + + BLIT2D_NUM_SRC_TYPES, +}; + +enum blit2d_dst_type { + /* We can bind this destination as a "normal" render target and render + * to it just like you would anywhere else. + */ + BLIT2D_DST_TYPE_NORMAL, + + /* The destination is W-tiled and we need to do the tiling manually in + * the shader. This is required for all W-tiled destinations. + * + * Sky Lake adds a feature for providing explicit stencil values in the + * shader but mesa doesn't support that yet so neither do we. + */ + BLIT2D_DST_TYPE_W_TILE, + + /* The destination has a 3-channel RGB format. Since we can't render to + * non-power-of-two textures, we have to bind it as a red texture and + * select the correct component for the given red pixel in the shader. + */ + BLIT2D_DST_TYPE_RGB, + + BLIT2D_NUM_DST_TYPES, +}; + static VkFormat vk_format_for_size(int bs) { @@ -54,32 +92,40 @@ vk_format_for_size(int bs) } } +/* This function returns the format corresponding to a single component of the + * RGB format for the given size returned by vk_format_for_size(). + */ +static VkFormat +vk_single_component_format_for_rgb_size(int bs) +{ + switch (bs) { + case 3: return VK_FORMAT_R8_UNORM; + case 6: return VK_FORMAT_R16_UNORM; + case 12: return VK_FORMAT_R32_UINT; + default: + unreachable("Invalid format block size"); + } +} + static void create_iview(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_surf *surf, - struct anv_meta_blit2d_rect *rect, + uint64_t offset, VkImageUsageFlags usage, + uint32_t width, + uint32_t height, + VkFormat format, VkImage *img, struct anv_image_view *iview) { - struct isl_tile_info tile_info; - isl_tiling_get_info(&cmd_buffer->device->isl_dev, - surf->tiling, surf->bs, &tile_info); - const unsigned tile_width_px = tile_info.width > surf->bs ? - tile_info.width / surf->bs : 1; - uint32_t *rect_y = (usage == VK_IMAGE_USAGE_SAMPLED_BIT) ? - &rect->src_y : &rect->dst_y; - uint32_t *rect_x = (usage == VK_IMAGE_USAGE_SAMPLED_BIT) ? - &rect->src_x : &rect->dst_x; - - /* Define the shared state among all created image views */ const VkImageCreateInfo image_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, - .format = vk_format_for_size(surf->bs), + /* W-tiled images must be stencil-formatted. */ + .format = format, .extent = { - .width = rect->width + (*rect_x) % tile_width_px, - .height = rect->height + (*rect_y) % tile_info.height, + .width = width, + .height = height, .depth = 1, }, .mipLevels = 1, @@ -102,17 +148,8 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, * creating a dummy memory object etc. so there's really no point. */ anv_image_from_handle(*img)->bo = surf->bo; - anv_image_from_handle(*img)->offset = surf->base_offset; + anv_image_from_handle(*img)->offset = surf->base_offset + offset; - /* Create a VkImageView that starts at the tile aligned offset closest - * to the provided x/y offset into the surface. - */ - uint32_t img_o = 0; - isl_surf_get_image_intratile_offset_el_xy(&cmd_buffer->device->isl_dev, - &anv_image_from_handle(*img)-> - color_surface.isl, - *rect_x, *rect_y, - &img_o, rect_x, rect_y); anv_image_view_init(iview, cmd_buffer->device, &(VkImageViewCreateInfo) { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -120,211 +157,660 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = image_info.format, .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .aspectMask = anv_image_from_handle(*img)->aspects, .baseMipLevel = 0, .levelCount = 1, .baseArrayLayer = 0, .layerCount = 1 }, - }, cmd_buffer, img_o, usage); + }, cmd_buffer, usage); } -static void -meta_emit_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_image_view *src_iview, - VkOffset3D src_offset, - struct anv_image_view *dest_iview, - VkOffset3D dest_offset, - VkExtent3D extent) -{ - struct anv_device *device = cmd_buffer->device; +struct blit2d_src_temps { + VkImage image; + struct anv_image_view iview; - struct blit_vb_data { - float pos[2]; - float tex_coord[3]; - } *vb_data; + struct anv_buffer buffer; + struct anv_buffer_view bview; - unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); - - struct anv_state vb_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); - memset(vb_state.map, 0, sizeof(struct anv_vue_header)); - vb_data = vb_state.map + sizeof(struct anv_vue_header); - - vb_data[0] = (struct blit_vb_data) { - .pos = { - dest_offset.x + extent.width, - dest_offset.y + extent.height, - }, - .tex_coord = { - src_offset.x + extent.width, - src_offset.y + extent.height, - src_offset.z, - }, - }; - - vb_data[1] = (struct blit_vb_data) { - .pos = { - dest_offset.x, - dest_offset.y + extent.height, - }, - .tex_coord = { - src_offset.x, - src_offset.y + extent.height, - src_offset.z, - }, - }; - - vb_data[2] = (struct blit_vb_data) { - .pos = { - dest_offset.x, - dest_offset.y, - }, - .tex_coord = { - src_offset.x, - src_offset.y, - src_offset.z, - }, - }; - - anv_state_clflush(vb_state); - - struct anv_buffer vertex_buffer = { - .device = device, - .size = vb_size, - .bo = &device->dynamic_state_block_pool.bo, - .offset = vb_state.offset, - }; + VkDescriptorPool desc_pool; + VkDescriptorSet set; +}; - anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, - (VkBuffer[]) { - anv_buffer_to_handle(&vertex_buffer), - anv_buffer_to_handle(&vertex_buffer) - }, - (VkDeviceSize[]) { - 0, - sizeof(struct anv_vue_header), - }); +static void +blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + enum blit2d_src_type src_type, + struct anv_meta_blit2d_rect *rect, + struct blit2d_src_temps *tmp) +{ + struct anv_device *device = cmd_buffer->device; + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - VkDescriptorPool desc_pool; - anv_CreateDescriptorPool(anv_device_to_handle(device), - &(const VkDescriptorPoolCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = NULL, - .flags = 0, - .maxSets = 1, - .poolSizeCount = 1, - .pPoolSizes = (VkDescriptorPoolSize[]) { + if (src_type == BLIT2D_SRC_TYPE_NORMAL) { + uint32_t offset = 0; + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + src->tiling, src->bs, src->pitch, + rect->src_x, rect->src_y, + &offset, &rect->src_x, &rect->src_y); + + VkImageUsageFlags usage = VK_IMAGE_USAGE_SAMPLED_BIT; + + /* W-tiled images must be stencil-formatted. Outside of meta, + * a stencil image has this usage bit set. Adding it here + * ensures the ISL surface is created correctly. + */ + if (src->tiling == ISL_TILING_W) + usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + create_iview(cmd_buffer, src, offset, usage, + rect->src_x + rect->width, rect->src_y + rect->height, + src->tiling == ISL_TILING_W ? + VK_FORMAT_S8_UINT : vk_format_for_size(src->bs), + &tmp->image, &tmp->iview); + + anv_CreateDescriptorPool(vk_device, + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { + { + .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1 + }, + } + }, &cmd_buffer->pool->alloc, &tmp->desc_pool); + + anv_AllocateDescriptorSets(vk_device, + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = tmp->desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit2d.img_ds_layout + }, &tmp->set); + + anv_UpdateDescriptorSets(vk_device, + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { { - .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1 - }, - } - }, &cmd_buffer->pool->alloc, &desc_pool); - - VkDescriptorSet set; - anv_AllocateDescriptorSets(anv_device_to_handle(device), - &(VkDescriptorSetAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = desc_pool, - .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit2d.ds_layout - }, &set); - - anv_UpdateDescriptorSets(anv_device_to_handle(device), - 1, /* writeCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = set, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = (VkDescriptorImageInfo[]) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = tmp->set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = NULL, + .imageView = anv_image_view_to_handle(&tmp->iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit2d.img_p_layout, 0, 1, + &tmp->set, 0, NULL); + } else { + assert(src_type == BLIT2D_SRC_TYPE_W_DETILE); + assert(src->tiling == ISL_TILING_W); + assert(src->bs == 1); + + uint32_t tile_offset = 0; + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + ISL_TILING_W, 1, src->pitch, + rect->src_x, rect->src_y, + &tile_offset, + &rect->src_x, &rect->src_y); + + tmp->buffer = (struct anv_buffer) { + .device = device, + .size = align_u32(rect->src_y + rect->height, 64) * src->pitch, + .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, + .bo = src->bo, + .offset = src->base_offset + tile_offset, + }; + + anv_buffer_view_init(&tmp->bview, device, + &(VkBufferViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .buffer = anv_buffer_to_handle(&tmp->buffer), + .format = VK_FORMAT_R8_UINT, + .offset = 0, + .range = VK_WHOLE_SIZE, + }, cmd_buffer); + + anv_CreateDescriptorPool(vk_device, + &(const VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = (VkDescriptorPoolSize[]) { { - .sampler = NULL, - .imageView = anv_image_view_to_handle(src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1 }, } - } - }, 0, NULL); + }, &cmd_buffer->pool->alloc, &tmp->desc_pool); + + anv_AllocateDescriptorSets(vk_device, + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = tmp->desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout + }, &tmp->set); + + anv_UpdateDescriptorSets(vk_device, + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = tmp->set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .pTexelBufferView = (VkBufferView[]) { + anv_buffer_view_to_handle(&tmp->bview), + }, + } + }, 0, NULL); + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit2d.buf_p_layout, 0, 1, + &tmp->set, 0, NULL); + } +} + +static void +blit2d_unbind_src(struct anv_cmd_buffer *cmd_buffer, + enum blit2d_src_type src_type, + struct blit2d_src_temps *tmp) +{ + anv_DestroyDescriptorPool(anv_device_to_handle(cmd_buffer->device), + tmp->desc_pool, &cmd_buffer->pool->alloc); + if (src_type == BLIT2D_SRC_TYPE_NORMAL) { + anv_DestroyImage(anv_device_to_handle(cmd_buffer->device), + tmp->image, &cmd_buffer->pool->alloc); + } +} + +struct blit2d_dst_temps { + VkImage image; + struct anv_image_view iview; VkFramebuffer fb; - anv_CreateFramebuffer(anv_device_to_handle(device), +}; + +static void +blit2d_bind_dst(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *dst, + uint64_t offset, + uint32_t width, + uint32_t height, + VkFormat format, + struct blit2d_dst_temps *tmp) +{ + create_iview(cmd_buffer, dst, offset, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + width, height, format, &tmp->image, &tmp->iview); + + anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), &(VkFramebufferCreateInfo) { .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .attachmentCount = 1, .pAttachments = (VkImageView[]) { - anv_image_view_to_handle(dest_iview), + anv_image_view_to_handle(&tmp->iview), }, - .width = dest_iview->extent.width, - .height = dest_iview->extent.height, + .width = width, + .height = height, .layers = 1 - }, &cmd_buffer->pool->alloc, &fb); - - ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = device->meta_state.blit2d.render_pass, - .framebuffer = fb, - .renderArea = { - .offset = { dest_offset.x, dest_offset.y }, - .extent = { extent.width, extent.height }, - }, - .clearValueCount = 0, - .pClearValues = NULL, - }, VK_SUBPASS_CONTENTS_INLINE); + }, &cmd_buffer->pool->alloc, &tmp->fb); +} + +static void +blit2d_unbind_dst(struct anv_cmd_buffer *cmd_buffer, + struct blit2d_dst_temps *tmp) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + anv_DestroyFramebuffer(vk_device, tmp->fb, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, tmp->image, &cmd_buffer->pool->alloc); +} - VkPipeline pipeline = device->meta_state.blit2d.pipeline_2d_src; +void +anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *save) +{ + anv_meta_restore(save, cmd_buffer); +} + +void +anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *save) +{ + anv_meta_save(save, cmd_buffer, 0); +} + +static void +bind_pipeline(struct anv_cmd_buffer *cmd_buffer, + enum blit2d_src_type src_type, + enum blit2d_dst_type dst_type) +{ + VkPipeline pipeline = + cmd_buffer->device->meta_state.blit2d.pipelines[src_type][dst_type]; if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } +} - anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = dest_iview->extent.width, - .height = dest_iview->extent.height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); +static void +anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + enum blit2d_src_type src_type, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects) +{ + struct anv_device *device = cmd_buffer->device; - anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit2d.pipeline_layout, 0, 1, - &set, 0, NULL); + for (unsigned r = 0; r < num_rects; ++r) { + struct blit2d_src_temps src_temps; + blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps); + + uint32_t offset = 0; + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + dst->tiling, dst->bs, dst->pitch, + rects[r].dst_x, rects[r].dst_y, + &offset, + &rects[r].dst_x, &rects[r].dst_y); + + struct blit2d_dst_temps dst_temps; + blit2d_bind_dst(cmd_buffer, dst, offset, rects[r].dst_x + rects[r].width, + rects[r].dst_y + rects[r].height, + vk_format_for_size(dst->bs), &dst_temps); + + struct blit_vb_data { + float pos[2]; + float tex_coord[3]; + } *vb_data; + + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct anv_vue_header)); + vb_data = vb_state.map + sizeof(struct anv_vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + rects[r].dst_x + rects[r].width, + rects[r].dst_y + rects[r].height, + }, + .tex_coord = { + rects[r].src_x + rects[r].width, + rects[r].src_y + rects[r].height, + src->pitch, + }, + }; - ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + vb_data[1] = (struct blit_vb_data) { + .pos = { + rects[r].dst_x, + rects[r].dst_y + rects[r].height, + }, + .tex_coord = { + rects[r].src_x, + rects[r].src_y + rects[r].height, + src->pitch, + }, + }; - ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + vb_data[2] = (struct blit_vb_data) { + .pos = { + rects[r].dst_x, + rects[r].dst_y, + }, + .tex_coord = { + rects[r].src_x, + rects[r].src_y, + src->pitch, + }, + }; + + if (!device->info.has_llc) + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct anv_vue_header), + }); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit2d.render_pass, + .framebuffer = dst_temps.fb, + .renderArea = { + .offset = { rects[r].dst_x, rects[r].dst_y, }, + .extent = { rects[r].width, rects[r].height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); - /* At the point where we emit the draw call, all data from the - * descriptor sets, etc. has been used. We are free to delete it. - */ - anv_DestroyDescriptorPool(anv_device_to_handle(device), - desc_pool, &cmd_buffer->pool->alloc); - anv_DestroyFramebuffer(anv_device_to_handle(device), fb, - &cmd_buffer->pool->alloc); + bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_NORMAL); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. + */ + blit2d_unbind_src(cmd_buffer, src_type, &src_temps); + blit2d_unbind_dst(cmd_buffer, &dst_temps); + } } -void -anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_saved_state *save) +static void +anv_meta_blit2d_w_tiled_dst(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + enum blit2d_src_type src_type, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects) { - anv_meta_restore(save, cmd_buffer); + struct anv_device *device = cmd_buffer->device; + + for (unsigned r = 0; r < num_rects; ++r) { + struct blit2d_src_temps src_temps; + blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps); + + assert(dst->bs == 1); + uint32_t offset; + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + ISL_TILING_W, 1, dst->pitch, + rects[r].dst_x, rects[r].dst_y, + &offset, + &rects[r].dst_x, &rects[r].dst_y); + + /* The original coordinates were in terms of an actual W-tiled offset + * but we are binding this image as Y-tiled. We need to adjust our + * rectangle accordingly. + */ + uint32_t xmin_Y, xmax_Y, ymin_Y, ymax_Y; + xmin_Y = (rects[r].dst_x / 8) * 16; + xmax_Y = DIV_ROUND_UP(rects[r].dst_x + rects[r].width, 8) * 16; + ymin_Y = (rects[r].dst_y / 4) * 2; + ymax_Y = DIV_ROUND_UP(rects[r].dst_y + rects[r].height, 4) * 2; + + struct anv_meta_blit2d_surf dst_Y = { + .bo = dst->bo, + .tiling = ISL_TILING_Y0, + .base_offset = dst->base_offset, + .bs = 1, + .pitch = dst->pitch, + }; + + struct blit2d_dst_temps dst_temps; + blit2d_bind_dst(cmd_buffer, &dst_Y, offset, xmax_Y, ymax_Y, + VK_FORMAT_R8_UINT, &dst_temps); + + struct blit_vb_header { + struct anv_vue_header vue; + int32_t tex_offset[2]; + uint32_t tex_pitch; + uint32_t bounds[4]; + } *vb_header; + + struct blit_vb_data { + float pos[2]; + } *vb_data; + + unsigned vb_size = sizeof(*vb_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + vb_header = vb_state.map; + + *vb_header = (struct blit_vb_header) { + .tex_offset = { + rects[r].src_x - rects[r].dst_x, + rects[r].src_y - rects[r].dst_y, + }, + .tex_pitch = src->pitch, + .bounds = { + rects[r].dst_x, + rects[r].dst_y, + rects[r].dst_x + rects[r].width, + rects[r].dst_y + rects[r].height, + }, + }; + + vb_data = (void *)(vb_header + 1); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + xmax_Y, + ymax_Y, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + xmin_Y, + ymax_Y, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + xmin_Y, + ymin_Y, + }, + }; + + if (!device->info.has_llc) + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + (void *)vb_data - vb_state.map, + }); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit2d.render_pass, + .framebuffer = dst_temps.fb, + .renderArea = { + .offset = { xmin_Y, ymin_Y, }, + .extent = { xmax_Y - xmin_Y, ymax_Y - ymin_Y }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_W_TILE); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. + */ + blit2d_unbind_src(cmd_buffer, src_type, &src_temps); + blit2d_unbind_dst(cmd_buffer, &dst_temps); + } } -void -anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_saved_state *save) +static void +anv_meta_blit2d_rgb_dst(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + enum blit2d_src_type src_type, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects) { - anv_meta_save(save, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); + struct anv_device *device = cmd_buffer->device; + + for (unsigned r = 0; r < num_rects; ++r) { + struct blit2d_src_temps src_temps; + blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps); + + assert(dst->bs % 3 == 0); + assert(dst->tiling == ISL_TILING_LINEAR); + + uint32_t offset; + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + dst->tiling, 1, dst->pitch, + rects[r].dst_x, rects[r].dst_y, + &offset, + &rects[r].dst_x, &rects[r].dst_y); + + /* A red surface three times as wide as the actual RGB destination */ + struct anv_meta_blit2d_surf dst_R = { + .bo = dst->bo, + .tiling = dst->tiling, + .base_offset = dst->base_offset, + .bs = dst->bs / 3, + .pitch = dst->pitch, + }; + + struct blit2d_dst_temps dst_temps; + blit2d_bind_dst(cmd_buffer, &dst_R, offset, + (rects[r].dst_x + rects[r].width) * 3, + rects[r].dst_y + rects[r].height, + vk_single_component_format_for_rgb_size(dst->bs), + &dst_temps); + + struct blit_vb_data { + float pos[2]; + float tex_coord[3]; + } *vb_data; + + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct anv_vue_header)); + vb_data = vb_state.map + sizeof(struct anv_vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + (rects[r].dst_x + rects[r].width) * 3, + rects[r].dst_y + rects[r].height, + }, + .tex_coord = { + rects[r].src_x + rects[r].width, + rects[r].src_y + rects[r].height, + src->pitch, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + rects[r].dst_x * 3, + rects[r].dst_y + rects[r].height, + }, + .tex_coord = { + rects[r].src_x, + rects[r].src_y + rects[r].height, + src->pitch, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + rects[r].dst_x * 3, + rects[r].dst_y, + }, + .tex_coord = { + rects[r].src_x, + rects[r].src_y, + src->pitch, + }, + }; + + if (!device->info.has_llc) + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct anv_vue_header), + }); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit2d.render_pass, + .framebuffer = dst_temps.fb, + .renderArea = { + .offset = { rects[r].dst_x, rects[r].dst_y, }, + .extent = { rects[r].width, rects[r].height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_RGB); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. + */ + blit2d_unbind_src(cmd_buffer, src_type, &src_temps); + blit2d_unbind_dst(cmd_buffer, &dst_temps); + } } void @@ -334,32 +820,28 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, unsigned num_rects, struct anv_meta_blit2d_rect *rects) { - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - VkImageUsageFlags src_usage = VK_IMAGE_USAGE_SAMPLED_BIT; - VkImageUsageFlags dst_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + enum blit2d_src_type src_type; + if (src->tiling == ISL_TILING_W && cmd_buffer->device->info.gen < 8) { + src_type = BLIT2D_SRC_TYPE_W_DETILE; + } else { + src_type = BLIT2D_SRC_TYPE_NORMAL; + } - for (unsigned r = 0; r < num_rects; ++r) { - VkImage src_img; - VkImage dst_img; - struct anv_image_view src_iview; - struct anv_image_view dst_iview; - create_iview(cmd_buffer, src, &rects[r], src_usage, &src_img, &src_iview); - create_iview(cmd_buffer, dst, &rects[r], dst_usage, &dst_img, &dst_iview); - - /* Perform blit */ - meta_emit_blit2d(cmd_buffer, - &src_iview, - (VkOffset3D){rects[r].src_x, rects[r].src_y, 0}, - &dst_iview, - (VkOffset3D){rects[r].dst_x, rects[r].dst_y, 0}, - (VkExtent3D){rects[r].width, rects[r].height, 1}); - - anv_DestroyImage(vk_device, src_img, &cmd_buffer->pool->alloc); - anv_DestroyImage(vk_device, dst_img, &cmd_buffer->pool->alloc); + if (dst->tiling == ISL_TILING_W) { + anv_meta_blit2d_w_tiled_dst(cmd_buffer, src, src_type, dst, + num_rects, rects); + return; + } else if (dst->bs % 3 == 0) { + anv_meta_blit2d_rgb_dst(cmd_buffer, src, src_type, dst, + num_rects, rects); + return; + } else { + assert(util_is_power_of_two(dst->bs)); + anv_meta_blit2d_normal_dst(cmd_buffer, src, src_type, dst, + num_rects, rects); } } - static nir_shader * build_nir_vertex_shader(void) { @@ -383,55 +865,381 @@ build_nir_vertex_shader(void) nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "v_tex_pos"); tex_pos_out->data.location = VARYING_SLOT_VAR0; - tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; + tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH; nir_copy_var(&b, tex_pos_out, tex_pos_in); + nir_variable *other_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "a_other"); + other_in->data.location = VERT_ATTRIB_GENERIC2; + nir_variable *other_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "v_other"); + other_out->data.location = VARYING_SLOT_VAR1; + other_out->data.interpolation = INTERP_MODE_FLAT; + nir_copy_var(&b, other_out, other_in); + return b.shader; } -static nir_shader * -build_nir_copy_fragment_shader() +typedef nir_ssa_def* (*texel_fetch_build_func)(struct nir_builder *, + struct anv_device *, + nir_ssa_def *, nir_ssa_def *); + +static nir_ssa_def * +nir_copy_bits(struct nir_builder *b, nir_ssa_def *dst, unsigned dst_offset, + nir_ssa_def *src, unsigned src_offset, unsigned num_bits) { - const struct glsl_type *vec4 = glsl_vec4_type(); - const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2); - nir_builder b; + unsigned src_mask = (~1u >> (32 - num_bits)) << src_offset; + nir_ssa_def *masked = nir_iand(b, src, nir_imm_int(b, src_mask)); + + nir_ssa_def *shifted; + if (dst_offset > src_offset) { + shifted = nir_ishl(b, masked, nir_imm_int(b, dst_offset - src_offset)); + } else if (dst_offset < src_offset) { + shifted = nir_ushr(b, masked, nir_imm_int(b, src_offset - dst_offset)); + } else { + assert(dst_offset == src_offset); + shifted = masked; + } - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); + return nir_ior(b, dst, shifted); +} - nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec2, "v_tex_pos"); - tex_pos_in->data.location = VARYING_SLOT_VAR0; - nir_ssa_def *const tex_pos = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); +static nir_ssa_def * +build_nir_w_tiled_fetch(struct nir_builder *b, struct anv_device *device, + nir_ssa_def *tex_pos, nir_ssa_def *tex_pitch) +{ + nir_ssa_def *x = nir_channel(b, tex_pos, 0); + nir_ssa_def *y = nir_channel(b, tex_pos, 1); + + /* First, compute the block-aligned offset */ + nir_ssa_def *x_major = nir_ushr(b, x, nir_imm_int(b, 6)); + nir_ssa_def *y_major = nir_ushr(b, y, nir_imm_int(b, 6)); + /* W tiles have physical size of 128x32 and logical size of 64x64, hence + * the multiplication by 32 (instead of 64). */ + nir_ssa_def *offset = + nir_iadd(b, nir_imul(b, y_major, + nir_imul(b, tex_pitch, nir_imm_int(b, 32))), + nir_imul(b, x_major, nir_imm_int(b, 4096))); + + /* Compute the bottom 12 bits of the offset */ + offset = nir_copy_bits(b, offset, 0, x, 0, 1); + offset = nir_copy_bits(b, offset, 1, y, 0, 1); + offset = nir_copy_bits(b, offset, 2, x, 1, 1); + offset = nir_copy_bits(b, offset, 3, y, 1, 1); + offset = nir_copy_bits(b, offset, 4, x, 2, 1); + offset = nir_copy_bits(b, offset, 5, y, 2, 4); + offset = nir_copy_bits(b, offset, 9, x, 3, 3); + + if (device->isl_dev.has_bit6_swizzling) { + offset = nir_ixor(b, offset, + nir_ushr(b, nir_iand(b, offset, nir_imm_int(b, 0x0200)), + nir_imm_int(b, 3))); + } + + const struct glsl_type *sampler_type = + glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT); + nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform, + sampler_type, "s_tex"); + sampler->data.descriptor_set = 0; + sampler->data.binding = 0; + + nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1); + tex->sampler_dim = GLSL_SAMPLER_DIM_BUF; + tex->op = nir_texop_txf; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(offset); + tex->dest_type = nir_type_float; /* TODO */ + tex->is_array = false; + tex->coord_components = 1; + tex->texture = nir_deref_var_create(tex, sampler); + tex->sampler = NULL; + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); + nir_builder_instr_insert(b, &tex->instr); + + return &tex->dest.ssa; +} + +static nir_ssa_def * +build_nir_texel_fetch(struct nir_builder *b, struct anv_device *device, + nir_ssa_def *tex_pos, nir_ssa_def *tex_pitch) +{ const struct glsl_type *sampler_type = - glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, - glsl_get_base_type(vec4)); - nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, + glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT); + nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex"); sampler->data.descriptor_set = 0; sampler->data.binding = 0; - nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2); + nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2); tex->sampler_dim = GLSL_SAMPLER_DIM_2D; tex->op = nir_texop_txf; tex->src[0].src_type = nir_tex_src_coord; tex->src[0].src = nir_src_for_ssa(tex_pos); tex->src[1].src_type = nir_tex_src_lod; - tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); + tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0)); tex->dest_type = nir_type_float; /* TODO */ tex->is_array = false; - tex->coord_components = tex_pos->num_components; + tex->coord_components = 2; tex->texture = nir_deref_var_create(tex, sampler); tex->sampler = NULL; nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); - nir_builder_instr_insert(&b, &tex->instr); + nir_builder_instr_insert(b, &tex->instr); + + return &tex->dest.ssa; +} + +static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = 0, + .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE + }, + { + .binding = 1, + .stride = 5 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = 8 + }, + }, +}; + +static nir_shader * +build_nir_copy_fragment_shader(struct anv_device *device, + texel_fetch_build_func txf_func) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec3, "v_tex_pos"); + tex_pos_in->data.location = VARYING_SLOT_VAR0; nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color"); color_out->data.location = FRAG_RESULT_DATA0; - nir_store_var(&b, color_out, &tex->dest.ssa, 0xf); + + nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); + unsigned swiz[4] = { 0, 1 }; + nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false); + nir_ssa_def *tex_pitch = nir_channel(&b, pos_int, 2); + + nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch); + nir_store_var(&b, color_out, color, 0xf); + + return b.shader; +} + +/* RGB copies have the same interface as normal copies */ +#define rgb_vi_create_info normal_vi_create_info + +static nir_shader * +build_nir_rgb_fragment_shader(struct anv_device *device, + texel_fetch_build_func txf_func) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec3, "v_tex_pos"); + tex_pos_in->data.location = VARYING_SLOT_VAR0; + + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + + /* We need gl_FragCoord so we know our position */ + nir_variable *frag_coord_in = nir_variable_create(b.shader, + nir_var_shader_in, + vec4, "gl_FragCoord"); + frag_coord_in->data.location = VARYING_SLOT_POS; + frag_coord_in->data.origin_upper_left = true; + + nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); + unsigned swiz[4] = { 0, 1 }; + nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false); + nir_ssa_def *tex_pitch = nir_channel(&b, pos_int, 2); + + nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch); + + /* We figure out which component we are by the x component of FragCoord */ + nir_ssa_def *frag_coord_int = nir_f2i(&b, nir_load_var(&b, frag_coord_in)); + nir_ssa_def *comp = nir_umod(&b, nir_channel(&b, frag_coord_int, 0), + nir_imm_int(&b, 3)); + + /* Select the given channel from the texelFetch result */ + nir_ssa_def *color_channel = + nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 0)), + nir_channel(&b, color, 0), + nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 1)), + nir_channel(&b, color, 1), + nir_channel(&b, color, 2))); + + nir_ssa_def *u = nir_ssa_undef(&b, 1, 32); + nir_store_var(&b, color_out, nir_vec4(&b, color_channel, u, u, u), 0x1); + + return b.shader; +} + +static const VkPipelineVertexInputStateCreateInfo w_tiled_vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = 0, + .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE + }, + { + .binding = 1, + .stride = 2 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 4, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = 0 + }, + { + /* Texture Offset */ + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32B32_UINT, + .offset = 16 + }, + { + /* Destination bounds */ + .location = 3, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 28 + }, + }, +}; + +static nir_shader * +build_nir_w_tiled_fragment_shader(struct anv_device *device, + texel_fetch_build_func txf_func) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + const struct glsl_type *ivec3 = glsl_vector_type(GLSL_TYPE_INT, 3); + const struct glsl_type *uvec4 = glsl_vector_type(GLSL_TYPE_UINT, 4); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); + + /* We need gl_FragCoord so we know our Y-tiled position */ + nir_variable *frag_coord_in = nir_variable_create(b.shader, + nir_var_shader_in, + vec4, "gl_FragCoord"); + frag_coord_in->data.location = VARYING_SLOT_POS; + frag_coord_in->data.origin_upper_left = true; + + /* In location 0 we have an ivec3 that has the offset from dest to + * source in the first two components and the stride in the third. + */ + nir_variable *tex_off_in = nir_variable_create(b.shader, nir_var_shader_in, + ivec3, "v_tex_off"); + tex_off_in->data.location = VARYING_SLOT_VAR0; + tex_off_in->data.interpolation = INTERP_MODE_FLAT; + + /* In location 1 we have a uvec4 that gives us the bounds of the + * destination. We need to discard if we get outside this boundary. + */ + nir_variable *bounds_in = nir_variable_create(b.shader, nir_var_shader_in, + uvec4, "v_bounds"); + bounds_in->data.location = VARYING_SLOT_VAR1; + bounds_in->data.interpolation = INTERP_MODE_FLAT; + + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + + nir_ssa_def *frag_coord_int = nir_f2i(&b, nir_load_var(&b, frag_coord_in)); + nir_ssa_def *x_Y = nir_channel(&b, frag_coord_int, 0); + nir_ssa_def *y_Y = nir_channel(&b, frag_coord_int, 1); + + /* Compute the W-tiled position from the Y-tiled position */ + nir_ssa_def *x_W = nir_iand(&b, x_Y, nir_imm_int(&b, 0xffffff80)); + x_W = nir_ushr(&b, x_W, nir_imm_int(&b, 1)); + x_W = nir_copy_bits(&b, x_W, 0, x_Y, 0, 1); + x_W = nir_copy_bits(&b, x_W, 1, x_Y, 2, 1); + x_W = nir_copy_bits(&b, x_W, 2, y_Y, 0, 1); + x_W = nir_copy_bits(&b, x_W, 3, x_Y, 4, 3); + + nir_ssa_def *y_W = nir_iand(&b, y_Y, nir_imm_int(&b, 0xffffffe0)); + y_W = nir_ishl(&b, y_W, nir_imm_int(&b, 1)); + y_W = nir_copy_bits(&b, y_W, 0, x_Y, 1, 1); + y_W = nir_copy_bits(&b, y_W, 1, x_Y, 3, 1); + y_W = nir_copy_bits(&b, y_W, 2, y_Y, 1, 4); + + /* Figure out if we are out-of-bounds and discard */ + nir_ssa_def *bounds = nir_load_var(&b, bounds_in); + nir_ssa_def *oob = + nir_ior(&b, nir_ult(&b, x_W, nir_channel(&b, bounds, 0)), + nir_ior(&b, nir_ult(&b, y_W, nir_channel(&b, bounds, 1)), + nir_ior(&b, nir_uge(&b, x_W, nir_channel(&b, bounds, 2)), + nir_uge(&b, y_W, nir_channel(&b, bounds, 3))))); + + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if); + discard->src[0] = nir_src_for_ssa(oob); + nir_builder_instr_insert(&b, &discard->instr); + + nir_ssa_def *tex_off = nir_channels(&b, nir_load_var(&b, tex_off_in), 0x3); + nir_ssa_def *tex_pos = nir_iadd(&b, nir_vec2(&b, x_W, y_W), tex_off); + nir_ssa_def *tex_pitch = nir_channel(&b, nir_load_var(&b, tex_off_in), 2); + + nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch); + nir_store_var(&b, color_out, color, 0xf); return b.shader; } @@ -439,57 +1247,88 @@ build_nir_copy_fragment_shader() void anv_device_finish_meta_blit2d_state(struct anv_device *device) { - anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit2d.render_pass, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit2d.pipeline_2d_src, - &device->meta_state.alloc); - anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit2d.pipeline_layout, - &device->meta_state.alloc); - anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit2d.ds_layout, - &device->meta_state.alloc); + if (device->meta_state.blit2d.render_pass) { + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit2d.render_pass, + &device->meta_state.alloc); + } + + if (device->meta_state.blit2d.img_p_layout) { + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit2d.img_p_layout, + &device->meta_state.alloc); + } + + if (device->meta_state.blit2d.img_ds_layout) { + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit2d.img_ds_layout, + &device->meta_state.alloc); + } + + if (device->meta_state.blit2d.buf_p_layout) { + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit2d.buf_p_layout, + &device->meta_state.alloc); + } + + if (device->meta_state.blit2d.buf_ds_layout) { + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit2d.buf_ds_layout, + &device->meta_state.alloc); + } + + for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) { + for (unsigned dst = 0; dst < BLIT2D_NUM_DST_TYPES; dst++) { + if (device->meta_state.blit2d.pipelines[src][dst]) { + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit2d.pipelines[src][dst], + &device->meta_state.alloc); + } + } + } } -VkResult -anv_device_init_meta_blit2d_state(struct anv_device *device) +static VkResult +blit2d_init_pipeline(struct anv_device *device, + enum blit2d_src_type src_type, + enum blit2d_dst_type dst_type) { VkResult result; - result = anv_CreateRenderPass(anv_device_to_handle(device), - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 1, - .pColorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .pResolveAttachments = NULL, - .pDepthStencilAttachment = &(VkAttachmentReference) { - .attachment = VK_ATTACHMENT_UNUSED, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .preserveAttachmentCount = 1, - .pPreserveAttachments = (uint32_t[]) { 0 }, - }, - .dependencyCount = 0, - }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass); - if (result != VK_SUCCESS) - goto fail; + texel_fetch_build_func src_func; + switch (src_type) { + case BLIT2D_SRC_TYPE_NORMAL: + src_func = build_nir_texel_fetch; + break; + case BLIT2D_SRC_TYPE_W_DETILE: + src_func = build_nir_w_tiled_fetch; + break; + default: + unreachable("Invalid blit2d source type"); + } + + const VkPipelineVertexInputStateCreateInfo *vi_create_info; + struct anv_shader_module fs = { .nir = NULL }; + switch (dst_type) { + case BLIT2D_DST_TYPE_NORMAL: + fs.nir = build_nir_copy_fragment_shader(device, src_func); + vi_create_info = &normal_vi_create_info; + break; + case BLIT2D_DST_TYPE_W_TILE: + fs.nir = build_nir_w_tiled_fragment_shader(device, src_func); + vi_create_info = &w_tiled_vi_create_info; + break; + case BLIT2D_DST_TYPE_RGB: + /* RGB destinations and W-detiling don't mix */ + if (src_type != BLIT2D_SRC_TYPE_NORMAL) + return VK_SUCCESS; + + fs.nir = build_nir_rgb_fragment_shader(device, src_func); + vi_create_info = &rgb_vi_create_info; + break; + default: + return VK_SUCCESS; + } /* We don't use a vertex shader for blitting, but instead build and pass * the VUEs directly to the rasterization backend. However, we do need @@ -500,81 +1339,6 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .nir = build_nir_vertex_shader(), }; - struct anv_shader_module fs_2d = { - .nir = build_nir_copy_fragment_shader(), - }; - - VkPipelineVertexInputStateCreateInfo vi_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .vertexBindingDescriptionCount = 2, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .stride = 0, - .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE - }, - { - .binding = 1, - .stride = 5 * sizeof(float), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - }, - .vertexAttributeDescriptionCount = 3, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offset = 0 - }, - { - /* Position */ - .location = 1, - .binding = 1, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = 0 - }, - { - /* Texture Coordinate */ - .location = 2, - .binding = 1, - .format = VK_FORMAT_R32G32B32_SFLOAT, - .offset = 8 - } - } - }; - - VkDescriptorSetLayoutCreateInfo ds_layout_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL - }, - } - }; - result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), - &ds_layout_info, - &device->meta_state.alloc, - &device->meta_state.blit2d.ds_layout); - if (result != VK_SUCCESS) - goto fail_render_pass; - - result = anv_CreatePipelineLayout(anv_device_to_handle(device), - &(VkPipelineLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit2d.ds_layout, - }, - &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_layout); - if (result != VK_SUCCESS) - goto fail_descriptor_set_layout; - VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -585,7 +1349,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) }, { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */ + .module = anv_shader_module_to_handle(&fs), .pName = "main", .pSpecializationInfo = NULL }, @@ -595,7 +1359,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .stageCount = ARRAY_SIZE(pipeline_shader_stages), .pStages = pipeline_shader_stages, - .pVertexInputState = &vi_create_info, + .pVertexInputState = vi_create_info, .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, @@ -646,7 +1410,7 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) }, }, .flags = 0, - .layout = device->meta_state.blit2d.pipeline_layout, + .layout = device->meta_state.blit2d.img_p_layout, .renderPass = device->meta_state.blit2d.render_pass, .subpass = 0, }; @@ -654,40 +1418,127 @@ anv_device_init_meta_blit2d_state(struct anv_device *device) const struct anv_graphics_pipeline_create_info anv_pipeline_info = { .color_attachment_count = -1, .use_repclear = false, - .disable_viewport = true, - .disable_scissor = true, .disable_vs = true, .use_rectlist = true }; - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); result = anv_graphics_pipeline_create(anv_device_to_handle(device), VK_NULL_HANDLE, &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit2d.pipeline_2d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_layout; + &device->meta_state.alloc, + &device->meta_state.blit2d.pipelines[src_type][dst_type]); ralloc_free(vs.nir); - ralloc_free(fs_2d.nir); + ralloc_free(fs.nir); - return VK_SUCCESS; + return result; +} - fail_pipeline_layout: - anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit2d.pipeline_layout, - &device->meta_state.alloc); - fail_descriptor_set_layout: - anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit2d.ds_layout, - &device->meta_state.alloc); - fail_render_pass: - anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit2d.render_pass, - &device->meta_state.alloc); +VkResult +anv_device_init_meta_blit2d_state(struct anv_device *device) +{ + VkResult result; - ralloc_free(vs.nir); - ralloc_free(fs_2d.nir); - fail: + zero(device->meta_state.blit2d); + + result = anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveAttachmentCount = 1, + .pPreserveAttachments = (uint32_t[]) { 0 }, + }, + .dependencyCount = 0, + }, &device->meta_state.alloc, &device->meta_state.blit2d.render_pass); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }, &device->meta_state.alloc, &device->meta_state.blit2d.img_ds_layout); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit2d.img_ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit2d.img_p_layout); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }, &device->meta_state.alloc, &device->meta_state.blit2d.buf_ds_layout); + if (result != VK_SUCCESS) + goto fail; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit2d.buf_ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit2d.buf_p_layout); + if (result != VK_SUCCESS) + goto fail; + + for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) { + for (unsigned dst = 0; dst < BLIT2D_NUM_DST_TYPES; dst++) { + result = blit2d_init_pipeline(device, src, dst); + if (result != VK_SUCCESS) + goto fail; + } + } + + return VK_SUCCESS; + +fail: + anv_device_finish_meta_blit2d_state(device); return result; }