From: Eric Anholt
Date: Mon, 2 Dec 2019 22:32:53 +0000 (-0800)
Subject: turnip: Add basic SSBO support.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e46da7dbeae4b1138fc9e0db7a144d8edbed50e5;p=mesa.git

turnip: Add basic SSBO support.

This is enough to pass
dEQP-VK.binding_model.shader_access.primary_cmd_buf.storage_buffer.fragment.single_descriptor.*
with fragmentStoresAndAtomics set, and thus to be able to start working on
compute.  I haven't enabled that flag yet, because it also implies image
load/store support, which I haven't filled in.

Reviewed-by: Jonathan Marek
---
Review notes (not part of the upstream commit; the blob ids on the
tu_cmd_buffer.c "index" line still name the upstream blobs):

 - tu6_emit_ibo: slots whose image support is not implemented yet are now
   zero-filled, so every IBO occupies A6XX_TEX_CONST_DWORDS dwords as the
   size reservation and CP_LOAD_STATE6 NUM_UNIT assume.
 - tu6_emit_ibo: the zero-fill loop no longer shadows the outer index `i`.
 - tu6_emit_ibo: added a header comment and corrected two comments that
   were carried over from the texture path.

diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 516bbc19e5b..47f95e4b00a 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -2174,6 +2174,7 @@ enum tu_draw_state_group_id
    TU_DRAW_STATE_FS_CONST,
    TU_DRAW_STATE_VS_TEX,
    TU_DRAW_STATE_FS_TEX,
+   TU_DRAW_STATE_FS_IBO,
 
    TU_DRAW_STATE_COUNT,
 };
@@ -2495,6 +2496,83 @@ tu6_emit_textures(struct tu_device *device, struct tu_cs *draw_state,
    return tu_cs_end_sub_stream(draw_state, &cs);
 }
 
+/* Build the IBO draw state for one shader stage.
+ *
+ * Writes one A6XX_TEX_CONST_DWORDS-sized descriptor per entry of the stage's
+ * image mapping into a sub-stream, then returns a second sub-stream that
+ * loads those descriptors with CP_LOAD_STATE6 and writes their address to
+ * SP_IBO_LO/HI.  Returns an empty entry when the stage has no IBOs.
+ */
+static struct tu_cs_entry
+tu6_emit_ibo(struct tu_device *device, struct tu_cs *draw_state,
+             const struct tu_pipeline *pipeline,
+             struct tu_descriptor_state *descriptors_state,
+             gl_shader_stage type)
+{
+   const struct tu_program_descriptor_linkage *link =
+      &pipeline->program.link[type];
+
+   uint32_t size = link->image_mapping.num_ibo * A6XX_TEX_CONST_DWORDS;
+   if (!size)
+      return (struct tu_cs_entry) {};
+
+   struct tu_cs cs;
+   tu_cs_begin_sub_stream(device, draw_state, size, &cs);
+
+   for (unsigned i = 0; i < link->image_mapping.num_ibo; i++) {
+      unsigned idx = link->image_mapping.ibo_to_image[i];
+
+      if (idx & IBO_SSBO) {
+         idx &= ~IBO_SSBO;
+
+         uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx);
+         /* We don't expose robustBufferAccess, so leave the size unlimited. */
+         uint32_t sz = MAX_STORAGE_BUFFER_RANGE / 4;
+
+         tu_cs_emit(&cs, A6XX_IBO_0_FMT(TFMT6_32_UINT));
+         tu_cs_emit(&cs,
+                    A6XX_IBO_1_WIDTH(sz & MASK(15)) |
+                    A6XX_IBO_1_HEIGHT(sz >> 15));
+         tu_cs_emit(&cs,
+                    A6XX_IBO_2_UNK4 |
+                    A6XX_IBO_2_UNK31 |
+                    A6XX_IBO_2_TYPE(A6XX_TEX_1D));
+         tu_cs_emit(&cs, 0);
+         tu_cs_emit_qw(&cs, va);
+         /* Six dwords are written above; zero the rest of the descriptor. */
+         for (int j = 6; j < A6XX_TEX_CONST_DWORDS; j++)
+            tu_cs_emit(&cs, 0);
+      } else {
+         tu_finishme("Emit images");
+         /* Still fill the slot so the descriptors after it stay at the
+          * index the CP_LOAD_STATE6 below expects.
+          */
+         for (int j = 0; j < A6XX_TEX_CONST_DWORDS; j++)
+            tu_cs_emit(&cs, 0);
+      }
+   }
+
+   struct tu_cs_entry entry = tu_cs_end_sub_stream(draw_state, &cs);
+
+   uint64_t ibo_addr = entry.bo->iova + entry.offset;
+
+   tu_cs_begin_sub_stream(device, draw_state, 64, &cs);
+
+   /* emit IBO state: */
+   tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6, 3);
+   tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
+              CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
+              CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+              CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) |
+              CP_LOAD_STATE6_0_NUM_UNIT(link->image_mapping.num_ibo));
+   tu_cs_emit_qw(&cs, ibo_addr); /* SRC_ADDR_LO/HI */
+
+   tu_cs_emit_pkt4(&cs, REG_A6XX_SP_IBO_LO, 2);
+   tu_cs_emit_qw(&cs, ibo_addr); /* SP_IBO_LO/HI */
+
+   return tu_cs_end_sub_stream(draw_state, &cs);
+}
+
 static void
 tu6_emit_border_color(struct tu_cmd_buffer *cmd,
                       struct tu_cs *cs)
@@ -2679,6 +2757,13 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
                                     descriptors_state, MESA_SHADER_FRAGMENT,
                                     &needs_border)
          };
+      draw_state_groups[draw_state_group_count++] =
+         (struct tu_draw_state_group) {
+            .id = TU_DRAW_STATE_FS_IBO,
+            .enable_mask = 0x6,
+            .ib = tu6_emit_ibo(cmd->device, &cmd->draw_state, pipeline,
+                               descriptors_state, MESA_SHADER_FRAGMENT)
+         };
 
       if (needs_border)
          tu6_emit_border_color(cmd, cs);
diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c
index 7fbec1ba619..8a350a054b6 100644
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -725,7 +725,7 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
       .maxImageArrayLayers = (1 << 11),
       .maxTexelBufferElements = 128 * 1024 * 1024,
       .maxUniformBufferRange = UINT32_MAX,
-      .maxStorageBufferRange = UINT32_MAX,
+      .maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE,
       .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
       .maxMemoryAllocationCount = UINT32_MAX,
       .maxSamplerAllocationCount = 64 * 1024,
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index 290adffe2e0..547115b0359 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -450,7 +450,8 @@ tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs)
       sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_PIXLODENABLE;
 
    uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(fs->num_samp) |
-                           A6XX_SP_FS_CONFIG_NSAMP(fs->num_samp);
+                           A6XX_SP_FS_CONFIG_NSAMP(fs->num_samp) |
+                           A6XX_SP_FS_CONFIG_NIBO(fs->image_mapping.num_ibo);
    if (fs->instrlen)
       sp_fs_config |= A6XX_SP_FS_CONFIG_ENABLED;
 
@@ -470,6 +471,9 @@ tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs)
    tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_FS_CNTL, 1);
    tu_cs_emit(cs, A6XX_HLSQ_FS_CNTL_CONSTLEN(align(fs->constlen, 4)) |
                      A6XX_HLSQ_FS_CNTL_ENABLED);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_IBO_COUNT, 1);
+   tu_cs_emit(cs, fs->image_mapping.num_ibo);
 }
 
 static void
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 3b0096d1688..6387fa50501 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -93,6 +93,8 @@ typedef uint32_t xcb_window_t;
 #define NUM_META_FS_KEYS 13
 #define TU_MAX_DRM_DEVICES 8
 #define MAX_VIEWS 8
+/* The Qualcomm driver exposes 0x20000058 */
+#define MAX_STORAGE_BUFFER_RANGE 0x20000000
 
 #define NUM_DEPTH_CLEAR_PIPELINES 3
 