From a92d2e11095d9f1f8bc1188fd3d2b8391acc4591 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Tue, 21 Apr 2020 12:14:23 -0400 Subject: [PATCH] turnip: implement VK_EXT_sample_locations Passes tests in: dEQP-VK.pipeline.multisample.sample_locations_ext.* Note that these tests fail because of gl_PrimitiveID not working correctly: dEQP-VK.pipeline.multisample.sample_locations_ext.verify_location.* Signed-off-by: Jonathan Marek Part-of: --- src/freedreno/registers/a6xx.xml | 39 +++++++++----- src/freedreno/vulkan/tu_cmd_buffer.c | 25 ++++----- src/freedreno/vulkan/tu_device.c | 29 ++++++++++ src/freedreno/vulkan/tu_extensions.py | 1 + src/freedreno/vulkan/tu_pipeline.c | 54 +++++++++++++++++++ src/freedreno/vulkan/tu_private.h | 6 ++- src/freedreno/vulkan/tu_shader.c | 10 +++- src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 10 ++-- src/gallium/drivers/freedreno/a6xx/fd6_gmem.c | 9 ---- 9 files changed, 135 insertions(+), 48 deletions(-) diff --git a/src/freedreno/registers/a6xx.xml b/src/freedreno/registers/a6xx.xml index a4a3af50ece..a2bda8aee10 100644 --- a/src/freedreno/registers/a6xx.xml +++ b/src/freedreno/registers/a6xx.xml @@ -1933,12 +1933,25 @@ to upconvert to 32b float internally? - - - - - - + + + + + + + + + + + + + + + + + + + @@ -2058,12 +2071,9 @@ to upconvert to 32b float internally? - - - - - - + + + - + + + diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 6cf74b3f0c4..f830f07e2ef 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -692,15 +692,6 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd, tu_cs_emit_pkt7(cs, CP_SET_MODE, 1); tu_cs_emit(cs, 0x0); - - tu_cs_emit_regs(cs, - A6XX_RB_UNKNOWN_8804(0)); - - tu_cs_emit_regs(cs, - A6XX_SP_TP_UNKNOWN_B304(0)); - - tu_cs_emit_regs(cs, - A6XX_GRAS_UNKNOWN_80A4(0)); } else { tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1); tu_cs_emit(cs, 0x1); @@ -852,14 +843,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9981, 0x3); tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9E72, 0); tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9108, 0x3); - tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B304, 0); tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B309, 0x000000a2); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8804, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A4, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A5, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A6, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8805, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8806, 0); tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8878, 0); tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8879, 0); tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc); @@ -2152,6 +2136,15 @@ tu_CmdSetStencilReference(VkCommandBuffer commandBuffer, cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; } +void +tu_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer, + const VkSampleLocationsInfoEXT* pSampleLocationsInfo) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + tu6_emit_sample_locations(&cmd->draw_cs, pSampleLocationsInfo); +} + void tu_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index 006f7d4e73b..73c264417eb 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -928,6 +928,22 @@ tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, properties->transformFeedbackDraw = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: { + VkPhysicalDeviceSampleLocationsPropertiesEXT *properties = + (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext; + properties->sampleLocationSampleCounts = 0; + if (pdevice->supported_extensions.EXT_sample_locations) { + properties->sampleLocationSampleCounts = + VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT; + } + properties->maxSampleLocationGridSize = (VkExtent2D) { 1 , 1 }; + properties->sampleLocationCoordinateRange[0] = 0.0f; + properties->sampleLocationCoordinateRange[1] = 0.9375f; + properties->sampleLocationSubPixelBits = 4; + properties->variableSampleLocations = true; + break; + } + default: break; } @@ -2319,3 +2335,16 @@ tu_GetDeviceGroupPeerMemoryFeatures( VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT; } + +void tu_GetPhysicalDeviceMultisamplePropertiesEXT( + VkPhysicalDevice physicalDevice, + VkSampleCountFlagBits samples, + VkMultisamplePropertiesEXT* pMultisampleProperties) +{ + TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice); + + if (samples <= VK_SAMPLE_COUNT_4_BIT && pdevice->supported_extensions.EXT_sample_locations) + pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 }; + else + pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 }; +} diff --git a/src/freedreno/vulkan/tu_extensions.py b/src/freedreno/vulkan/tu_extensions.py index 574e9411c08..498b38613b6 100644 --- a/src/freedreno/vulkan/tu_extensions.py +++ b/src/freedreno/vulkan/tu_extensions.py @@ -77,6 +77,7 @@ EXTENSIONS = [ Extension('VK_KHR_external_memory_fd', 1, True), Extension('VK_EXT_external_memory_dma_buf', 1, True), Extension('VK_EXT_image_drm_format_modifier', 1, False), + Extension('VK_EXT_sample_locations', 1, 'device->gpu_id == 650'), Extension('VK_EXT_transform_feedback', 1, True), Extension('VK_ANDROID_native_buffer', 1, True), Extension('VK_KHR_external_semaphore_fd', 1, True), diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index c3a467ebbfc..d9ffa2411fe 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -325,6 +325,8 @@ tu_dynamic_state_bit(VkDynamicState state) return TU_DYNAMIC_STENCIL_WRITE_MASK; case VK_DYNAMIC_STATE_STENCIL_REFERENCE: return TU_DYNAMIC_STENCIL_REFERENCE; + case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: + return TU_DYNAMIC_SAMPLE_LOCATIONS; default: unreachable("invalid dynamic state"); return 0; @@ -1733,6 +1735,47 @@ tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor) A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(max.y - 1)); } +void +tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc) +{ + if (!samp_loc) { + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 1); + tu_cs_emit(cs, 0); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 1); + tu_cs_emit(cs, 0); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 1); + tu_cs_emit(cs, 0); + return; + } + + assert(samp_loc->sampleLocationsPerPixel == samp_loc->sampleLocationsCount); + assert(samp_loc->sampleLocationGridSize.width == 1); + assert(samp_loc->sampleLocationGridSize.height == 1); + + uint32_t sample_config = + A6XX_RB_SAMPLE_CONFIG_LOCATION_ENABLE; + uint32_t sample_locations = 0; + for (uint32_t i = 0; i < samp_loc->sampleLocationsCount; i++) { + sample_locations |= + (A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_X(samp_loc->pSampleLocations[i].x) | + A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_Y(samp_loc->pSampleLocations[i].y)) << i*8; + } + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 2); + tu_cs_emit(cs, sample_config); + tu_cs_emit(cs, sample_locations); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 2); + tu_cs_emit(cs, sample_config); + tu_cs_emit(cs, sample_locations); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 2); + tu_cs_emit(cs, sample_config); + tu_cs_emit(cs, sample_locations); +} + static void tu6_emit_gras_unknowns(struct tu_cs *cs) { @@ -2415,6 +2458,17 @@ tu_pipeline_builder_parse_multisample_and_color_blend( if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_BLEND_CONSTANTS)) tu6_emit_blend_constants(&blend_cs, blend_info->blendConstants); + if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SAMPLE_LOCATIONS)) { + const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations = + vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT); + const VkSampleLocationsInfoEXT *samp_loc = NULL; + + if (sample_locations && sample_locations->sampleLocationsEnable) + samp_loc = &sample_locations->sampleLocationsInfo; + + tu6_emit_sample_locations(&blend_cs, samp_loc); + } + tu6_emit_blend_control(&blend_cs, blend_enable_mask, msaa_info); pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &blend_cs); diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index eb026998293..dfd9d5101b5 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -722,7 +722,8 @@ enum tu_dynamic_state_bits TU_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, TU_DYNAMIC_STENCIL_REFERENCE = 1 << 8, TU_DYNAMIC_DISCARD_RECTANGLE = 1 << 9, - TU_DYNAMIC_ALL = (1 << 10) - 1, + TU_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10, + TU_DYNAMIC_ALL = (1 << 11) - 1, }; struct tu_vertex_binding @@ -1265,6 +1266,9 @@ tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport); void tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor); +void +tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc); + void tu6_emit_gras_su_cntl(struct tu_cs *cs, uint32_t gras_su_cntl, diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index aa07f5df066..d58209c404c 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -26,6 +26,7 @@ #include "spirv/nir_spirv.h" #include "util/mesa-sha1.h" #include "nir/nir_xfb_info.h" +#include "vk_util.h" #include "ir3/ir3_nir.h" @@ -605,9 +606,16 @@ tu_shader_compile_options_init( } } + const VkPipelineMultisampleStateCreateInfo *msaa_info = pipeline_info->pMultisampleState; + const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations = + vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT); if (!pipeline_info->pRasterizationState->rasterizerDiscardEnable && - pipeline_info->pMultisampleState->rasterizationSamples > 1) + (msaa_info->rasterizationSamples > 1 || + /* also set msaa key when sample location is not the default + * since this affects varying interpolation */ + (sample_locations && sample_locations->sampleLocationsEnable))) { msaa = true; + } } *options = (struct tu_shader_compile_options) { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 8fcb603bfa1..024b139d130 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -1322,17 +1322,13 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) WRITE(REG_A6XX_PC_UNKNOWN_9981, 0x3); WRITE(REG_A6XX_PC_UNKNOWN_9E72, 0); WRITE(REG_A6XX_VPC_UNKNOWN_9108, 0x3); - WRITE(REG_A6XX_SP_TP_UNKNOWN_B304, 0); + WRITE(REG_A6XX_SP_TP_SAMPLE_CONFIG, 0); /* NOTE blob seems to (mostly?) use 0xb2 for SP_TP_UNKNOWN_B309 * but this seems to kill texture gather offsets. */ WRITE(REG_A6XX_SP_TP_UNKNOWN_B309, 0xa2); - WRITE(REG_A6XX_RB_UNKNOWN_8804, 0); - WRITE(REG_A6XX_GRAS_UNKNOWN_80A4, 0); - WRITE(REG_A6XX_GRAS_UNKNOWN_80A5, 0); - WRITE(REG_A6XX_GRAS_UNKNOWN_80A6, 0); - WRITE(REG_A6XX_RB_UNKNOWN_8805, 0); - WRITE(REG_A6XX_RB_UNKNOWN_8806, 0); + WRITE(REG_A6XX_RB_SAMPLE_CONFIG, 0); + WRITE(REG_A6XX_GRAS_SAMPLE_CONFIG, 0); WRITE(REG_A6XX_RB_UNKNOWN_8878, 0); WRITE(REG_A6XX_RB_UNKNOWN_8879, 0); WRITE(REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c index f56b3422d55..b42287486a8 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c @@ -889,15 +889,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) OUT_PKT7(ring, CP_SET_MODE, 1); OUT_RING(ring, 0x0); - - OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8804, 1); - OUT_RING(ring, 0x0); - - OUT_PKT4(ring, REG_A6XX_SP_TP_UNKNOWN_B304, 1); - OUT_RING(ring, 0x0); - - OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_80A4, 1); - OUT_RING(ring, 0x0); } else { set_window_offset(ring, x1, y1); -- 2.30.2