From: Jonathan Marek Date: Mon, 27 Jul 2020 17:20:04 +0000 (-0400) Subject: turnip: implement VK_EXT_custom_border_color X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=commitdiff_plain;h=0c97e601a422532ea673e842683982b1b4012e6d turnip: implement VK_EXT_custom_border_color Signed-off-by: Jonathan Marek Part-of: --- diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml index 15314fbb736..8c5d61c5487 100644 --- a/src/freedreno/registers/adreno/a6xx.xml +++ b/src/freedreno/registers/adreno/a6xx.xml @@ -3689,7 +3689,7 @@ to upconvert to 32b float internally? - + diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 36d0e462edc..d647cb13f34 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -902,10 +902,10 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit_regs(cs, A6XX_SP_TP_BORDER_COLOR_BASE_ADDR(.bo = &dev->global_bo, - .bo_offset = gb_offset(border_color))); + .bo_offset = gb_offset(bcolor_builtin))); tu_cs_emit_regs(cs, A6XX_SP_PS_TP_BORDER_COLOR_BASE_ADDR(.bo = &dev->global_bo, - .bo_offset = gb_offset(border_color))); + .bo_offset = gb_offset(bcolor_builtin))); /* VSC buffers: * use vsc pitches from the largest values used so far with this device @@ -914,7 +914,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) * * if overflow is detected, the stream size is increased by 2x */ - mtx_lock(&dev->vsc_pitch_mtx); + mtx_lock(&dev->mutex); struct tu6_global *global = dev->global_bo.map; @@ -930,7 +930,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) cmd->vsc_prim_strm_pitch = dev->vsc_prim_strm_pitch; cmd->vsc_draw_strm_pitch = dev->vsc_draw_strm_pitch; - mtx_unlock(&dev->vsc_pitch_mtx); + mtx_unlock(&dev->mutex); struct tu_bo *vsc_bo; uint32_t size0 = cmd->vsc_prim_strm_pitch * MAX_VSC_PIPES + diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index 
9dd95ce62bb..0ab94e2bf6e 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -603,6 +603,12 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, features->formatA4B4G4R4 = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: { + VkPhysicalDeviceCustomBorderColorFeaturesEXT *features = (void *) ext; + features->customBorderColors = true; + features->customBorderColorWithoutFormat = true; + break; + } default: break; } @@ -851,6 +857,11 @@ tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, props->maxVertexAttribDivisor = UINT32_MAX; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: { + VkPhysicalDeviceCustomBorderColorPropertiesEXT *props = (void *)ext; + props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT; + break; + } default: break; } @@ -976,60 +987,6 @@ tu_get_device_extension_index(const char *name) return -1; } -struct PACKED bcolor_entry { - uint32_t fp32[4]; - uint16_t ui16[4]; - int16_t si16[4]; - uint16_t fp16[4]; - uint16_t rgb565; - uint16_t rgb5a1; - uint16_t rgba4; - uint8_t __pad0[2]; - uint8_t ui8[4]; - int8_t si8[4]; - uint32_t rgb10a2; - uint32_t z24; /* also s8? */ - uint16_t srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */ - uint8_t __pad1[56]; -} border_color[] = { - [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = {}, - [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = {}, - [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { - .fp32[3] = 0x3f800000, - .ui16[3] = 0xffff, - .si16[3] = 0x7fff, - .fp16[3] = 0x3c00, - .rgb5a1 = 0x8000, - .rgba4 = 0xf000, - .ui8[3] = 0xff, - .si8[3] = 0x7f, - .rgb10a2 = 0xc0000000, - .srgb[3] = 0x3c00, - }, - [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { - .fp32[3] = 1, - .fp16[3] = 1, - }, - [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { - .fp32[0 ... 3] = 0x3f800000, - .ui16[0 ... 3] = 0xffff, - .si16[0 ... 3] = 0x7fff, - .fp16[0 ... 
3] = 0x3c00, - .rgb565 = 0xffff, - .rgb5a1 = 0xffff, - .rgba4 = 0xffff, - .ui8[0 ... 3] = 0xff, - .si8[0 ... 3] = 0x7f, - .rgb10a2 = 0xffffffff, - .z24 = 0xffffff, - .srgb[0 ... 3] = 0x3c00, - }, - [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { - .fp32[0 ... 3] = 1, - .fp16[0 ... 3] = 1, - }, -}; - VkResult tu_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo, @@ -1039,6 +996,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice); VkResult result; struct tu_device *device; + bool custom_border_colors = false; /* Check enabled features */ if (pCreateInfo->pEnabledFeatures) { @@ -1055,6 +1013,18 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, } } + vk_foreach_struct_const(ext, pCreateInfo->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: { + const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = (const void *)ext; + custom_border_colors = border_color_features->customBorderColors; + break; + } + default: + break; + } + } + device = vk_zalloc2(&physical_device->instance->alloc, pAllocator, sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); if (!device) @@ -1113,8 +1083,11 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD; device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD; - STATIC_ASSERT(sizeof(border_color) == sizeof(((struct tu6_global*) 0)->border_color)); - result = tu_bo_init_new(device, &device->global_bo, sizeof(struct tu6_global)); + uint32_t global_size = sizeof(struct tu6_global); + if (custom_border_colors) + global_size += TU_BORDER_COLOR_COUNT * sizeof(struct bcolor_entry); + + result = tu_bo_init_new(device, &device->global_bo, global_size); if (result != VK_SUCCESS) goto fail_global_bo; @@ -1123,9 +1096,23 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, goto fail_global_bo_map; struct tu6_global *global = 
device->global_bo.map; - memcpy(global->border_color, border_color, sizeof(border_color)); + tu_init_clear_blit_shaders(device->global_bo.map); global->predicate = 0; - tu_init_clear_blit_shaders(global); + tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK], + &(VkClearColorValue) {}, false); + tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_INT_TRANSPARENT_BLACK], + &(VkClearColorValue) {}, true); + tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK], + &(VkClearColorValue) { .float32[3] = 1.0f }, false); + tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_INT_OPAQUE_BLACK], + &(VkClearColorValue) { .int32[3] = 1 }, true); + tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE], + &(VkClearColorValue) { .float32[0 ... 3] = 1.0f }, false); + tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_INT_OPAQUE_WHITE], + &(VkClearColorValue) { .int32[0 ... 3] = 1 }, true); + + /* initialize to ones so ffs can be used to find unused slots */ + BITSET_ONES(device->custom_border_color); VkPipelineCacheCreateInfo ci; ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; @@ -1144,7 +1131,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain); - mtx_init(&device->vsc_pitch_mtx, mtx_plain); + mtx_init(&device->mutex, mtx_plain); *pDevice = tu_device_to_handle(device); return VK_SUCCESS; @@ -1901,6 +1888,24 @@ tu_init_sampler(struct tu_device *device, vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO); const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion = vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO); + const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color = + vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); + /* for 
non-custom border colors, the VK enum is translated directly to an offset in + * the border color buffer. custom border colors are located immediately after the + * builtin colors, and thus an offset of TU_BORDER_COLOR_BUILTIN is added. + */ + uint32_t border_color = (unsigned) pCreateInfo->borderColor; + if (pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT || + pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT) { + mtx_lock(&device->mutex); + border_color = BITSET_FFS(device->custom_border_color) - 1; /* BITSET_FFS has ffs() semantics (1-based, 0 when empty); convert to the 0-based slot index used by BITSET_CLEAR/bcolor[] */ + BITSET_CLEAR(device->custom_border_color, border_color); + mtx_unlock(&device->mutex); + tu6_pack_border_color(device->global_bo.map + gb_offset(bcolor[border_color]), + &custom_border_color->customBorderColor, + pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT); + border_color += TU_BORDER_COLOR_BUILTIN; + } unsigned aniso = pCreateInfo->anisotropyEnable ? util_last_bit(MIN2((uint32_t)pCreateInfo->maxAnisotropy >> 1, 8)) : 0; @@ -1924,13 +1929,7 @@ tu_init_sampler(struct tu_device *device, A6XX_TEX_SAMP_1_MAX_LOD(max_lod) | COND(pCreateInfo->compareEnable, A6XX_TEX_SAMP_1_COMPARE_FUNC(tu6_compare_func(pCreateInfo->compareOp))); - /* This is an offset into the border_color BO, which we fill with all the - * possible Vulkan border colors in the correct order, so we can just use - * the Vulkan enum with no translation necessary. 
- */ - sampler->descriptor[2] = - A6XX_TEX_SAMP_2_BCOLOR_OFFSET((unsigned) pCreateInfo->borderColor * - sizeof(struct bcolor_entry)); + sampler->descriptor[2] = A6XX_TEX_SAMP_2_BCOLOR(border_color); sampler->descriptor[3] = 0; if (reduction) { @@ -1980,10 +1979,21 @@ tu_DestroySampler(VkDevice _device, { TU_FROM_HANDLE(tu_device, device, _device); TU_FROM_HANDLE(tu_sampler, sampler, _sampler); + uint32_t border_color; if (!sampler) return; + border_color = (sampler->descriptor[2] & A6XX_TEX_SAMP_2_BCOLOR__MASK) >> A6XX_TEX_SAMP_2_BCOLOR__SHIFT; + if (border_color >= TU_BORDER_COLOR_BUILTIN) { + border_color -= TU_BORDER_COLOR_BUILTIN; + /* if the sampler had a custom border color, free it. TODO: no lock */ + mtx_lock(&device->mutex); + assert(!BITSET_TEST(device->custom_border_color, border_color)); + BITSET_SET(device->custom_border_color, border_color); + mtx_unlock(&device->mutex); + } + vk_object_free(&device->vk, pAllocator, sampler); } diff --git a/src/freedreno/vulkan/tu_extensions.py b/src/freedreno/vulkan/tu_extensions.py index 50afba18920..4a2d37f94ed 100644 --- a/src/freedreno/vulkan/tu_extensions.py +++ b/src/freedreno/vulkan/tu_extensions.py @@ -91,6 +91,7 @@ EXTENSIONS = [ Extension('VK_KHR_draw_indirect_count', 1, True), Extension('VK_EXT_4444_formats', 1, True), Extension('VK_EXT_conditional_rendering', 1, True), + Extension('VK_EXT_custom_border_color', 12, True), ] MAX_API_VERSION = VkVersion(MAX_API_VERSION) diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 3d1ed0fe5f4..28ba3ec84c1 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -353,12 +353,12 @@ enum global_shader { GLOBAL_SH_COUNT, }; +#define TU_BORDER_COLOR_COUNT 4096 +#define TU_BORDER_COLOR_BUILTIN 6 + /* This struct defines the layout of the global_bo */ struct tu6_global { - /* 6 bcolor_entry entries, one for each VK_BORDER_COLOR */ - uint8_t border_color[128 * 6]; - /* clear/blit shaders, all <= 16 instrs 
(16 instr = 1 instrlen unit) */ instr_t shaders[GLOBAL_SH_COUNT][16]; @@ -375,6 +375,9 @@ struct tu6_global uint32_t offset; uint32_t pad[7]; } flush_base[4]; + + /* note: larger global bo will be used for customBorderColors */ + struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[]; }; #define gb_offset(member) offsetof(struct tu6_global, member) #define global_iova(cmd, member) ((cmd)->device->global_bo.iova + gb_offset(member)) @@ -417,7 +420,8 @@ struct tu_device uint32_t vsc_draw_strm_pitch; uint32_t vsc_prim_strm_pitch; - mtx_t vsc_pitch_mtx; + BITSET_DECLARE(custom_border_color, TU_BORDER_COLOR_COUNT); + mtx_t mutex; }; VkResult _tu_device_set_lost(struct tu_device *device, diff --git a/src/freedreno/vulkan/tu_util.h b/src/freedreno/vulkan/tu_util.h index e4dd3094fd3..266dca24548 100644 --- a/src/freedreno/vulkan/tu_util.h +++ b/src/freedreno/vulkan/tu_util.h @@ -14,6 +14,8 @@ #include "util/macros.h" #include "util/u_math.h" +#include "util/format/u_format_pack.h" +#include "util/format/u_format_zs.h" #include "compiler/shader_enums.h" #include "adreno_common.xml.h" @@ -250,4 +252,48 @@ tu6_polygon_mode(VkPolygonMode mode) } } +struct bcolor_entry { + uint32_t fp32[4]; + uint64_t ui16; + uint64_t si16; + uint64_t fp16; + uint16_t rgb565; + uint16_t rgb5a1; + uint16_t rgba4; + uint8_t __pad0[2]; + uint32_t ui8; + uint32_t si8; + uint32_t rgb10a2; + uint32_t z24; /* also s8? */ + uint64_t srgb; + uint8_t __pad1[56]; +} __attribute__((aligned(128))); + +static inline void +tu6_pack_border_color(struct bcolor_entry *bcolor, const VkClearColorValue *val, bool is_int) +{ + memcpy(bcolor->fp32, val, 4 * sizeof(float)); + if (is_int) { + /* TODO: clamp? 
*/ + util_format_r16g16b16a16_uint_pack_unsigned((uint8_t*) &bcolor->fp16, + 0, val->uint32, 0, 1, 1); + return; + } +#define PACK_F(x, type) util_format_##type##_pack_rgba_float \ + ( (uint8_t*) (&bcolor->x), 0, val->float32, 0, 1, 1) + PACK_F(ui16, r16g16b16a16_unorm); + PACK_F(si16, r16g16b16a16_snorm); + PACK_F(fp16, r16g16b16a16_float); + PACK_F(rgb565, r5g6b5_unorm); + PACK_F(rgb5a1, r5g5b5a1_unorm); + PACK_F(rgba4, r4g4b4a4_unorm); + PACK_F(ui8, r8g8b8a8_unorm); + PACK_F(si8, r8g8b8a8_snorm); + PACK_F(rgb10a2, r10g10b10a2_unorm); + util_format_x8z24_unorm_pack_z_float((uint8_t*) &bcolor->z24, + 0, val->float32, 0, 1, 1); + PACK_F(srgb, r16g16b16a16_float); /* TODO: clamp? */ +#undef PACK_F +} + #endif /* TU_UTIL_H */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 67d9a8f13ae..c055ac0d2f4 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -346,7 +346,7 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring, OUT_RING(state, sampler->texsamp0); OUT_RING(state, sampler->texsamp1); OUT_RING(state, sampler->texsamp2 | - A6XX_TEX_SAMP_2_BCOLOR_OFFSET((i + bcolor_offset) * sizeof(struct bcolor_entry))); + A6XX_TEX_SAMP_2_BCOLOR(i + bcolor_offset)); OUT_RING(state, sampler->texsamp3); needs_border |= sampler->needs_border; }