#include "compiler/glsl_types.h"
#include "util/debug.h"
#include "util/disk_cache.h"
+#include "util/u_atomic.h"
#include "vk_format.h"
#include "vk_util.h"
#include "drm-uapi/msm_drm.h"
+/* for fd_get_driver/device_uuid() */
+#include "freedreno/common/freedreno_uuid.h"
+
static int
tu_device_get_cache_uuid(uint16_t family, void *uuid)
{
return 0;
}
-static void
-tu_get_driver_uuid(void *uuid)
-{
- memset(uuid, 0, VK_UUID_SIZE);
- snprintf(uuid, VK_UUID_SIZE, "freedreno");
-}
-
-static void
-tu_get_device_uuid(void *uuid)
-{
- memset(uuid, 0, VK_UUID_SIZE);
-}
-
static VkResult
tu_bo_init(struct tu_device *dev,
struct tu_bo *bo,
switch (device->gpu_id) {
case 618:
- device->magic.RB_UNKNOWN_8E04_blit = 0x00100000;
device->ccu_offset_gmem = 0x7c000; /* 0x7e000 in some cases? */
device->ccu_offset_bypass = 0x10000;
+ device->tile_align_w = 64;
device->magic.PC_UNKNOWN_9805 = 0x0;
device->magic.SP_UNKNOWN_A0F8 = 0x0;
break;
case 630:
case 640:
- device->magic.RB_UNKNOWN_8E04_blit = 0x01000000;
device->ccu_offset_gmem = 0xf8000;
device->ccu_offset_bypass = 0x20000;
+ device->tile_align_w = 64;
device->magic.PC_UNKNOWN_9805 = 0x1;
device->magic.SP_UNKNOWN_A0F8 = 0x1;
break;
+ case 650:
+ device->ccu_offset_gmem = 0x114000;
+ device->ccu_offset_bypass = 0x30000;
+ device->tile_align_w = 96;
+ device->magic.PC_UNKNOWN_9805 = 0x2;
+ device->magic.SP_UNKNOWN_A0F8 = 0x2;
+ break;
default:
result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
"device %s is unsupported", device->name);
fprintf(stderr, "WARNING: tu is not a conformant vulkan implementation, "
"testing use only.\n");
- tu_get_driver_uuid(&device->device_uuid);
- tu_get_device_uuid(&device->device_uuid);
+ fd_get_driver_uuid(device->driver_uuid);
+ fd_get_device_uuid(device->device_uuid, device->gpu_id);
- tu_fill_device_extension_table(device, &device->supported_extensions);
+ tu_physical_device_get_supported_extensions(device, &device->supported_extensions);
if (result != VK_SUCCESS) {
vk_error(instance, result);
{ "nobin", TU_DEBUG_NOBIN },
{ "sysmem", TU_DEBUG_SYSMEM },
{ "forcebin", TU_DEBUG_FORCEBIN },
+ { "noubwc", TU_DEBUG_NOUBWC },
{ NULL, 0 }
};
const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
int index = tu_get_instance_extension_index(ext_name);
- if (index < 0 || !tu_supported_instance_extensions.extensions[index]) {
+ if (index < 0 || !tu_instance_extensions_supported.extensions[index]) {
vk_free2(&default_alloc, pAllocator, instance);
return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT);
}
memset(pFeatures, 0, sizeof(*pFeatures));
*pFeatures = (VkPhysicalDeviceFeatures) {
- .robustBufferAccess = false,
+ .robustBufferAccess = true,
.fullDrawIndexUint32 = true,
- .imageCubeArray = false,
+ .imageCubeArray = true,
.independentBlend = true,
.geometryShader = true,
- .tessellationShader = false,
+ .tessellationShader = true,
.sampleRateShading = true,
.dualSrcBlend = true,
.logicOp = true,
.depthBounds = false,
.wideLines = false,
.largePoints = false,
- .alphaToOne = false,
+ .alphaToOne = true,
.multiViewport = false,
.samplerAnisotropy = true,
.textureCompressionETC2 = true,
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
(VkPhysicalDeviceSamplerYcbcrConversionFeatures *) ext;
- features->samplerYcbcrConversion = false;
+ features->samplerYcbcrConversion = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
features->geometryStreams = false;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
+ VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
+ (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
+ features->indexTypeUint8 = true;
+ break;
+ }
default:
break;
}
.maxComputeWorkGroupInvocations = 2048,
.maxComputeWorkGroupSize = { 2048, 2048, 2048 },
.subPixelPrecisionBits = 8,
- .subTexelPrecisionBits = 4 /* FIXME */,
- .mipmapPrecisionBits = 4 /* FIXME */,
+ .subTexelPrecisionBits = 8,
+ .mipmapPrecisionBits = 8,
.maxDrawIndexedIndexValue = UINT32_MAX,
.maxDrawIndirectCount = UINT32_MAX,
- .maxSamplerLodBias = 16,
+ .maxSamplerLodBias = 4095.0 / 256.0, /* [-16, 15.99609375] */
.maxSamplerAnisotropy = 16,
.maxViewports = MAX_VIEWPORTS,
.maxViewportDimensions = { (1 << 14), (1 << 14) },
.minTexelBufferOffsetAlignment = 64,
.minUniformBufferOffsetAlignment = 64,
.minStorageBufferOffsetAlignment = 64,
- .minTexelOffset = -32,
- .maxTexelOffset = 31,
+ .minTexelOffset = -16,
+ .maxTexelOffset = 15,
.minTexelGatherOffset = -32,
.maxTexelGatherOffset = 31,
- .minInterpolationOffset = -2,
- .maxInterpolationOffset = 2,
- .subPixelInterpolationOffsetBits = 8,
+ .minInterpolationOffset = -0.5,
+ .maxInterpolationOffset = 0.4375,
+ .subPixelInterpolationOffsetBits = 4,
.maxFramebufferWidth = (1 << 14),
.maxFramebufferHeight = (1 << 14),
.maxFramebufferLayers = (1 << 10),
properties->maxTransformFeedbackStreamDataSize = 512;
properties->maxTransformFeedbackBufferDataSize = 512;
properties->maxTransformFeedbackBufferDataStride = 512;
- /* TODO: enable xfb query */
- properties->transformFeedbackQueries = false;
+ properties->transformFeedbackQueries = true;
properties->transformFeedbackStreamsLinesTriangles = false;
properties->transformFeedbackRasterizationStreamSelect = false;
properties->transformFeedbackDraw = true;
properties->variableSampleLocations = true;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES: {
+ VkPhysicalDeviceSamplerFilterMinmaxProperties *properties =
+ (VkPhysicalDeviceSamplerFilterMinmaxProperties *)ext;
+ properties->filterMinmaxImageComponentMapping = true;
+ properties->filterMinmaxSingleComponentFormats = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
+ VkPhysicalDeviceSubgroupProperties *properties =
+ (VkPhysicalDeviceSubgroupProperties *)ext;
+ properties->subgroupSize = 64;
+ properties->supportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
+ properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
+ VK_SUBGROUP_FEATURE_VOTE_BIT;
+ properties->quadOperationsInAllStages = false;
+ break;
+ }
default:
break;
if (!device->compiler)
goto fail_queues;
-#define VSC_DATA_SIZE(pitch) ((pitch) * 32 + 0x100) /* extra size to store VSC_SIZE */
-#define VSC_DATA2_SIZE(pitch) ((pitch) * 32)
+#define VSC_DRAW_STRM_SIZE(pitch) ((pitch) * 32 + 0x100) /* extra size to store VSC_SIZE */
+#define VSC_PRIM_STRM_SIZE(pitch) ((pitch) * 32)
- device->vsc_data_pitch = 0x440 * 4;
- device->vsc_data2_pitch = 0x1040 * 4;
+ device->vsc_draw_strm_pitch = 0x440 * 4;
+ device->vsc_prim_strm_pitch = 0x1040 * 4;
- result = tu_bo_init_new(device, &device->vsc_data, VSC_DATA_SIZE(device->vsc_data_pitch));
+ result = tu_bo_init_new(device, &device->vsc_draw_strm, VSC_DRAW_STRM_SIZE(device->vsc_draw_strm_pitch));
if (result != VK_SUCCESS)
goto fail_vsc_data;
- result = tu_bo_init_new(device, &device->vsc_data2, VSC_DATA2_SIZE(device->vsc_data2_pitch));
+ result = tu_bo_init_new(device, &device->vsc_prim_strm, VSC_PRIM_STRM_SIZE(device->vsc_prim_strm_pitch));
if (result != VK_SUCCESS)
goto fail_vsc_data2;
device->mem_cache = tu_pipeline_cache_from_handle(pc);
+ for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
+ mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);
+
*pDevice = tu_device_to_handle(device);
return VK_SUCCESS;
tu_bo_finish(device, &device->border_color);
fail_border_color:
- tu_bo_finish(device, &device->vsc_data2);
+ tu_bo_finish(device, &device->vsc_prim_strm);
fail_vsc_data2:
- tu_bo_finish(device, &device->vsc_data);
+ tu_bo_finish(device, &device->vsc_draw_strm);
fail_vsc_data:
ralloc_free(device->compiler);
if (!device)
return;
- tu_bo_finish(device, &device->vsc_data);
- tu_bo_finish(device, &device->vsc_data2);
+ tu_bo_finish(device, &device->vsc_draw_strm);
+ tu_bo_finish(device, &device->vsc_prim_strm);
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
for (unsigned q = 0; q < device->queue_count[i]; q++)
vk_free(&device->alloc, device->queues[i]);
}
+ for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
+ if (device->scratch_bos[i].initialized)
+ tu_bo_finish(device, &device->scratch_bos[i].bo);
+ }
+
/* the compiler does not use pAllocator */
ralloc_free(device->compiler);
vk_free(&device->alloc, device);
}
+/**
+ * Hand out a device-owned scratch BO that is at least `size` bytes large.
+ *
+ * The request is rounded up to a power-of-two size class, never smaller than
+ * 2^MIN_SCRATCH_BO_SIZE_LOG2, and each size class owns one slot in
+ * dev->scratch_bos. If the slot for the requested class, or for any larger
+ * class, is already initialized, that BO is returned without taking a lock.
+ * Otherwise the BO for the requested class is created under the slot's
+ * construct_mtx.
+ *
+ * \param dev   device whose scratch_bos table is consulted and filled in
+ * \param size  minimum number of bytes the caller needs
+ * \param bo    receives a pointer to the slot's BO on success; it is left
+ *              untouched on failure
+ * \return VK_SUCCESS, or the error returned by tu_bo_init_new().
+ */
+VkResult
+tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
+{
+   unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
+   unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
+   assert(index < ARRAY_SIZE(dev->scratch_bos));
+
+   for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
+      if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
+         /* Fast path: just return the already-allocated BO. */
+         *bo = &dev->scratch_bos[i].bo;
+         return VK_SUCCESS;
+      }
+   }
+
+   /* Slow path: actually allocate the BO. The per-slot mutex serializes
+    * threads that race to create the same size class, so that only one of
+    * them performs the allocation.
+    */
+   mtx_lock(&dev->scratch_bos[index].construct_mtx);
+
+   /* Another thread may have allocated it already while we were waiting on
+    * the lock. We need to check this in order to avoid double-allocating.
+    */
+   if (dev->scratch_bos[index].initialized) {
+      mtx_unlock(&dev->scratch_bos[index].construct_mtx);
+      *bo = &dev->scratch_bos[index].bo;
+      return VK_SUCCESS;
+   }
+
+   /* Keep the size in 64 bits: size_log2 is derived from a 64-bit request,
+    * and storing 1ull << size_log2 in a plain unsigned would truncate it
+    * once size_log2 reaches the width of unsigned.
+    */
+   uint64_t bo_size = 1ull << size_log2;
+   VkResult result = tu_bo_init_new(dev, &dev->scratch_bos[index].bo, bo_size);
+   if (result != VK_SUCCESS) {
+      mtx_unlock(&dev->scratch_bos[index].construct_mtx);
+      return result;
+   }
+
+   /* Publish the slot only after the BO has been created successfully. */
+   p_atomic_set(&dev->scratch_bos[index].initialized, true);
+
+   mtx_unlock(&dev->scratch_bos[index].construct_mtx);
+
+   *bo = &dev->scratch_bos[index].bo;
+   return VK_SUCCESS;
+}
+
VkResult
tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
VkLayerProperties *pProperties)
return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
for (int i = 0; i < TU_INSTANCE_EXTENSION_COUNT; i++) {
- if (tu_supported_instance_extensions.extensions[i]) {
+ if (tu_instance_extensions_supported.extensions[i]) {
vk_outarray_append(&out, prop) { *prop = tu_instance_extensions[i]; }
}
}
vk_free2(&device->alloc, pAllocator, fb);
}
-static enum a6xx_tex_clamp
-tu6_tex_wrap(VkSamplerAddressMode address_mode)
-{
- switch (address_mode) {
- case VK_SAMPLER_ADDRESS_MODE_REPEAT:
- return A6XX_TEX_REPEAT;
- case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
- return A6XX_TEX_MIRROR_REPEAT;
- case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
- return A6XX_TEX_CLAMP_TO_EDGE;
- case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
- return A6XX_TEX_CLAMP_TO_BORDER;
- case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
- /* only works for PoT.. need to emulate otherwise! */
- return A6XX_TEX_MIRROR_CLAMP;
- default:
- unreachable("illegal tex wrap mode");
- break;
- }
-}
-
-static enum a6xx_tex_filter
-tu6_tex_filter(VkFilter filter, unsigned aniso)
-{
- switch (filter) {
- case VK_FILTER_NEAREST:
- return A6XX_TEX_NEAREST;
- case VK_FILTER_LINEAR:
- return aniso ? A6XX_TEX_ANISO : A6XX_TEX_LINEAR;
- case VK_FILTER_CUBIC_EXT:
- return A6XX_TEX_CUBIC;
- default:
- unreachable("illegal texture filter");
- break;
- }
-}
-
-static inline enum adreno_compare_func
-tu6_compare_func(VkCompareOp op)
-{
- return (enum adreno_compare_func) op;
-}
-
static void
tu_init_sampler(struct tu_device *device,
struct tu_sampler *sampler,
const VkSamplerCreateInfo *pCreateInfo)
{
+ const struct VkSamplerReductionModeCreateInfo *reduction =
+ vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
+ const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
+ vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
+
unsigned aniso = pCreateInfo->anisotropyEnable ?
util_last_bit(MIN2((uint32_t)pCreateInfo->maxAnisotropy >> 1, 8)) : 0;
bool miplinear = (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR);
+ float min_lod = CLAMP(pCreateInfo->minLod, 0.0f, 4095.0f / 256.0f);
+ float max_lod = CLAMP(pCreateInfo->maxLod, 0.0f, 4095.0f / 256.0f);
sampler->descriptor[0] =
COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
sampler->descriptor[1] =
/* COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | */
COND(pCreateInfo->unnormalizedCoordinates, A6XX_TEX_SAMP_1_UNNORM_COORDS) |
- A6XX_TEX_SAMP_1_MIN_LOD(pCreateInfo->minLod) |
- A6XX_TEX_SAMP_1_MAX_LOD(pCreateInfo->maxLod) |
+ A6XX_TEX_SAMP_1_MIN_LOD(min_lod) |
+ A6XX_TEX_SAMP_1_MAX_LOD(max_lod) |
COND(pCreateInfo->compareEnable,
A6XX_TEX_SAMP_1_COMPARE_FUNC(tu6_compare_func(pCreateInfo->compareOp)));
/* This is an offset into the border_color BO, which we fill with all the
sizeof(struct bcolor_entry));
sampler->descriptor[3] = 0;
+ if (reduction) {
+ sampler->descriptor[2] |= A6XX_TEX_SAMP_2_REDUCTION_MODE(
+ tu6_reduction_mode(reduction->reductionMode));
+ }
+
+ sampler->ycbcr_sampler = ycbcr_conversion ?
+ tu_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
+
+ if (sampler->ycbcr_sampler &&
+ sampler->ycbcr_sampler->chroma_filter == VK_FILTER_LINEAR) {
+ sampler->descriptor[2] |= A6XX_TEX_SAMP_2_CHROMA_LINEAR;
+ }
+
/* TODO:
* A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR disables mipmapping, but vk has no NONE mipfilter?
*/