X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_device.c;h=8989ec3553fbf78bcf98765612b786bbde53a045;hb=90819abb56f6b1a0cd4946b13b6caf24fb46e500;hp=62e1b9dba66ff174969e2f59f27b86c6ce2c57a7;hpb=1df586be12e53917cd71b996f1d6ed2de02e4fc3;p=mesa.git diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 62e1b9dba66..8989ec3553f 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -60,7 +60,7 @@ radv_device_get_cache_uuid(enum radeon_family family, void *uuid) memcpy(uuid, &mesa_timestamp, 4); memcpy((char*)uuid + 4, &llvm_timestamp, 4); memcpy((char*)uuid + 8, &f, 2); - snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv"); + snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv%zd", sizeof(void *)); return 0; } @@ -436,6 +436,7 @@ static const struct debug_control radv_debug_options[] = { {"errors", RADV_DEBUG_ERRORS}, {"startup", RADV_DEBUG_STARTUP}, {"checkir", RADV_DEBUG_CHECKIR}, + {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM}, {NULL, 0} }; @@ -479,6 +480,9 @@ radv_handle_per_app_options(struct radv_instance *instance, */ instance->perftest_flags |= RADV_PERFTEST_SISCHED; } + } else if (!strcmp(name, "DOOM_VFR")) { + /* Work around a Doom VFR game bug */ + instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS; } } @@ -684,6 +688,7 @@ void radv_GetPhysicalDeviceFeatures( VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* pFeatures) { + RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); memset(pFeatures, 0, sizeof(*pFeatures)); *pFeatures = (VkPhysicalDeviceFeatures) { @@ -707,7 +712,8 @@ void radv_GetPhysicalDeviceFeatures( .alphaToOne = true, .multiViewport = true, .samplerAnisotropy = true, - .textureCompressionETC2 = false, + .textureCompressionETC2 = pdevice->rad_info.chip_class >= GFX9 || + pdevice->rad_info.family == CHIP_STONEY, .textureCompressionASTC_LDR = false, .textureCompressionBC = true, .occlusionQueryPrecise = true, @@ -739,6 +745,7 @@ void radv_GetPhysicalDeviceFeatures2( VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures2KHR *pFeatures) { + RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); vk_foreach_struct(ext, pFeatures->pNext) { switch (ext->sType) { case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: { @@ -769,10 +776,11 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { VkPhysicalDevice16BitStorageFeatures *features = (VkPhysicalDevice16BitStorageFeatures*)ext; - features->storageBuffer16BitAccess = false; - features->uniformAndStorageBuffer16BitAccess = false; - features->storagePushConstant16 = false; - features->storageInputOutput16 = false; + bool enabled = HAVE_LLVM >= 0x0700 && pdevice->rad_info.chip_class >= VI; + features->storageBuffer16BitAccess = enabled; + features->uniformAndStorageBuffer16BitAccess = enabled; + features->storagePushConstant16 = enabled; + features->storageInputOutput16 = enabled; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { @@ -806,6 +814,20 @@ void radv_GetPhysicalDeviceFeatures2( features->runtimeDescriptorArray = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: { + VkPhysicalDeviceConditionalRenderingFeaturesEXT *features = + (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext; + features->conditionalRendering = true; + features->inheritedConditionalRendering = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: { + VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features = + (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext; + features->vertexAttributeInstanceRateDivisor = VK_TRUE; + features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE; + break; + } default: break; } @@ -1124,6 +1146,12 @@ void radv_GetPhysicalDeviceProperties2( properties->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: { + VkPhysicalDeviceProtectedMemoryProperties *properties = + (VkPhysicalDeviceProtectedMemoryProperties *)ext; + properties->protectedNoFault = false; + break; + } default: break; } @@ -1501,10 +1529,12 @@ VkResult radv_CreateDevice( } device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 && - (device->instance->perftest_flags & RADV_PERFTEST_BINNING); + ((device->instance->perftest_flags & RADV_PERFTEST_BINNING) || + device->physical_device->rad_info.family == CHIP_RAVEN); /* Disabled and not implemented for now. */ - device->dfsm_allowed = device->pbb_allowed && false; + device->dfsm_allowed = device->pbb_allowed && + device->physical_device->rad_info.family == CHIP_RAVEN; #ifdef ANDROID device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit; @@ -1551,6 +1581,10 @@ VkResult radv_CreateDevice( if (!radv_init_trace(device)) goto fail; + fprintf(stderr, "*****************************************************************************\n"); + fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n"); + fprintf(stderr, "*****************************************************************************\n"); + fprintf(stderr, "Trace file will be dumped to %s\n", filename); radv_dump_enabled_options(device, stderr); } @@ -1874,10 +1908,30 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff device->physical_device->rad_info.family != CHIP_CARRIZO && device->physical_device->rad_info.family != CHIP_STONEY; unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64; - unsigned max_offchip_buffers = max_offchip_buffers_per_se * - device->physical_device->rad_info.max_se; + unsigned max_offchip_buffers; unsigned offchip_granularity; unsigned hs_offchip_param; + + /* + * Per RadeonSI: + * This must be one less than the maximum number due to a hw limitation. + * Various hardware bugs in SI, CIK, and GFX9 need this. + * + * Per AMDVLK: + * Vega10 should limit max_offchip_buffers to 508 (4 * 127). + * Gfx7 should limit max_offchip_buffers to 508 + * Gfx6 should limit max_offchip_buffers to 126 (2 * 63) + * + * Follow AMDVLK here. + */ + if (device->physical_device->rad_info.family == CHIP_VEGA10 || + device->physical_device->rad_info.chip_class == CIK || + device->physical_device->rad_info.chip_class == SI) + --max_offchip_buffers_per_se; + + max_offchip_buffers = max_offchip_buffers_per_se * + device->physical_device->rad_info.max_se; + switch (device->tess_offchip_block_dw_size) { default: assert(0); @@ -1927,10 +1981,10 @@ radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs, return; if (esgs_ring_bo) - radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo, 8); + radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo); if (gsvs_ring_bo) - radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo, 8); + radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo); if (queue->device->physical_device->rad_info.chip_class >= CIK) { radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2); @@ -1955,7 +2009,7 @@ radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs, tf_va = radv_buffer_get_va(tess_rings_bo); - radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo, 8); + radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo); if (queue->device->physical_device->rad_info.chip_class >= CIK) { radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, @@ -1989,7 +2043,7 @@ radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs, scratch_va = radv_buffer_get_va(compute_scratch_bo); - radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo, 8); + radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo); radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2); radeon_emit(cs, scratch_va); @@ -2009,7 +2063,7 @@ radv_emit_global_shader_pointers(struct radv_queue *queue, va = radv_buffer_get_va(descriptor_bo); - radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo, 8); + radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo); if (queue->device->physical_device->rad_info.chip_class >= GFX9) { uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, @@ -2036,6 +2090,33 @@ radv_emit_global_shader_pointers(struct radv_queue *queue, } } +static void +radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue) +{ + struct radv_device *device = queue->device; + + if (device->gfx_init) { + uint64_t va = radv_buffer_get_va(device->gfx_init); + + radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, device->gfx_init_size_dw & 0xffff); + + radv_cs_add_buffer(device->ws, cs, device->gfx_init); + } else { + struct radv_physical_device *physical_device = device->physical_device; + si_emit_graphics(physical_device, cs); + } +} + +static void +radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue) +{ + struct radv_physical_device *physical_device = queue->device->physical_device; + si_emit_compute(physical_device, cs); +} + static VkResult radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size, @@ -2188,7 +2269,19 @@ radv_get_preamble_cs(struct radv_queue *queue, dest_cs[i] = cs; if (scratch_bo) - radv_cs_add_buffer(queue->device->ws, cs, scratch_bo, 8); + radv_cs_add_buffer(queue->device->ws, cs, scratch_bo); + + /* Emit initial configuration. */ + switch (queue->queue_family_index) { + case RADV_QUEUE_GENERAL: + radv_init_graphics_state(cs, queue); + break; + case RADV_QUEUE_COMPUTE: + radv_init_compute_state(cs, queue); + break; + case RADV_QUEUE_TRANSFER: + break; + } if (descriptor_bo != queue->descriptor_bo) { uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo); @@ -2238,7 +2331,8 @@ radv_get_preamble_cs(struct radv_queue *queue, RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SMEM_L1 | RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2); + RADV_CMD_FLAG_INV_GLOBAL_L2 | + RADV_CMD_FLAG_START_PIPELINE_STATS, 0); } else if (i == 1) { si_cs_emit_cache_flush(cs, queue->device->physical_device->rad_info.chip_class, @@ -2248,7 +2342,8 @@ radv_get_preamble_cs(struct radv_queue *queue, RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SMEM_L1 | RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2); + RADV_CMD_FLAG_INV_GLOBAL_L2 | + RADV_CMD_FLAG_START_PIPELINE_STATS, 0); } if (!queue->device->ws->cs_finalize(cs))