X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2FgenX_state.c;h=df76b33a7c35a24a5d95e2e933a1269b2954b2a0;hb=f118ca20758c85da1aaf1792e61aadb298b32a47;hp=4a175b9234d0842b0a6db1e5cee49ffe62aabf56;hpb=bea4d4c78c3a6a85d1c7d0ad5c2c5694e19d20f2;p=mesa.git
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index 4a175b9234d..df76b33a7c3 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -88,11 +88,88 @@ gen10_emit_wa_lri_to_cache_mode_zero(struct anv_batch *batch)
 }
 #endif
 
+static void
+genX(emit_slice_hashing_state)(struct anv_device *device,
+                               struct anv_batch *batch)
+{
+   device->slice_hash = (struct anv_state) { 0 };
+
+#if GEN_GEN == 11
+   const unsigned *ppipe_subslices = device->info.ppipe_subslices;
+   int subslices_delta = ppipe_subslices[0] - ppipe_subslices[1];
+   if (subslices_delta == 0)
+      return;
+
+   unsigned size = GENX(SLICE_HASH_TABLE_length) * 4;
+   device->slice_hash =
+      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
+
+   struct GENX(SLICE_HASH_TABLE) table0 = {
+      .Entry = {
+         { 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
+         { 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 },
+         { 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 },
+         { 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
+         { 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 },
+         { 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 },
+         { 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
+         { 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 },
+         { 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 },
+         { 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
+         { 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 },
+         { 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 },
+         { 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
+         { 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 },
+         { 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 },
+         { 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 }
+      }
+   };
+
+   struct GENX(SLICE_HASH_TABLE) table1 = {
+      .Entry = {
+         { 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0 },
+         { 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 },
+         { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1 },
+         { 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0 },
+         { 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 },
+         { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1 },
+         { 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0 },
+         { 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 },
+         { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1 },
+         { 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0 },
+         { 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 },
+         { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1 },
+         { 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0 },
+         { 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0 },
+         { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1 },
+         { 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0 }
+      }
+   };
+
+   const struct GENX(SLICE_HASH_TABLE) *table =
+      subslices_delta < 0 ? &table0 : &table1;
+   GENX(SLICE_HASH_TABLE_pack)(NULL, device->slice_hash.map, table);
+
+   anv_batch_emit(batch, GENX(3DSTATE_SLICE_TABLE_STATE_POINTERS), ptr) {
+      ptr.SliceHashStatePointerValid = true;
+      ptr.SliceHashTableStatePointer = device->slice_hash.offset;
+   }
+
+   anv_batch_emit(batch, GENX(3DSTATE_3D_MODE), mode) {
+      mode.SliceHashingTableEnable = true;
+   }
+#endif
+}
+
 VkResult
 genX(init_device_state)(struct anv_device *device)
 {
-   GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs,
-                                          &GENX(MOCS));
+   device->default_mocs = GENX(MOCS);
+#if GEN_GEN >= 8
+   device->external_mocs = GENX(EXTERNAL_MOCS);
+#else
+   device->external_mocs = device->default_mocs;
+#endif
 
    struct anv_batch batch;
 
@@ -151,6 +228,16 @@ genX(init_device_state)(struct anv_device *device)
       GEN_SAMPLE_POS_16X(sp._16xSample);
 #endif
    }
+
+   /* The BDW+ docs describe how to use the 3DSTATE_WM_HZ_OP instruction in the
+    * section titled "Optimized Depth Buffer Clear and/or Stencil Buffer
+    * Clear." It mentions that the packet overrides GPU state for the clear
+    * operation and needs to be reset to 0s to clear the overrides. Depending
+    * on the kernel, we may not get a context with the state for this packet
+    * zeroed. Do it ourselves just in case. We've observed this to prevent a
+    * number of GPU hangs on ICL.
+    */
+   anv_batch_emit(&batch, GENX(3DSTATE_WM_HZ_OP), hzp);
 #endif
 
 #if GEN_GEN == 10
@@ -172,6 +259,39 @@ genX(init_device_state)(struct anv_device *device)
      lri.RegisterOffset = GENX(SAMPLER_MODE_num);
      lri.DataDWord = sampler_mode;
    }
+
+   /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in the
+    * HALF_SLICE_CHICKEN7 register.
+    */
+   uint32_t half_slice_chicken7;
+   anv_pack_struct(&half_slice_chicken7, GENX(HALF_SLICE_CHICKEN7),
+                   .EnabledTexelOffsetPrecisionFix = true,
+                   .EnabledTexelOffsetPrecisionFixMask = true);
+
+   anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+      lri.RegisterOffset = GENX(HALF_SLICE_CHICKEN7_num);
+      lri.DataDWord = half_slice_chicken7;
+   }
+
+#endif
+   genX(emit_slice_hashing_state)(device, &batch);
+
+#if GEN_GEN >= 11
+   /* The hardware specification recommends disabling repacking for
+    * compatibility with the decompression mechanism in the display controller.
+    */
+   if (device->info.disable_ccs_repack) {
+      uint32_t cache_mode_0;
+      anv_pack_struct(&cache_mode_0,
+                      GENX(CACHE_MODE_0),
+                      .DisableRepackingforCompression = true,
+                      .DisableRepackingforCompressionMask = true);
+
+      anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+         lri.RegisterOffset = GENX(CACHE_MODE_0_num);
+         lri.DataDWord = cache_mode_0;
+      }
+   }
 #endif
 
    /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
@@ -277,6 +397,8 @@ VkResult genX(CreateSampler)(
     VkSampler*                                  pSampler)
 {
    ANV_FROM_HANDLE(anv_device, device, _device);
+   const struct anv_physical_device *pdevice =
+      &device->instance->physicalDevice;
    struct anv_sampler *sampler;
 
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
@@ -288,8 +410,10 @@ VkResult genX(CreateSampler)(
 
    sampler->n_planes = 1;
 
+   uint32_t border_color_stride = GEN_IS_HASWELL ? 512 : 64;
    uint32_t border_color_offset = device->border_colors.offset +
-                                  pCreateInfo->borderColor * 64;
+                                  pCreateInfo->borderColor *
+                                  border_color_stride;
 
 #if GEN_GEN >= 9
    unsigned sampler_reduction_mode = STD_FILTER;
@@ -304,7 +428,12 @@ VkResult genX(CreateSampler)(
          ANV_FROM_HANDLE(anv_ycbcr_conversion, conversion,
                          pSamplerConversion->conversion);
 
-         if (conversion == NULL)
+         /* Ignore conversion for non-YUV formats. This fulfills a requirement
+          * for clients that want to use the same code path for images with
+          * external formats (VK_FORMAT_UNDEFINED) and "regular" RGBA images
+          * where the format is known.
+          */
+         if (conversion == NULL || !conversion->format->can_ycbcr)
             break;
 
          sampler->n_planes = conversion->format->n_planes;
@@ -327,6 +456,17 @@ VkResult genX(CreateSampler)(
       }
    }
 
+   if (pdevice->has_bindless_samplers) {
+      /* If we have bindless, allocate enough samplers. We allocate 32 bytes
+       * for each sampler instead of 16 bytes because we want all bindless
+       * samplers to be 32-byte aligned so we don't have to use indirect
+       * sampler messages on them.
+       */
+      sampler->bindless_state =
+         anv_state_pool_alloc(&device->dynamic_state_pool,
+                              sampler->n_planes * 32, 32);
+   }
+
    for (unsigned p = 0; p < sampler->n_planes; p++) {
       const bool plane_has_chroma =
          sampler->conversion && sampler->conversion->format->planes[p].has_chroma;
@@ -396,6 +536,11 @@ VkResult genX(CreateSampler)(
       };
 
       GENX(SAMPLER_STATE_pack)(NULL, sampler->state[p], &sampler_state);
+
+      if (sampler->bindless_state.map) {
+         memcpy(sampler->bindless_state.map + p * 32,
+                sampler->state[p], GENX(SAMPLER_STATE_length) * 4);
+      }
    }
 
    *pSampler = anv_sampler_to_handle(sampler);
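
Editor's note (not part of the patch): the HALF_SLICE_CHICKEN7 and CACHE_MODE_0 writes added above follow Intel's masked-register convention, where the upper 16 bits of the DWord act as a per-bit write-enable mask for the lower 16 bits; that is why each field set via anv_pack_struct() is paired with its ...Mask counterpart before being written with MI_LOAD_REGISTER_IMM. The standalone sketch below only illustrates that bit layout; masked_reg_value() is a hypothetical helper and is not part of anv or this patch.

/* Minimal sketch of a masked-register DWord: field values in bits 15:0,
 * per-bit write-enable mask in bits 31:16 (assumed layout, as used by the
 * LRI writes above).
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
masked_reg_value(uint32_t field_mask, uint32_t field_value)
{
   /* Only bits whose mask bit is set are updated by the hardware. */
   return (field_mask << 16) | (field_value & field_mask);
}

int
main(void)
{
   /* Set only bit 1 (e.g. "Enabled Texel Offset Precision Fix") and leave
    * every other bit of the register untouched. */
   printf("LRI data dword: 0x%08x\n", masked_reg_value(1u << 1, 1u << 1));
   /* prints 0x00020002 */
   return 0;
}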