intel/assembler: Add labels support
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index 84843ad58dbf3b2381b4637809364925047172b3..908c676a880550702c494772d3eda5be918e9ecc 100644
 
 #include "anv_private.h"
 
+#include "common/gen_aux_map.h"
 #include "common/gen_sample_positions.h"
 #include "genxml/gen_macros.h"
 #include "genxml/genX_pack.h"
 
 #include "vk_util.h"
 
-#if GEN_GEN == 10
-/**
- * From Gen10 Workarounds page in h/w specs:
- * WaSampleOffsetIZ:
- *    "Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
- *     markers in the pipeline by programming a PIPE_CONTROL with stall."
- */
-static void
-gen10_emit_wa_cs_stall_flush(struct anv_batch *batch)
-{
-
-   anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
-      pc.CommandStreamerStallEnable = true;
-      pc.StallAtPixelScoreboard = true;
-   }
-}
-
-/**
- * From Gen10 Workarounds page in h/w specs:
- * WaSampleOffsetIZ:
- *    "When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
- *     MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL)
- *     after the command to ensure the state has been delivered prior to any
- *     command causing a marker in the pipeline."
- */
-static void
-gen10_emit_wa_lri_to_cache_mode_zero(struct anv_batch *batch)
-{
-   /* Before changing the value of CACHE_MODE_0 register, GFX pipeline must
-    * be idle; i.e., full flush is required.
-    */
-   anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
-      pc.DepthCacheFlushEnable = true;
-      pc.DCFlushEnable = true;
-      pc.RenderTargetCacheFlushEnable = true;
-      pc.InstructionCacheInvalidateEnable = true;
-      pc.StateCacheInvalidationEnable = true;
-      pc.TextureCacheInvalidationEnable = true;
-      pc.VFCacheInvalidationEnable = true;
-      pc.ConstantCacheInvalidationEnable = true;
-   }
-
-   /* Write to CACHE_MODE_0 (0x7000) */
-   uint32_t cache_mode_0 = 0;
-   anv_pack_struct(&cache_mode_0, GENX(CACHE_MODE_0));
-
-   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
-      lri.RegisterOffset = GENX(CACHE_MODE_0_num);
-      lri.DataDWord      = cache_mode_0;
-   }
-}
-#endif
-
 static void
 genX(emit_slice_hashing_state)(struct anv_device *device,
                                struct anv_batch *batch)
@@ -205,10 +153,6 @@ genX(init_device_state)(struct anv_device *device)
 #if GEN_GEN >= 8
    anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), ck);
 
-#if GEN_GEN == 10
-   gen10_emit_wa_cs_stall_flush(&batch);
-#endif
-
    /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and
     * VkPhysicalDeviceFeatures::standardSampleLocations.
     */
@@ -233,10 +177,6 @@ genX(init_device_state)(struct anv_device *device)
    anv_batch_emit(&batch, GENX(3DSTATE_WM_HZ_OP), hzp);
 #endif
 
-#if GEN_GEN == 10
-   gen10_emit_wa_lri_to_cache_mode_zero(&batch);
-#endif
-
 #if GEN_GEN == 11
    /* The default behavior of bit 5 "Headerless Message for Pre-emptable
     * Contexts" in SAMPLER MODE register is set to 0, which means
@@ -297,6 +237,44 @@ genX(init_device_state)(struct anv_device *device)
          lri.DataDWord      = cache_mode_0;
       }
    }
+
+   /* An unknown issue causes VS push constants to become
+    * corrupted during object-level preemption. For now, restrict
+    * preemption to the command buffer level to avoid rendering
+    * corruption.
+    */
+   uint32_t cs_chicken1;
+   anv_pack_struct(&cs_chicken1,
+                   GENX(CS_CHICKEN1),
+                   .ReplayMode = MidcmdbufferPreemption,
+                   .ReplayModeMask = true);
+
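+   /* CS_CHICKEN1 is a masked register: the ReplayModeMask bit must be set
+    * for the ReplayMode value written here to take effect.
+    */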
+   anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+      lri.RegisterOffset = GENX(CS_CHICKEN1_num);
+      lri.DataDWord      = cs_chicken1;
+   }
+#endif
+
+#if GEN_GEN == 12
+   if (device->info.has_aux_map) {
+      uint64_t aux_base_addr = gen_aux_map_get_base(device->aux_map_ctx);
+      assert(aux_base_addr % (32 * 1024) == 0);
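+      /* GFX_AUX_TABLE_BASE_ADDR is a 64-bit register, but MI_LOAD_REGISTER_IMM
+       * writes 32 bits at a time, so program the low and high dwords with two
+       * separate LRIs.
+       */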
+      anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+         lri.RegisterOffset = GENX(GFX_AUX_TABLE_BASE_ADDR_num);
+         lri.DataDWord = aux_base_addr & 0xffffffff;
+      }
+      anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+         lri.RegisterOffset = GENX(GFX_AUX_TABLE_BASE_ADDR_num) + 4;
+         lri.DataDWord = aux_base_addr >> 32;
+      }
+   }
 #endif
 
    /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
@@ -325,6 +303,23 @@ genX(init_device_state)(struct anv_device *device)
 #endif
    }
 
+#if GEN_GEN >= 12
+   const struct gen_l3_config *cfg = gen_get_default_l3_config(&device->info);
+   if (!cfg) {
+      /* Platforms without a default L3 config just set up full-way allocation. */
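+      /* Full-way allocation leaves the entire L3 available to every client
+       * rather than carving out fixed per-client partitions.
+       */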
+      uint32_t l3cr;
+      anv_pack_struct(&l3cr, GENX(L3ALLOC),
+                      .L3FullWayAllocationEnable = true);
+      anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+         lri.RegisterOffset = GENX(L3ALLOC_num);
+         lri.DataDWord      = l3cr;
+      }
+   }
+#endif
+
    anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
 
    assert(batch.next <= batch.end);
@@ -405,17 +400,31 @@ VkResult genX(CreateSampler)(
 
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
 
-   sampler = vk_zalloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
+   sampler = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (!sampler)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
+   vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER);
    sampler->n_planes = 1;
 
    uint32_t border_color_stride = GEN_IS_HASWELL ? 512 : 64;
-   uint32_t border_color_offset = device->border_colors.offset +
-                                  pCreateInfo->borderColor *
-                                  border_color_stride;
+   uint32_t border_color_offset;
+   ASSERTED bool has_custom_color = false;
+   if (pCreateInfo->borderColor <= VK_BORDER_COLOR_INT_OPAQUE_WHITE) {
+      border_color_offset = device->border_colors.offset +
+                            pCreateInfo->borderColor *
+                            border_color_stride;
+   } else {
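+      /* VK_EXT_custom_border_color: allocate a slot for this sampler in the
+       * reserved pool; the actual color is written when the
+       * VkSamplerCustomBorderColorCreateInfoEXT struct is parsed below.
+       */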
+      assert(GEN_GEN >= 8);
+      sampler->custom_border_color =
+         anv_state_reserved_pool_alloc(&device->custom_border_colors);
+      border_color_offset = sampler->custom_border_color.offset;
+   }
 
 #if GEN_GEN >= 9
    unsigned sampler_reduction_mode = STD_FILTER;
@@ -452,12 +461,40 @@
          break;
       }
 #endif
+      case VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT: {
+         VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
+            (VkSamplerCustomBorderColorCreateInfoEXT *) ext;
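+         /* Skip if no custom border color slot was allocated, i.e. the
+          * borderColor enum was not one of the CUSTOM values.
+          */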
+         if (sampler->custom_border_color.map == NULL)
+            break;
+         struct gen8_border_color *cbc = sampler->custom_border_color.map;
+         if (custom_border_color->format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
+            /* B4G4R4A4_UNORM_PACK16 is treated as R4G4B4A4_UNORM_PACK16 with
+             * a swizzle, but this does not carry over to the sampler for
+             * border colors, so we need to do the swizzle ourselves here.
+             */
+            cbc->uint32[0] = custom_border_color->customBorderColor.uint32[2];
+            cbc->uint32[1] = custom_border_color->customBorderColor.uint32[1];
+            cbc->uint32[2] = custom_border_color->customBorderColor.uint32[0];
+            cbc->uint32[3] = custom_border_color->customBorderColor.uint32[3];
+         } else {
+            /* Both structs share the same layout, so just copy them over. */
+            memcpy(cbc, &custom_border_color->customBorderColor,
+                   sizeof(VkClearColorValue));
+         }
+         has_custom_color = true;
+         break;
+      }
       default:
          anv_debug_ignored_stype(ext->sType);
          break;
       }
    }
 
+   assert((sampler->custom_border_color.map == NULL) || has_custom_color);
+
    if (device->physical->has_bindless_samplers) {
       /* If we have bindless, allocate enough samplers.  We allocate 32 bytes
        * for each sampler instead of 16 bytes because we want all bindless