anv: Move invariant state to small initial batch
author Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Sat, 6 Feb 2016 00:11:12 +0000 (16:11 -0800)
committer Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Sat, 6 Feb 2016 00:13:53 +0000 (16:13 -0800)
We use the simple batch helper to submit a batch at driver startup time
which holds all the state that never changes.  We don't have a whole lot
and once we enable tessellation there'll be even less. Even so, it's a
simple mechanism and reduces our steady-state batch sizes a bit.

src/vulkan/anv_device.c
src/vulkan/anv_private.h
src/vulkan/gen7_pipeline.c
src/vulkan/gen7_state.c
src/vulkan/gen8_pipeline.c
src/vulkan/gen8_state.c

index 91485a191c00b70fc0e50db7e691476318a98d41..6f874b2d1abc1d9e6197bd6abec7c8604fc86319 100644 (file)
@@ -673,8 +673,9 @@ anv_device_init_border_colors(struct anv_device *device)
                                                     border_colors);
 }
 
-static VkResult
-submit_simple_batch(struct anv_device *device, struct anv_batch *batch)
+VkResult
+anv_device_submit_simple_batch(struct anv_device *device,
+                               struct anv_batch *batch)
 {
    struct anv_state state;
    struct drm_i915_gem_execbuffer2 execbuf;
@@ -685,6 +686,7 @@ submit_simple_batch(struct anv_device *device, struct anv_batch *batch)
    int64_t timeout;
    int ret;
 
+   /* Kernel driver requires 8 byte aligned batch length */
    size = align_u32(batch->next - batch->start, 8);
    state = anv_state_pool_alloc(&device->dynamic_state_pool, MAX(size, 64), 32);
    bo = &device->dynamic_state_pool.block_pool->bo;
@@ -702,7 +704,7 @@ submit_simple_batch(struct anv_device *device, struct anv_batch *batch)
    execbuf.buffers_ptr = (uintptr_t) exec2_objects;
    execbuf.buffer_count = 1;
    execbuf.batch_start_offset = state.offset;
-   execbuf.batch_len = batch->next - state.map;
+   execbuf.batch_len = size;
    execbuf.cliprects_ptr = 0;
    execbuf.num_cliprects = 0;
    execbuf.DR1 = 0;
@@ -814,6 +816,23 @@ VkResult anv_CreateDevice(
 
    anv_queue_init(device, &device->queue);
 
+   switch (device->info.gen) {
+   case 7:
+      if (!device->info.is_haswell)
+         result = gen7_init_device_state(device);
+      else
+         result = gen75_init_device_state(device);
+      break;
+   case 8:
+      result = gen8_init_device_state(device);
+      break;
+   case 9:
+      result = gen9_init_device_state(device);
+      break;
+   }
+   if (result != VK_SUCCESS)
+      goto fail_fd;
+
    result = anv_device_init_meta(device);
    if (result != VK_SUCCESS)
       goto fail_fd;
@@ -1006,7 +1025,7 @@ VkResult anv_DeviceWaitIdle(
    anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END);
    anv_batch_emit(&batch, GEN7_MI_NOOP);
 
-   return submit_simple_batch(device, &batch);
+   return anv_device_submit_simple_batch(device, &batch);
 }
 
 VkResult
index 8e12792456b95c4f391a8e1f4309a5885196d073..188bd77ab252f3ef58b616099fa2ce7e74ef6c0a 100644 (file)
@@ -662,6 +662,11 @@ struct anv_device {
     pthread_mutex_t                             mutex;
 };
 
+VkResult gen7_init_device_state(struct anv_device *device);
+VkResult gen75_init_device_state(struct anv_device *device);
+VkResult gen8_init_device_state(struct anv_device *device);
+VkResult gen9_init_device_state(struct anv_device *device);
+
 void* anv_gem_mmap(struct anv_device *device,
                    uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
 void anv_gem_munmap(void *p, uint64_t size);
@@ -738,6 +743,8 @@ void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
 void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
 uint64_t anv_batch_emit_reloc(struct anv_batch *batch,
                               void *location, struct anv_bo *bo, uint32_t offset);
+VkResult anv_device_submit_simple_batch(struct anv_device *device,
+                                        struct anv_batch *batch);
 
 struct anv_address {
    struct anv_bo *bo;
index 3fedd74f1ea9ca0f8b2f9f6f1e82a59c924be8a2..d4d6131794a67c08602bdc3639b3369475791f0d 100644 (file)
@@ -214,17 +214,8 @@ genX(graphics_pipeline_create)(
    gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
                                 pCreateInfo->pMultisampleState);
 
-   anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_VF_STATISTICS,
-                   .StatisticsEnable = true);
-   anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_HS, .Enable = false);
-   anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_TE, .TEEnable = false);
-   anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_DS, .DSFunctionEnable = false);
-   anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_STREAMOUT, .SOFunctionEnable = false);
-
    emit_urb_setup(pipeline);
 
-   anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_AA_LINE_PARAMETERS);
-
    const VkPipelineRasterizationStateCreateInfo *rs_info =
       pCreateInfo->pRasterizationState;
 
index 2375070636e139929832fc04ec31c7413228b868..ee4b7f3a5c45c4639fe8510e778d9addb9dc53e7 100644 (file)
 
 #include "genX_state_util.h"
 
+/**
+ * Build and submit the one-time batch that holds state which never changes
+ * after device creation.
+ *
+ * The commands are assembled in a small on-stack buffer and handed to
+ * anv_device_submit_simple_batch(); its result is returned unchanged.
+ */
+VkResult
+genX(init_device_state)(struct anv_device *device)
+{
+   struct anv_batch batch;
+
+   uint32_t cmds[64];
+   batch.start = batch.next = cmds;
+   batch.end = (void *) cmds + sizeof(cmds);
+
+   /* Every command emitted below is a 3DSTATE_* packet, so the 3D pipeline
+    * must be the one selected here; the gen8+ implementation of this
+    * function selects _3D for the same sequence.
+    */
+   anv_batch_emit(&batch, GEN7_PIPELINE_SELECT,
+                  .PipelineSelection = _3D);
+
+   anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS),
+                  .StatisticsEnable = true);
+   anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false);
+   anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false);
+   anv_batch_emit(&batch, GENX(3DSTATE_DS), .DSFunctionEnable = false);
+   anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false);
+   anv_batch_emit(&batch, GEN7_3DSTATE_AA_LINE_PARAMETERS);
+   anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END));
+
+   /* Sanity check that the emitted commands fit in cmds[]. */
+   assert(batch.next <= batch.end);
+
+   return anv_device_submit_simple_batch(device, &batch);
+}
+
 GENX_FUNC(GEN7, GEN75) void
 genX(fill_buffer_surface_state)(void *state, enum isl_format format,
                                 uint32_t offset, uint32_t range,
index 4097de177ae3fc3f2b051b4503be4c853e6e38f7..389f7f797c04753148fc0540634f36dc5f715a9e 100644 (file)
@@ -247,76 +247,6 @@ emit_ms_state(struct anv_pipeline *pipeline,
 
    anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK),
       .SampleMask = sample_mask);
-
-   /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and
-    * VkPhysicalDeviceFeatures::standardSampleLocations.
-    */
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_PATTERN),
-      ._1xSample0XOffset      = 0.5,
-      ._1xSample0YOffset      = 0.5,
-      ._2xSample0XOffset      = 0.25,
-      ._2xSample0YOffset      = 0.25,
-      ._2xSample1XOffset      = 0.75,
-      ._2xSample1YOffset      = 0.75,
-      ._4xSample0XOffset      = 0.375,
-      ._4xSample0YOffset      = 0.125,
-      ._4xSample1XOffset      = 0.875,
-      ._4xSample1YOffset      = 0.375,
-      ._4xSample2XOffset      = 0.125,
-      ._4xSample2YOffset      = 0.625,
-      ._4xSample3XOffset      = 0.625,
-      ._4xSample3YOffset      = 0.875,
-      ._8xSample0XOffset      = 0.5625,
-      ._8xSample0YOffset      = 0.3125,
-      ._8xSample1XOffset      = 0.4375,
-      ._8xSample1YOffset      = 0.6875,
-      ._8xSample2XOffset      = 0.8125,
-      ._8xSample2YOffset      = 0.5625,
-      ._8xSample3XOffset      = 0.3125,
-      ._8xSample3YOffset      = 0.1875,
-      ._8xSample4XOffset      = 0.1875,
-      ._8xSample4YOffset      = 0.8125,
-      ._8xSample5XOffset      = 0.0625,
-      ._8xSample5YOffset      = 0.4375,
-      ._8xSample6XOffset      = 0.6875,
-      ._8xSample6YOffset      = 0.9375,
-      ._8xSample7XOffset      = 0.9375,
-      ._8xSample7YOffset      = 0.0625,
-#if ANV_GEN >= 9
-      ._16xSample0XOffset     = 0.5625,
-      ._16xSample0YOffset     = 0.5625,
-      ._16xSample1XOffset     = 0.4375,
-      ._16xSample1YOffset     = 0.3125,
-      ._16xSample2XOffset     = 0.3125,
-      ._16xSample2YOffset     = 0.6250,
-      ._16xSample3XOffset     = 0.7500,
-      ._16xSample3YOffset     = 0.4375,
-      ._16xSample4XOffset     = 0.1875,
-      ._16xSample4YOffset     = 0.3750,
-      ._16xSample5XOffset     = 0.6250,
-      ._16xSample5YOffset     = 0.8125,
-      ._16xSample6XOffset     = 0.8125,
-      ._16xSample6YOffset     = 0.6875,
-      ._16xSample7XOffset     = 0.6875,
-      ._16xSample7YOffset     = 0.1875,
-      ._16xSample8XOffset     = 0.3750,
-      ._16xSample8YOffset     = 0.8750,
-      ._16xSample9XOffset     = 0.5000,
-      ._16xSample9YOffset     = 0.0625,
-      ._16xSample10XOffset    = 0.2500,
-      ._16xSample10YOffset    = 0.1250,
-      ._16xSample11XOffset    = 0.1250,
-      ._16xSample11YOffset    = 0.7500,
-      ._16xSample12XOffset    = 0.0000,
-      ._16xSample12YOffset    = 0.5000,
-      ._16xSample13XOffset    = 0.9375,
-      ._16xSample13YOffset    = 0.2500,
-      ._16xSample14XOffset    = 0.8750,
-      ._16xSample14YOffset    = 0.9375,
-      ._16xSample15XOffset    = 0.0625,
-      ._16xSample15YOffset    = 0.0000,
-#endif
-   );
 }
 
 VkResult
@@ -359,19 +289,8 @@ genX(graphics_pipeline_create)(
    emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
                            pCreateInfo->pMultisampleState);
 
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_STATISTICS),
-                   .StatisticsEnable = true);
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), .Enable = false);
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), .TEEnable = false);
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), .FunctionEnable = false);
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false);
-
    emit_urb_setup(pipeline);
 
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM_CHROMAKEY),
-                  .ChromaKeyKillEnable = false);
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_AA_LINE_PARAMETERS));
-
    anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP),
                   .ClipEnable = true,
                   .ViewportXYClipTestEnable = !(extra && extra->disable_viewport),
index c749cbfed0c6992ad5d9ab388a174a62b7a70be8..6b077df125a6d64f3970148b5ed5e36f95e2168a 100644 (file)
 
 #include "genX_state_util.h"
 
+/**
+ * Build and submit the one-time batch that holds state which never changes
+ * after device creation.
+ *
+ * The commands are assembled in a small on-stack buffer and handed to
+ * anv_device_submit_simple_batch(); its result is returned unchanged.
+ */
+VkResult
+genX(init_device_state)(struct anv_device *device)
+{
+   struct anv_batch batch;
+
+   uint32_t cmds[64];
+   batch.start = batch.next = cmds;
+   batch.end = (void *) cmds + sizeof(cmds);
+
+   /* Select the 3D pipeline before emitting the 3DSTATE_* packets below.
+    * MaskBits is only set when building for gen9+.
+    * NOTE(review): presumably a write-enable mask for the selection field;
+    * confirm against the hardware documentation.
+    */
+   anv_batch_emit(&batch, GENX(PIPELINE_SELECT),
+#if ANV_GEN >= 9
+                  .MaskBits = 3,
+#endif
+                  .PipelineSelection = _3D);
+
+   anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS),
+                  .StatisticsEnable = true);
+   anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false);
+   anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false);
+   anv_batch_emit(&batch, GENX(3DSTATE_DS), .FunctionEnable = false);
+   anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false);
+   anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY),
+                  .ChromaKeyKillEnable = false);
+   anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS));
+
+   /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and
+    * VkPhysicalDeviceFeatures::standardSampleLocations.
+    *
+    * The 1x/2x/4x/8x offsets are always programmed; the 16x offsets are
+    * only included when building for gen9+.
+    */
+   anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN),
+      ._1xSample0XOffset      = 0.5,
+      ._1xSample0YOffset      = 0.5,
+      ._2xSample0XOffset      = 0.25,
+      ._2xSample0YOffset      = 0.25,
+      ._2xSample1XOffset      = 0.75,
+      ._2xSample1YOffset      = 0.75,
+      ._4xSample0XOffset      = 0.375,
+      ._4xSample0YOffset      = 0.125,
+      ._4xSample1XOffset      = 0.875,
+      ._4xSample1YOffset      = 0.375,
+      ._4xSample2XOffset      = 0.125,
+      ._4xSample2YOffset      = 0.625,
+      ._4xSample3XOffset      = 0.625,
+      ._4xSample3YOffset      = 0.875,
+      ._8xSample0XOffset      = 0.5625,
+      ._8xSample0YOffset      = 0.3125,
+      ._8xSample1XOffset      = 0.4375,
+      ._8xSample1YOffset      = 0.6875,
+      ._8xSample2XOffset      = 0.8125,
+      ._8xSample2YOffset      = 0.5625,
+      ._8xSample3XOffset      = 0.3125,
+      ._8xSample3YOffset      = 0.1875,
+      ._8xSample4XOffset      = 0.1875,
+      ._8xSample4YOffset      = 0.8125,
+      ._8xSample5XOffset      = 0.0625,
+      ._8xSample5YOffset      = 0.4375,
+      ._8xSample6XOffset      = 0.6875,
+      ._8xSample6YOffset      = 0.9375,
+      ._8xSample7XOffset      = 0.9375,
+      ._8xSample7YOffset      = 0.0625,
+#if ANV_GEN >= 9
+      ._16xSample0XOffset     = 0.5625,
+      ._16xSample0YOffset     = 0.5625,
+      ._16xSample1XOffset     = 0.4375,
+      ._16xSample1YOffset     = 0.3125,
+      ._16xSample2XOffset     = 0.3125,
+      ._16xSample2YOffset     = 0.6250,
+      ._16xSample3XOffset     = 0.7500,
+      ._16xSample3YOffset     = 0.4375,
+      ._16xSample4XOffset     = 0.1875,
+      ._16xSample4YOffset     = 0.3750,
+      ._16xSample5XOffset     = 0.6250,
+      ._16xSample5YOffset     = 0.8125,
+      ._16xSample6XOffset     = 0.8125,
+      ._16xSample6YOffset     = 0.6875,
+      ._16xSample7XOffset     = 0.6875,
+      ._16xSample7YOffset     = 0.1875,
+      ._16xSample8XOffset     = 0.3750,
+      ._16xSample8YOffset     = 0.8750,
+      ._16xSample9XOffset     = 0.5000,
+      ._16xSample9YOffset     = 0.0625,
+      ._16xSample10XOffset    = 0.2500,
+      ._16xSample10YOffset    = 0.1250,
+      ._16xSample11XOffset    = 0.1250,
+      ._16xSample11YOffset    = 0.7500,
+      ._16xSample12XOffset    = 0.0000,
+      ._16xSample12YOffset    = 0.5000,
+      ._16xSample13XOffset    = 0.9375,
+      ._16xSample13YOffset    = 0.2500,
+      ._16xSample14XOffset    = 0.8750,
+      ._16xSample14YOffset    = 0.9375,
+      ._16xSample15XOffset    = 0.0625,
+      ._16xSample15YOffset    = 0.0000,
+#endif
+   );
+
+   anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END));
+
+   /* Sanity check that the emitted commands fit in cmds[]. */
+   assert(batch.next <= batch.end);
+
+   return anv_device_submit_simple_batch(device, &batch);
+}
+
 static const uint32_t
 isl_to_gen_multisample_layout[] = {
    [ISL_MSAA_LAYOUT_NONE]           = MSS,