anv: Store prog data in pipeline cache stream
Author:     Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
AuthorDate: Fri, 4 Mar 2016 16:15:16 +0000 (08:15 -0800)
Commit:     Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
CommitDate: Sat, 5 Mar 2016 21:50:07 +0000 (13:50 -0800)
We have to keep the prog data in the cache stream anyway for the cache to
work, so let's not keep an extra copy in struct anv_pipeline too.
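
As an aside, here is a minimal self-contained sketch of the ownership model
this moves to (illustrative names, not anv code): the cache stream holds the
only copy of the prog data, and the pipeline keeps just a const pointer into
that storage, the way the patch does with cache_entry::prog_data and
pipeline->prog_data[stage].

    /* Sketch only: the cache owns the prog data, the pipeline borrows it. */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct entry {
       uint32_t prog_data_size;
       char prog_data[];          /* prog data lives inline in the stream */
    };

    struct pipeline {
       const void *prog_data;     /* borrowed from the cache, never owned */
    };

    /* Copy freshly compiled prog data into cache-owned storage and hand
     * back a pointer to that copy, as upload_kernel does in the patch. */
    static const void *
    cache_upload(struct entry **stream, const void *data, uint32_t size)
    {
       struct entry *e = malloc(sizeof(*e) + size);
       e->prog_data_size = size;
       memcpy(e->prog_data, data, size);
       *stream = e;
       return e->prog_data;
    }

    int main(void)
    {
       struct entry *stream = NULL;
       struct pipeline p;
       uint32_t scratch = 42;     /* stand-in for a brw_*_prog_data local */

       p.prog_data = cache_upload(&stream, &scratch, sizeof(scratch));
       scratch = 0;               /* the compile-time local can go away... */
       printf("%u\n", *(const uint32_t *) p.prog_data);  /* ...prints 42 */
       free(stream);
       return 0;
    }

In the patch itself, consumers get typed pointers back through the new
get_vs_prog_data()/get_gs_prog_data()/get_wm_prog_data()/get_cs_prog_data()
helpers added to anv_private.h.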

src/intel/vulkan/anv_cmd_buffer.c
src/intel/vulkan/anv_pipeline.c
src/intel/vulkan/anv_pipeline_cache.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/gen7_cmd_buffer.c
src/intel/vulkan/gen7_pipeline.c
src/intel/vulkan/gen8_cmd_buffer.c
src/intel/vulkan/gen8_pipeline.c
src/intel/vulkan/genX_cmd_buffer.c
src/intel/vulkan/genX_pipeline.c
src/intel/vulkan/genX_pipeline_util.h

diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 6ff5f35bc6ac51b3279b02cc0be4496757621326..5ec242fbf2a985f98f364154bc65d7dddc8825e8 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -757,7 +757,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
    }
 
    if (stage == MESA_SHADER_COMPUTE &&
-       cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) {
+       get_cs_prog_data(cmd_buffer->state.compute_pipeline)->uses_num_work_groups) {
       struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo;
       uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset;
 
@@ -996,7 +996,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
 {
    struct anv_push_constants *data =
       cmd_buffer->state.push_constants[stage];
-   struct brw_stage_prog_data *prog_data =
+   const struct brw_stage_prog_data *prog_data =
       cmd_buffer->state.pipeline->prog_data[stage];
 
    /* If we don't actually have any push constants, bail. */
@@ -1027,7 +1027,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
    struct anv_push_constants *data =
       cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE];
    struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
-   const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
+   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
 
    const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index c93b1a07246998e4890e5f80681be3b558278610..868215cd22dd3f8f0150f3b5fb61ba344dbf67c7 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -406,7 +406,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
 static void
 anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
                                 gl_shader_stage stage,
-                                struct brw_stage_prog_data *prog_data)
+                                const struct brw_stage_prog_data *prog_data)
 {
    struct brw_device_info *devinfo = &pipeline->device->info;
    uint32_t max_threads[] = {
@@ -436,7 +436,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
+   const struct brw_stage_prog_data *stage_prog_data;
    struct brw_vs_prog_key key;
    uint32_t kernel;
    unsigned char sha1[20], *hash;
@@ -446,17 +446,17 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
    if (module->size > 0) {
       hash = sha1;
       anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info);
-      kernel = anv_pipeline_cache_search(cache, hash, prog_data);
+      kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data);
    } else {
       hash = NULL;
    }
 
    if (module->size == 0 || kernel == NO_KERNEL) {
-      memset(prog_data, 0, sizeof(*prog_data));
+      struct brw_vs_prog_data prog_data = { 0, };
 
       nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                              MESA_SHADER_VERTEX, spec_info,
-                                             &prog_data->base.base);
+                                             &prog_data.base.base);
       if (nir == NULL)
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
@@ -465,31 +465,36 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
       if (module->nir == NULL)
          ralloc_steal(mem_ctx, nir);
 
-      prog_data->inputs_read = nir->info.inputs_read;
+      prog_data.inputs_read = nir->info.inputs_read;
       if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ))
          pipeline->writes_point_size = true;
 
       brw_compute_vue_map(&pipeline->device->info,
-                          &prog_data->base.vue_map,
+                          &prog_data.base.vue_map,
                           nir->info.outputs_written,
                           nir->info.separate_shader);
 
       unsigned code_size;
       const unsigned *shader_code =
-         brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+         brw_compile_vs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
                         NULL, false, -1, &code_size, NULL);
       if (shader_code == NULL) {
          ralloc_free(mem_ctx);
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
+      stage_prog_data = &prog_data.base.base;
       kernel = anv_pipeline_cache_upload_kernel(cache, hash,
                                                 shader_code, code_size,
-                                                prog_data, sizeof(*prog_data));
+                                                &stage_prog_data,
+                                                sizeof(prog_data));
       ralloc_free(mem_ctx);
    }
 
-   if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
+   const struct brw_vs_prog_data *vs_prog_data =
+      (const struct brw_vs_prog_data *) stage_prog_data;
+
+   if (vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
       pipeline->vs_simd8 = kernel;
       pipeline->vs_vec4 = NO_KERNEL;
    } else {
@@ -498,7 +503,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
    }
 
    anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX,
-                                   &prog_data->base.base);
+                                   stage_prog_data);
 
    return VK_SUCCESS;
 }
@@ -513,7 +518,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data;
+   const struct brw_stage_prog_data *stage_prog_data;
    struct brw_gs_prog_key key;
    uint32_t kernel;
    unsigned char sha1[20], *hash;
@@ -523,17 +528,17 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
    if (module->size > 0) {
       hash = sha1;
       anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info);
-      kernel = anv_pipeline_cache_search(cache, hash, prog_data);
+      kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data);
    } else {
       hash = NULL;
    }
 
    if (module->size == 0 || kernel == NO_KERNEL) {
-      memset(prog_data, 0, sizeof(*prog_data));
+      struct brw_gs_prog_data prog_data = { 0, };
 
       nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                              MESA_SHADER_GEOMETRY, spec_info,
-                                             &prog_data->base.base);
+                                             &prog_data.base.base);
       if (nir == NULL)
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
@@ -546,13 +551,13 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
          pipeline->writes_point_size = true;
 
       brw_compute_vue_map(&pipeline->device->info,
-                          &prog_data->base.vue_map,
+                          &prog_data.base.vue_map,
                           nir->info.outputs_written,
                           nir->info.separate_shader);
 
       unsigned code_size;
       const unsigned *shader_code =
-         brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+         brw_compile_gs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
                         NULL, -1, &code_size, NULL);
       if (shader_code == NULL) {
          ralloc_free(mem_ctx);
@@ -560,9 +565,10 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
       }
 
       /* TODO: SIMD8 GS */
+      stage_prog_data = &prog_data.base.base;
       kernel = anv_pipeline_cache_upload_kernel(cache, hash,
                                                 shader_code, code_size,
-                                                prog_data, sizeof(*prog_data));
+                                                &stage_prog_data, sizeof(prog_data));
 
       ralloc_free(mem_ctx);
    }
@@ -570,7 +576,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
    pipeline->gs_kernel = kernel;
 
    anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY,
-                                   &prog_data->base.base);
+                                   stage_prog_data);
 
    return VK_SUCCESS;
 }
@@ -586,7 +592,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
+   const struct brw_stage_prog_data *stage_prog_data;
    struct brw_wm_prog_key key;
    uint32_t kernel;
    unsigned char sha1[20], *hash;
@@ -599,19 +605,19 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
    if (module->size > 0) {
       hash = sha1;
       anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info);
-      kernel = anv_pipeline_cache_search(cache, hash, prog_data);
+      kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data);
    } else {
       hash = NULL;
    }
 
    if (module->size == 0 || kernel == NO_KERNEL) {
-      memset(prog_data, 0, sizeof(*prog_data));
+      struct brw_wm_prog_data prog_data = { 0, };
 
-      prog_data->binding_table.render_target_start = 0;
+      prog_data.binding_table.render_target_start = 0;
 
       nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                              MESA_SHADER_FRAGMENT, spec_info,
-                                             &prog_data->base);
+                                             &prog_data.base);
       if (nir == NULL)
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
@@ -635,27 +641,31 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
 
       unsigned code_size;
       const unsigned *shader_code =
-         brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+         brw_compile_fs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
                         NULL, -1, -1, pipeline->use_repclear, &code_size, NULL);
       if (shader_code == NULL) {
          ralloc_free(mem_ctx);
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
+      stage_prog_data = &prog_data.base;
       kernel = anv_pipeline_cache_upload_kernel(cache, hash,
                                                 shader_code, code_size,
-                                                prog_data, sizeof(*prog_data));
+                                                &stage_prog_data, sizeof(prog_data));
 
       ralloc_free(mem_ctx);
    }
 
-   if (prog_data->no_8)
+   const struct brw_wm_prog_data *wm_prog_data =
+      (const struct brw_wm_prog_data *) stage_prog_data;
+
+   if (wm_prog_data->no_8)
       pipeline->ps_simd8 = NO_KERNEL;
    else
       pipeline->ps_simd8 = kernel;
 
-   if (prog_data->no_8 || prog_data->prog_offset_16) {
-      pipeline->ps_simd16 = kernel + prog_data->prog_offset_16;
+   if (wm_prog_data->no_8 || wm_prog_data->prog_offset_16) {
+      pipeline->ps_simd16 = kernel + wm_prog_data->prog_offset_16;
    } else {
       pipeline->ps_simd16 = NO_KERNEL;
    }
@@ -664,18 +674,18 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
    pipeline->ps_grf_start2 = 0;
    if (pipeline->ps_simd8 != NO_KERNEL) {
       pipeline->ps_ksp0 = pipeline->ps_simd8;
-      pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg;
+      pipeline->ps_grf_start0 = wm_prog_data->base.dispatch_grf_start_reg;
       if (pipeline->ps_simd16 != NO_KERNEL) {
          pipeline->ps_ksp2 = pipeline->ps_simd16;
-         pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16;
+         pipeline->ps_grf_start2 = wm_prog_data->dispatch_grf_start_reg_16;
       }
    } else if (pipeline->ps_simd16 != NO_KERNEL) {
       pipeline->ps_ksp0 = pipeline->ps_simd16;
-      pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16;
+      pipeline->ps_grf_start0 = wm_prog_data->dispatch_grf_start_reg_16;
    }
 
    anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT,
-                                   &prog_data->base);
+                                   stage_prog_data);
 
    return VK_SUCCESS;
 }
@@ -690,7 +700,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+   const struct brw_stage_prog_data *stage_prog_data;
    struct brw_cs_prog_key key;
    uint32_t kernel;
    unsigned char sha1[20], *hash;
@@ -700,23 +710,23 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
    if (module->size > 0) {
       hash = sha1;
       anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info);
-      kernel = anv_pipeline_cache_search(cache, hash, prog_data);
+      kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data);
    } else {
       hash = NULL;
    }
 
    if (module->size == 0 || kernel == NO_KERNEL) {
-      memset(prog_data, 0, sizeof(*prog_data));
+      struct brw_cs_prog_data prog_data = { 0, };
 
-      prog_data->binding_table.work_groups_start = 0;
+      prog_data.binding_table.work_groups_start = 0;
 
       nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                              MESA_SHADER_COMPUTE, spec_info,
-                                             &prog_data->base);
+                                             &prog_data.base);
       if (nir == NULL)
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-      prog_data->base.total_shared = nir->num_shared;
+      prog_data.base.total_shared = nir->num_shared;
 
       void *mem_ctx = ralloc_context(NULL);
 
@@ -725,23 +735,24 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
 
       unsigned code_size;
       const unsigned *shader_code =
-         brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+         brw_compile_cs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
                         -1, &code_size, NULL);
       if (shader_code == NULL) {
          ralloc_free(mem_ctx);
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
+      stage_prog_data = &prog_data.base;
       kernel = anv_pipeline_cache_upload_kernel(cache, hash,
                                                 shader_code, code_size,
-                                                prog_data, sizeof(*prog_data));
+                                                &stage_prog_data, sizeof(prog_data));
       ralloc_free(mem_ctx);
    }
 
    pipeline->cs_simd = kernel;
 
    anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE,
-                                   &prog_data->base);
+                                   stage_prog_data);
 
    return VK_SUCCESS;
 }
@@ -751,10 +762,12 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline)
 {
    const struct brw_device_info *devinfo = &pipeline->device->info;
    bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT;
-   unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
+   unsigned vs_size = vs_present ?
+      get_vs_prog_data(pipeline)->base.urb_entry_size : 1;
    unsigned vs_entry_size_bytes = vs_size * 64;
    bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT;
-   unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
+   unsigned gs_size = gs_present ?
+      get_gs_prog_data(pipeline)->base.urb_entry_size : 1;
    unsigned gs_entry_size_bytes = gs_size * 64;
 
    /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
@@ -1136,7 +1149,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
       /* Vertex is only optional if disable_vs is set */
       assert(extra->disable_vs);
-      memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
    }
 
    gen7_compute_urb_partition(pipeline);
@@ -1152,7 +1164,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
        */
       inputs_read = ~0ull;
    } else {
-      inputs_read = pipeline->vs_prog_data.inputs_read;
+      inputs_read = get_vs_prog_data(pipeline)->inputs_read;
    }
 
    pipeline->vb_used = 0;
diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c
index 48f36706fef139fe8296a304914433b27c90b9b9..024fdf7d5a9ab636d6aaf25824c132c672b90ebe 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -110,7 +110,8 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
 
 uint32_t
 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
-                          const unsigned char *sha1, void *prog_data)
+                          const unsigned char *sha1,
+                          const struct brw_stage_prog_data **prog_data)
 {
    const uint32_t mask = cache->table_size - 1;
    const uint32_t start = (*(uint32_t *) sha1);
@@ -126,7 +127,7 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
          cache->program_stream.block_pool->map + offset;
       if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
          if (prog_data)
-            memcpy(prog_data, entry->prog_data, entry->prog_data_size);
+            *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
 
          const uint32_t preamble_size =
             align_u32(sizeof(*entry) + entry->prog_data_size, 64);
@@ -198,17 +199,14 @@ uint32_t
 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                  const unsigned char *sha1,
                                  const void *kernel, size_t kernel_size,
-                                 const void *prog_data, size_t prog_data_size)
+                                 const struct brw_stage_prog_data **prog_data,
+                                 size_t prog_data_size)
 {
    pthread_mutex_lock(&cache->mutex);
    struct cache_entry *entry;
 
-   /* Meta pipelines don't have SPIR-V, so we can't hash them.
-    * Consequentally, they just don't get cached.
-    */
-   const uint32_t preamble_size = sha1 ?
-      align_u32(sizeof(*entry) + prog_data_size, 64) :
-      0;
+   const uint32_t preamble_size =
+      align_u32(sizeof(*entry) + prog_data_size, 64);
 
    const uint32_t size = preamble_size + kernel_size;
 
@@ -216,14 +214,16 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
    const struct anv_state state =
       anv_state_stream_alloc(&cache->program_stream, size, 64);
 
+   entry = state.map;
+   entry->prog_data_size = prog_data_size;
+   memcpy(entry->prog_data, *prog_data, prog_data_size);
+   *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
+   entry->kernel_size = kernel_size;
+
    if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) {
       assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL);
-      entry = state.map;
-      memcpy(entry->sha1, sha1, sizeof(entry->sha1));
-      entry->prog_data_size = prog_data_size;
-      memcpy(entry->prog_data, prog_data, prog_data_size);
-      entry->kernel_size = kernel_size;
 
+      memcpy(entry->sha1, sha1, sizeof(entry->sha1));
       if (cache->kernel_count == cache->table_size / 2)
          anv_pipeline_cache_grow(cache);
 
@@ -285,9 +285,13 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
       const struct cache_entry *entry = p;
       const void *kernel = &entry->prog_data[entry->prog_data_size];
 
+      const struct brw_stage_prog_data *prog_data =
+         (const struct brw_stage_prog_data *) entry->prog_data;
+
       anv_pipeline_cache_upload_kernel(cache, entry->sha1,
                                        kernel, entry->kernel_size,
-                                       entry->prog_data, entry->prog_data_size);
+                                       &prog_data,
+                                       entry->prog_data_size);
       p = kernel + entry->kernel_size;
    }
 }
@@ -406,9 +410,12 @@ anv_pipeline_cache_merge(struct anv_pipeline_cache *dst,
 
       const void *kernel = (void *) entry +
          align_u32(sizeof(*entry) + entry->prog_data_size, 64);
+      const struct brw_stage_prog_data *prog_data =
+         (const struct brw_stage_prog_data *) entry->prog_data;
+
       anv_pipeline_cache_upload_kernel(dst, entry->sha1,
                                        kernel, entry->kernel_size,
-                                       entry->prog_data, entry->prog_data_size);
+                                       &prog_data, entry->prog_data_size);
    }
 }
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 32c8b13c95263e5193836f081001192e2a05f87d..70b6dd995a1d8472d4262db3952022dcf7762dab 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -640,12 +640,13 @@ void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                              struct anv_device *device);
 void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
 uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
-                                   const unsigned char *sha1, void *prog_data);
+                                   const unsigned char *sha1,
+                                   const struct brw_stage_prog_data **prog_data);
 uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                           const unsigned char *sha1,
                                           const void *kernel,
                                           size_t kernel_size,
-                                          const void *prog_data,
+                                          const struct brw_stage_prog_data **prog_data,
                                           size_t prog_data_size);
 
 struct anv_device {
@@ -1404,12 +1405,8 @@ struct anv_pipeline {
 
    bool                                         use_repclear;
 
-   struct brw_vs_prog_data                      vs_prog_data;
-   struct brw_wm_prog_data                      wm_prog_data;
-   struct brw_gs_prog_data                      gs_prog_data;
-   struct brw_cs_prog_data                      cs_prog_data;
    bool                                         writes_point_size;
-   struct brw_stage_prog_data *                 prog_data[MESA_SHADER_STAGES];
+   const struct brw_stage_prog_data *           prog_data[MESA_SHADER_STAGES];
    uint32_t                                     scratch_start[MESA_SHADER_STAGES];
    uint32_t                                     total_scratch;
    struct {
@@ -1457,6 +1454,30 @@ struct anv_pipeline {
    } gen9;
 };
 
+static inline const struct brw_vs_prog_data *
+get_vs_prog_data(struct anv_pipeline *pipeline)
+{
+   return (const struct brw_vs_prog_data *) pipeline->prog_data[MESA_SHADER_VERTEX];
+}
+
+static inline const struct brw_gs_prog_data *
+get_gs_prog_data(struct anv_pipeline *pipeline)
+{
+   return (const struct brw_gs_prog_data *) pipeline->prog_data[MESA_SHADER_GEOMETRY];
+}
+
+static inline const struct brw_wm_prog_data *
+get_wm_prog_data(struct anv_pipeline *pipeline)
+{
+   return (const struct brw_wm_prog_data *) pipeline->prog_data[MESA_SHADER_FRAGMENT];
+}
+
+static inline const struct brw_cs_prog_data *
+get_cs_prog_data(struct anv_pipeline *pipeline)
+{
+   return (const struct brw_cs_prog_data *) pipeline->prog_data[MESA_SHADER_COMPUTE];
+}
+
 struct anv_graphics_pipeline_create_info {
    /**
     * If non-negative, overrides the color attachment count of the pipeline's
diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c
index 985907872fa8d4f644caabbb2cb0ab2efbf513af..8dce586eec7f259f9c932564d8a985a713e3dfbd 100644
--- a/src/intel/vulkan/gen7_cmd_buffer.c
+++ b/src/intel/vulkan/gen7_cmd_buffer.c
@@ -283,7 +283,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
 
    struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
 
-   const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
+   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
 
    unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
@@ -395,11 +395,12 @@ void
 genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
 {
    struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    VkResult result;
 
    assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
 
-   bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0;
+   bool needs_slm = cs_prog_data->base.total_shared > 0;
    config_l3(cmd_buffer, needs_slm);
 
    if (cmd_buffer->state.current_pipeline != GPGPU) {
diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c
index 5235d399ce5665d4137c7eba258fb227dc833276..5f480edf809bfbead4793a43e9b36a58a58d0f96 100644
--- a/src/intel/vulkan/gen7_pipeline.c
+++ b/src/intel/vulkan/gen7_pipeline.c
@@ -250,7 +250,7 @@ genX(graphics_pipeline_create)(
    anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK),
       .SampleMask                               = 0xff);
 
-   const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base;
+   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
 
 #if 0 
    /* From gen7_vs_state.c */
@@ -277,18 +277,18 @@ genX(graphics_pipeline_create)(
       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS),
          .KernelStartPointer                    = pipeline->vs_vec4,
          .ScratchSpaceBaseOffset                = pipeline->scratch_start[MESA_SHADER_VERTEX],
-         .PerThreadScratchSpace                 = scratch_space(&vue_prog_data->base),
+         .PerThreadScratchSpace                 = scratch_space(&vs_prog_data->base.base),
 
          .DispatchGRFStartRegisterforURBData    =
-            vue_prog_data->base.dispatch_grf_start_reg,
-         .VertexURBEntryReadLength              = vue_prog_data->urb_read_length,
+            vs_prog_data->base.base.dispatch_grf_start_reg,
+         .VertexURBEntryReadLength              = vs_prog_data->base.urb_read_length,
          .VertexURBEntryReadOffset              = 0,
 
          .MaximumNumberofThreads                = device->info.max_vs_threads - 1,
          .StatisticsEnable                      = true,
          .VSFunctionEnable                      = true);
 
-   const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data;
+   const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
 
    if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) {
       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false);
@@ -338,7 +338,7 @@ genX(graphics_pipeline_create)(
       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS),
                      .MaximumNumberofThreads                   = device->info.max_wm_threads - 1);
    } else {
-      const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data;
+      const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
       if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 ||
           wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1)
          anv_finishme("two-sided color needs sbe swizzling setup");
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index 8d8775fb01dface6acc284276408236396ff1335..0d27c27f5b7add10175dc16b4f0b853654ef5994 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -505,7 +505,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
 
    struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
 
-   const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
+   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
 
    unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
@@ -558,11 +558,12 @@ void
 genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
 {
    struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    VkResult result;
 
    assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
 
-   bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0;
+   bool needs_slm = cs_prog_data->base.total_shared > 0;
    config_l3(cmd_buffer, needs_slm);
 
    if (cmd_buffer->state.current_pipeline != GPGPU) {
diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c
index 4a5e86741890970a6d4b3d7d1d747435855a4663..5ce1307f09062cc159d778b271d4816c5ff5dfa4 100644
--- a/src/intel/vulkan/gen8_pipeline.c
+++ b/src/intel/vulkan/gen8_pipeline.c
@@ -354,6 +354,7 @@ genX(graphics_pipeline_create)(
                   .MaximumPointWidth = 255.875,
                   .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1);
 
+   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
    anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM),
                   .StatisticsEnable = true,
                   .LineEndCapAntialiasingRegionWidth = _05pixels,
@@ -363,15 +364,15 @@ genX(graphics_pipeline_create)(
                   .PointRasterizationRule = RASTRULE_UPPER_RIGHT,
                   .BarycentricInterpolationMode =
                      pipeline->ps_ksp0 == NO_KERNEL ?
-                     0 : pipeline->wm_prog_data.barycentric_interp_modes);
+                     0 : wm_prog_data->barycentric_interp_modes);
 
-   const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data;
-   offset = 1;
-   length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
-
-   if (pipeline->gs_kernel == NO_KERNEL)
+   if (pipeline->gs_kernel == NO_KERNEL) {
       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .Enable = false);
-   else
+   } else {
+      const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
+      offset = 1;
+      length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
+
       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS),
                      .SingleProgramFlow = false,
                      .KernelStartPointer = pipeline->gs_kernel,
@@ -412,11 +413,12 @@ genX(graphics_pipeline_create)(
 
                      .VertexURBEntryOutputReadOffset = offset,
                      .VertexURBEntryOutputLength = length);
+   }
 
-   const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base;
+   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
    /* Skip the VUE header and position slots */
    offset = 1;
-   length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset;
+   length = (vs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
 
    uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 :
                                                          pipeline->vs_vec4;
@@ -435,7 +437,7 @@ genX(graphics_pipeline_create)(
                      .VectorMaskEnable = false,
                      .SamplerCount = 0,
                      .BindingTableEntryCount =
-                     vue_prog_data->base.binding_table.size_bytes / 4,
+                        vs_prog_data->base.base.binding_table.size_bytes / 4,
                      .ThreadDispatchPriority = false,
                      .FloatingPointMode = IEEE754,
                      .IllegalOpcodeExceptionEnable = false,
@@ -443,11 +445,11 @@ genX(graphics_pipeline_create)(
                      .SoftwareExceptionEnable = false,
 
                      .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX],
-                     .PerThreadScratchSpace = scratch_space(&vue_prog_data->base),
+                     .PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base),
 
                      .DispatchGRFStartRegisterForURBData =
-                     vue_prog_data->base.dispatch_grf_start_reg,
-                     .VertexURBEntryReadLength = vue_prog_data->urb_read_length,
+                        vs_prog_data->base.base.dispatch_grf_start_reg,
+                     .VertexURBEntryReadLength = vs_prog_data->base.urb_read_length,
                      .VertexURBEntryReadOffset = 0,
 
                      .MaximumNumberofThreads = device->info.max_vs_threads - 1,
@@ -461,8 +463,6 @@ genX(graphics_pipeline_create)(
                      .UserClipDistanceClipTestEnableBitmask = 0,
                      .UserClipDistanceCullTestEnableBitmask = 0);
 
-   const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data;
-
    const int num_thread_bias = GEN_GEN == 8 ? 2 : 1;
    if (pipeline->ps_ksp0 == NO_KERNEL) {
       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS));
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 88cc13b580a9afdbd285e821ae62c7649af99793..2606a66f2a7dcf3798e984905e7c13f34413e128 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -319,11 +319,11 @@ void genX(CmdDraw)(
 {
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
+   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
-   if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex ||
-       cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance)
+   if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
       emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance);
 
    anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
@@ -346,11 +346,11 @@ void genX(CmdDrawIndexed)(
 {
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
+   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
-   if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex ||
-       cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance)
+   if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
       emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance);
 
    anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
@@ -398,13 +398,13 @@ void genX(CmdDrawIndirect)(
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
    struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
+   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
    struct anv_bo *bo = buffer->bo;
    uint32_t bo_offset = buffer->offset + offset;
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
-   if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex ||
-       cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance)
+   if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
       emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8);
 
    emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
@@ -429,14 +429,14 @@ void genX(CmdDrawIndexedIndirect)(
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
    struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
+   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
    struct anv_bo *bo = buffer->bo;
    uint32_t bo_offset = buffer->offset + offset;
 
    genX(cmd_buffer_flush_state)(cmd_buffer);
 
    /* TODO: We need to stomp base vertex to 0 somehow */
-   if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex ||
-       cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance)
+   if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
       emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12);
 
    emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
@@ -460,7 +460,7 @@ void genX(CmdDispatch)(
 {
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
-   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+   const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline);
 
    if (prog_data->uses_num_work_groups) {
       struct anv_state state =
@@ -507,7 +507,7 @@ void genX(CmdDispatchIndirect)(
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
    struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
-   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+   const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline);
    struct anv_bo *bo = buffer->bo;
    uint32_t bo_offset = buffer->offset + offset;
    struct anv_batch *batch = &cmd_buffer->batch;
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 1605661f9715ef75c62c1eaaffed71404f75d89b..cc8841ea8a0afe40f70b110dd1d864d96a1852ec 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -82,7 +82,7 @@ genX(compute_pipeline_create)(
 
    pipeline->use_repclear = false;
 
-   const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
+   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
 
    unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h
index 28b2a032c463558b7eba29e656b4a9e257851b76..cd138dfae61471a6c697a928153f066d2e52bda5 100644
--- a/src/intel/vulkan/genX_pipeline_util.h
+++ b/src/intel/vulkan/genX_pipeline_util.h
@@ -52,6 +52,8 @@ emit_vertex_input(struct anv_pipeline *pipeline,
                   const VkPipelineVertexInputStateCreateInfo *info,
                   const struct anv_graphics_pipeline_create_info *extra)
 {
+   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
+
    uint32_t elements;
    if (extra && extra->disable_vs) {
       /* If the VS is disabled, just assume the user knows what they're
@@ -63,7 +65,7 @@ emit_vertex_input(struct anv_pipeline *pipeline,
          elements |= (1 << info->pVertexAttributeDescriptions[i].location);
    } else {
       /* Pull inputs_read out of the VS prog data */
-      uint64_t inputs_read = pipeline->vs_prog_data.inputs_read;
+      uint64_t inputs_read = vs_prog_data->inputs_read;
       assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
       elements = inputs_read >> VERT_ATTRIB_GENERIC0;
    }
@@ -72,16 +74,16 @@ emit_vertex_input(struct anv_pipeline *pipeline,
    /* On BDW+, we only need to allocate space for base ids.  Setting up
     * the actual vertex and instance id is a separate packet.
     */
-   const bool needs_svgs_elem = pipeline->vs_prog_data.uses_basevertex ||
-                                pipeline->vs_prog_data.uses_baseinstance;
+   const bool needs_svgs_elem = vs_prog_data->uses_basevertex ||
+                                vs_prog_data->uses_baseinstance;
 #else
    /* On Haswell and prior, vertex and instance id are created by using the
     * ComponentControl fields, so we need an element for any of them.
     */
-   const bool needs_svgs_elem = pipeline->vs_prog_data.uses_vertexid ||
-                                pipeline->vs_prog_data.uses_instanceid ||
-                                pipeline->vs_prog_data.uses_basevertex ||
-                                pipeline->vs_prog_data.uses_baseinstance;
+   const bool needs_svgs_elem = vs_prog_data->uses_vertexid ||
+                                vs_prog_data->uses_instanceid ||
+                                vs_prog_data->uses_basevertex ||
+                                vs_prog_data->uses_baseinstance;
 #endif
 
    uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem;
@@ -148,8 +150,8 @@ emit_vertex_input(struct anv_pipeline *pipeline,
        * This means, that if we have BaseInstance, we need BaseVertex as
        * well.  Just do all or nothing.
        */
-      uint32_t base_ctrl = (pipeline->vs_prog_data.uses_basevertex ||
-                            pipeline->vs_prog_data.uses_baseinstance) ?
+      uint32_t base_ctrl = (vs_prog_data->uses_basevertex ||
+                            vs_prog_data->uses_baseinstance) ?
                            VFCOMP_STORE_SRC : VFCOMP_STORE_0;
 
       struct GENX(VERTEX_ELEMENT_STATE) element = {
@@ -171,10 +173,10 @@ emit_vertex_input(struct anv_pipeline *pipeline,
 
 #if GEN_GEN >= 8
    anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS),
-                  .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid,
+                  .VertexIDEnable = vs_prog_data->uses_vertexid,
                   .VertexIDComponentNumber = 2,
                   .VertexIDElementOffset = id_slot,
-                  .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid,
+                  .InstanceIDEnable = vs_prog_data->uses_instanceid,
                   .InstanceIDComponentNumber = 3,
                   .InstanceIDElementOffset = id_slot);
 #endif
@@ -222,17 +224,21 @@ emit_urb_setup(struct anv_pipeline *pipeline)
 static void
 emit_3dstate_sbe(struct anv_pipeline *pipeline)
 {
+   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
+   const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
+   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
    const struct brw_vue_map *fs_input_map;
+
    if (pipeline->gs_kernel == NO_KERNEL)
-      fs_input_map = &pipeline->vs_prog_data.base.vue_map;
+      fs_input_map = &vs_prog_data->base.vue_map;
    else
-      fs_input_map = &pipeline->gs_prog_data.base.vue_map;
+      fs_input_map = &gs_prog_data->base.vue_map;
 
    struct GENX(3DSTATE_SBE) sbe = {
       GENX(3DSTATE_SBE_header),
       .AttributeSwizzleEnable = true,
       .PointSpriteTextureCoordinateOrigin = UPPERLEFT,
-      .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs,
+      .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs,
 
 #if GEN_GEN >= 9
       .Attribute0ActiveComponentFormat = ACF_XYZW,
@@ -283,7 +289,7 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline)
 
    int max_source_attr = 0;
    for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
-      int input_index = pipeline->wm_prog_data.urb_setup[attr];
+      int input_index = wm_prog_data->urb_setup[attr];
 
       if (input_index < 0)
          continue;