radv: add an assertion in radv_gfx10_compute_bin_size()

[mesa.git] / src / amd / vulkan / radv_pipeline.c
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c

index 0a5d94fd5f3d1eca7ff8114f141777cc6c62330b..9c83e22fda2c197bda5f2df7b43f0898614cccf8 100644 (file)
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -90,23 +90,6 @@ struct radv_tessellation_state {
         uint32_t tf_param;
  };
  
-struct radv_gs_state {
-       uint32_t vgt_gs_onchip_cntl;
-       uint32_t vgt_gs_max_prims_per_subgroup;
-       uint32_t vgt_esgs_ring_itemsize;
-       uint32_t lds_size;
-};
-
-struct radv_ngg_state {
-       uint16_t ngg_emit_size; /* in dwords */
-       uint32_t hw_max_esverts;
-       uint32_t max_gsprims;
-       uint32_t max_out_verts;
-       uint32_t prim_amp_factor;
-       uint32_t vgt_esgs_ring_itemsize;
-       bool max_vert_out_per_gs_instance;
-};
-
  bool radv_pipeline_has_ngg(const struct radv_pipeline *pipeline)
  {
         struct radv_shader_variant *variant = NULL;
@@ -184,6 +167,8 @@ static uint32_t get_hash_flags(struct radv_device *device)
                 hash_flags |= RADV_HASH_SHADER_PS_WAVE32;
         if (device->physical_device->ge_wave_size == 32)
                 hash_flags |= RADV_HASH_SHADER_GE_WAVE32;
+       if (device->physical_device->use_aco)
+               hash_flags |= RADV_HASH_SHADER_ACO;
         return hash_flags;
  }
  
@@ -1077,8 +1062,8 @@ radv_pipeline_out_of_order_rast(struct radv_pipeline *pipeline,
                  * except when early Z/S tests are requested.
                  */
                 if (ps &&
-                   ps->info.info.ps.writes_memory &&
-                   ps->info.fs.early_fragment_test &&
+                   ps->info.ps.writes_memory &&
+                   ps->info.ps.early_fragment_test &&
                     !dsa_order_invariant.pass_set)
                         return false;
  
@@ -1129,7 +1114,7 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
  
         if (vkms)
                 ps_iter_samples = radv_pipeline_get_ps_iter_samples(vkms);
-       if (vkms && !vkms->sampleShadingEnable && pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.force_persample) {
+       if (vkms && !vkms->sampleShadingEnable && pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.force_persample) {
                 ps_iter_samples = ms->num_samples;
         }
  
@@ -1510,19 +1495,21 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
         pipeline->dynamic_state.mask = states;
  }
  
-static struct radv_gs_state
-calculate_gs_info(const VkGraphicsPipelineCreateInfo *pCreateInfo,
-                       const struct radv_pipeline *pipeline)
+static void
+gfx9_get_gs_info(const VkGraphicsPipelineCreateInfo *pCreateInfo,
+                 const struct radv_pipeline *pipeline,
+                nir_shader **nir,
+                struct radv_shader_info *infos,
+                struct gfx9_gs_info *out)
  {
-       struct radv_gs_state gs = {0};
-       struct radv_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
+       struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY];
         struct radv_es_output_info *es_info;
         if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
-               es_info = radv_pipeline_has_tess(pipeline) ? &gs_info->tes.es_info : &gs_info->vs.es_info;
+               es_info = nir[MESA_SHADER_TESS_CTRL] ? &gs_info->tes.es_info : &gs_info->vs.es_info;
         else
-               es_info = radv_pipeline_has_tess(pipeline) ?
-                       &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.es_info :
-                       &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.es_info;
+               es_info = nir[MESA_SHADER_TESS_CTRL] ?
+                       &infos[MESA_SHADER_TESS_EVAL].tes.es_info :
+                       &infos[MESA_SHADER_VERTEX].vs.es_info;
  
         unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
         bool uses_adjacency;
@@ -1621,15 +1608,13 @@ calculate_gs_info(const VkGraphicsPipelineCreateInfo *pCreateInfo,
         uint32_t gs_prims_per_subgroup = gs_prims;
         uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
         uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
-       gs.lds_size = align(esgs_lds_size, 128) / 128;
-       gs.vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
+       out->lds_size = align(esgs_lds_size, 128) / 128;
+       out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
                                 S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) |
                                 S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup);
-       gs.vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
-       gs.vgt_esgs_ring_itemsize  = esgs_itemsize;
+       out->vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
+       out->vgt_esgs_ring_itemsize  = esgs_itemsize;
         assert(max_prims_per_subgroup <= max_out_prims);
-
-       return gs;
  }
  
  static void clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts,
@@ -1642,21 +1627,20 @@ static void clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts
  }
  
  static unsigned
-radv_get_num_input_vertices(struct radv_pipeline *pipeline)
+radv_get_num_input_vertices(nir_shader **nir)
  {
-       if (radv_pipeline_has_gs(pipeline)) {
-               struct radv_shader_variant *gs =
-                       radv_get_shader(pipeline, MESA_SHADER_GEOMETRY);
+       if (nir[MESA_SHADER_GEOMETRY]) {
+               nir_shader *gs = nir[MESA_SHADER_GEOMETRY];
  
                 return gs->info.gs.vertices_in;
         }
  
-       if (radv_pipeline_has_tess(pipeline)) {
-               struct radv_shader_variant *tes = radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL);
+       if (nir[MESA_SHADER_TESS_CTRL]) {
+               nir_shader *tes = nir[MESA_SHADER_TESS_EVAL];
  
-               if (tes->info.tes.point_mode)
+               if (tes->info.tess.point_mode)
                         return 1;
-               if (tes->info.tes.primitive_mode == GL_ISOLINES)
+               if (tes->info.tess.primitive_mode == GL_ISOLINES)
                         return 2;
                 return 3;
         }
@@ -1664,19 +1648,21 @@ radv_get_num_input_vertices(struct radv_pipeline *pipeline)
         return 3;
  }
  
-static struct radv_ngg_state
-calculate_ngg_info(const VkGraphicsPipelineCreateInfo *pCreateInfo,
-                  struct radv_pipeline *pipeline)
+static void
+gfx10_get_ngg_info(const VkGraphicsPipelineCreateInfo *pCreateInfo,
+                  struct radv_pipeline *pipeline,
+                  nir_shader **nir,
+                  struct radv_shader_info *infos,
+                  struct gfx10_ngg_info *ngg)
  {
-       struct radv_ngg_state ngg = {0};
-       struct radv_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
+       struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY];
         struct radv_es_output_info *es_info =
-               radv_pipeline_has_tess(pipeline) ? &gs_info->tes.es_info : &gs_info->vs.es_info;
-       unsigned gs_type = radv_pipeline_has_gs(pipeline) ? MESA_SHADER_GEOMETRY : MESA_SHADER_VERTEX;
-       unsigned max_verts_per_prim = radv_get_num_input_vertices(pipeline);
+               nir[MESA_SHADER_TESS_CTRL] ? &gs_info->tes.es_info : &gs_info->vs.es_info;
+       unsigned gs_type = nir[MESA_SHADER_GEOMETRY] ? MESA_SHADER_GEOMETRY : MESA_SHADER_VERTEX;
+       unsigned max_verts_per_prim = radv_get_num_input_vertices(nir);
         unsigned min_verts_per_prim =
                 gs_type == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1;
-       unsigned gs_num_invocations = radv_pipeline_has_gs(pipeline) ? MAX2(gs_info->gs.invocations, 1) : 1;
+       unsigned gs_num_invocations = nir[MESA_SHADER_GEOMETRY] ? MAX2(gs_info->gs.invocations, 1) : 1;
         bool uses_adjacency;
         switch(pCreateInfo->pInputAssemblyState->topology) {
         case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
@@ -1694,17 +1680,11 @@ calculate_ngg_info(const VkGraphicsPipelineCreateInfo *pCreateInfo,
         /* We can't allow using the whole LDS, because GS waves compete with
          * other shader stages for LDS space.
          *
-        * Streamout can increase the ESGS buffer size later on, so be more
-        * conservative with streamout and use 4K dwords. This may be suboptimal.
-        *
-        * Otherwise, use the limit of 7K dwords. The reason is that we need
-        * to leave some headroom for the max_esverts increase at the end.
-        *
          * TODO: We should really take the shader's internal LDS use into
          *       account. The linker will fail if the size is greater than
          *       8K dwords.
          */
-       const unsigned max_lds_size = (0 /*gs_info->info.so.num_outputs*/ ? 4 : 7) * 1024 - 128;
+       const unsigned max_lds_size = 8 * 1024 - 768;
         const unsigned target_lds_size = max_lds_size;
         unsigned esvert_lds_size = 0;
         unsigned gsprim_lds_size = 0;
@@ -1745,12 +1725,22 @@ calculate_ngg_info(const VkGraphicsPipelineCreateInfo *pCreateInfo,
                 esvert_lds_size = es_info->esgs_itemsize / 4;
                 gsprim_lds_size = (gs_info->gs.gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
         } else {
-               /* TODO: This needs to be adjusted once LDS use for compaction
-                * after culling is implemented. */
-               /*
-               if (es_info->info.so.num_outputs)
-                       esvert_lds_size = 4 * es_info->info.so.num_outputs + 1;
-               */
+               /* VS and TES. */
+               /* LDS size for passing data from GS to ES. */
+               struct radv_streamout_info *so_info = nir[MESA_SHADER_TESS_CTRL]
+                       ? &infos[MESA_SHADER_TESS_EVAL].so
+                       : &infos[MESA_SHADER_VERTEX].so;
+
+               if (so_info->num_outputs)
+                       esvert_lds_size = 4 * so_info->num_outputs + 1;
+
+               /* GS stores Primitive IDs (one DWORD) into LDS at the address
+                * corresponding to the ES thread of the provoking vertex. All
+                * ES threads load and export PrimitiveID for their thread.
+                */
+               if (!nir[MESA_SHADER_TESS_CTRL] &&
+                   infos[MESA_SHADER_VERTEX].vs.outinfo.export_prim_id)
+                       esvert_lds_size = MAX2(esvert_lds_size, 1);
         }
  
         unsigned max_gsprims = max_gsprims_base;
@@ -1837,28 +1827,28 @@ calculate_ngg_info(const VkGraphicsPipelineCreateInfo *pCreateInfo,
          * this check passes, there is enough space for a full primitive without
          * vertex reuse.
          */
-       ngg.hw_max_esverts = max_esverts - max_verts_per_prim + 1;
-       ngg.max_gsprims = max_gsprims;
-       ngg.max_out_verts = max_out_vertices;
-       ngg.prim_amp_factor = prim_amp_factor;
-       ngg.max_vert_out_per_gs_instance = max_vert_out_per_gs_instance;
-       ngg.ngg_emit_size = max_gsprims * gsprim_lds_size;
+       ngg->hw_max_esverts = max_esverts - max_verts_per_prim + 1;
+       ngg->max_gsprims = max_gsprims;
+       ngg->max_out_verts = max_out_vertices;
+       ngg->prim_amp_factor = prim_amp_factor;
+       ngg->max_vert_out_per_gs_instance = max_vert_out_per_gs_instance;
+       ngg->ngg_emit_size = max_gsprims * gsprim_lds_size;
+       ngg->esgs_ring_size = 4 * max_esverts * esvert_lds_size;
  
         if (gs_type == MESA_SHADER_GEOMETRY) {
-               ngg.vgt_esgs_ring_itemsize = es_info->esgs_itemsize / 4;
+               ngg->vgt_esgs_ring_itemsize = es_info->esgs_itemsize / 4;
         } else {
-               ngg.vgt_esgs_ring_itemsize = 1;
+               ngg->vgt_esgs_ring_itemsize = 1;
         }
  
-       pipeline->graphics.esgs_ring_size = 4 * max_esverts * esvert_lds_size;
-
-       assert(ngg.hw_max_esverts >= 24); /* HW limitation */
+       pipeline->graphics.esgs_ring_size = ngg->esgs_ring_size;
  
-       return ngg;
+       assert(ngg->hw_max_esverts >= 24); /* HW limitation */
  }
  
  static void
-calculate_gs_ring_sizes(struct radv_pipeline *pipeline, const struct radv_gs_state *gs)
+calculate_gs_ring_sizes(struct radv_pipeline *pipeline,
+                       const struct gfx9_gs_info *gs)
  {
         struct radv_device *device = pipeline->device;
         unsigned num_se = device->physical_device->rad_info.max_se;
@@ -1872,7 +1862,7 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline, const struct radv_gs_sta
         unsigned alignment = 256 * num_se;
         /* The maximum size is 63.999 MB per SE. */
         unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
-       struct radv_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
+       struct radv_shader_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
  
         /* Calculate the minimum size. */
         unsigned min_esgs_ring_size = align(gs->vgt_esgs_ring_itemsize * 4 * gs_vertex_reuse *
@@ -2273,6 +2263,9 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
         if (pipeline->device->physical_device->rad_info.chip_class < GFX8)
                 radv_pipeline_compute_get_int_clamp(pCreateInfo, &key.is_int8, &key.is_int10);
  
+       if (pipeline->device->physical_device->rad_info.chip_class >= GFX10)
+               key.topology = pCreateInfo->pInputAssemblyState->topology;
+
         return key;
  }
  
@@ -2302,6 +2295,7 @@ radv_fill_shader_keys(struct radv_device *device,
                 keys[MESA_SHADER_VERTEX].vs.vertex_attribute_offsets[i] = key->vertex_attribute_offsets[i];
                 keys[MESA_SHADER_VERTEX].vs.vertex_attribute_strides[i] = key->vertex_attribute_strides[i];
         }
+       keys[MESA_SHADER_VERTEX].vs.outprim = si_conv_prim_to_gs_out(key->topology);
  
         if (nir[MESA_SHADER_TESS_CTRL]) {
                 keys[MESA_SHADER_VERTEX].vs_common_out.as_ls = true;
@@ -2319,9 +2313,7 @@ radv_fill_shader_keys(struct radv_device *device,
                         keys[MESA_SHADER_VERTEX].vs_common_out.as_es = true;
         }
  
-       if (device->physical_device->rad_info.chip_class >= GFX10 &&
-           device->physical_device->rad_info.family != CHIP_NAVI14 &&
-           !(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) {
+       if (device->physical_device->use_ngg) {
                 if (nir[MESA_SHADER_TESS_CTRL]) {
                         keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = true;
                 } else {
@@ -2345,9 +2337,6 @@ radv_fill_shader_keys(struct radv_device *device,
                  * issues still:
                  *   * GS primitives in pipeline statistic queries do not get
                  *     updates. See dEQP-VK.query_pool.statistics_query.geometry_shader_primitives
-                *   * dEQP-VK.clipping.user_defined.clip_cull_distance_dynamic_index.*geom* failures
-                *   * Interactions with tessellation failing:
-                *     dEQP-VK.tessellation.geometry_interaction.passthrough.tessellate_isolines_passthrough_geometry_no_change
                  *   * General issues with the last primitive missing/corrupt:
                  *     https://bugs.freedesktop.org/show_bug.cgi?id=111248
                  *
@@ -2360,20 +2349,21 @@ radv_fill_shader_keys(struct radv_device *device,
                                 keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
                 }
  
-               /* TODO: Implement streamout support for NGG. */
-               gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
+               if (!device->physical_device->use_ngg_streamout) {
+                       gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
  
-               for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
-                       if (nir[i])
-                               last_xfb_stage = i;
-               }
+                       for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
+                               if (nir[i])
+                                       last_xfb_stage = i;
+                       }
  
-               if (nir[last_xfb_stage] &&
-                   radv_nir_stage_uses_xfb(nir[last_xfb_stage])) {
-                       if (nir[MESA_SHADER_TESS_CTRL])
-                               keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
-                       else
-                               keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+                       if (nir[last_xfb_stage] &&
+                           radv_nir_stage_uses_xfb(nir[last_xfb_stage])) {
+                               if (nir[MESA_SHADER_TESS_CTRL])
+                                       keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
+                               else
+                                       keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+                       }
                 }
         }
  
@@ -2387,6 +2377,107 @@ radv_fill_shader_keys(struct radv_device *device,
         keys[MESA_SHADER_FRAGMENT].fs.num_samples = key->num_samples;
  }
  
+static void
+radv_fill_shader_info(struct radv_pipeline *pipeline,
+                     struct radv_shader_variant_key *keys,
+                      struct radv_shader_info *infos,
+                      nir_shader **nir)
+{
+       unsigned active_stages = 0;
+       unsigned filled_stages = 0;
+
+       for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+               if (nir[i])
+                       active_stages |= (1 << i);
+       }
+
+       if (nir[MESA_SHADER_FRAGMENT]) {
+               radv_nir_shader_info_init(&infos[MESA_SHADER_FRAGMENT]);
+               radv_nir_shader_info_pass(nir[MESA_SHADER_FRAGMENT],
+                                         pipeline->layout,
+                                         &keys[MESA_SHADER_FRAGMENT],
+                                         &infos[MESA_SHADER_FRAGMENT]);
+
+               /* TODO: These are no longer used as keys we should refactor this */
+               keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id =
+                       infos[MESA_SHADER_FRAGMENT].ps.prim_id_input;
+               keys[MESA_SHADER_VERTEX].vs_common_out.export_layer_id =
+                       infos[MESA_SHADER_FRAGMENT].ps.layer_input;
+               keys[MESA_SHADER_VERTEX].vs_common_out.export_clip_dists =
+                       !!infos[MESA_SHADER_FRAGMENT].ps.num_input_clips_culls;
+               keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_prim_id =
+                       infos[MESA_SHADER_FRAGMENT].ps.prim_id_input;
+               keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_layer_id =
+                       infos[MESA_SHADER_FRAGMENT].ps.layer_input;
+               keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_clip_dists =
+                       !!infos[MESA_SHADER_FRAGMENT].ps.num_input_clips_culls;
+
+               filled_stages |= (1 << MESA_SHADER_FRAGMENT);
+       }
+
+       if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 &&
+           nir[MESA_SHADER_TESS_CTRL]) {
+               struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};
+               struct radv_shader_variant_key key = keys[MESA_SHADER_TESS_CTRL];
+               key.tcs.vs_key = keys[MESA_SHADER_VERTEX].vs;
+
+               radv_nir_shader_info_init(&infos[MESA_SHADER_TESS_CTRL]);
+
+               for (int i = 0; i < 2; i++) {
+                       radv_nir_shader_info_pass(combined_nir[i],
+                                                 pipeline->layout, &key,
+                                                 &infos[MESA_SHADER_TESS_CTRL]);
+               }
+
+               keys[MESA_SHADER_TESS_EVAL].tes.num_patches =
+                       infos[MESA_SHADER_TESS_CTRL].tcs.num_patches;
+               keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs =
+                       util_last_bit64(infos[MESA_SHADER_TESS_CTRL].tcs.outputs_written);
+
+               filled_stages |= (1 << MESA_SHADER_VERTEX);
+               filled_stages |= (1 << MESA_SHADER_TESS_CTRL);
+       }
+
+       if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 &&
+           nir[MESA_SHADER_GEOMETRY]) {
+               gl_shader_stage pre_stage = nir[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
+               struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};
+
+               radv_nir_shader_info_init(&infos[MESA_SHADER_GEOMETRY]);
+
+               for (int i = 0; i < 2; i++) {
+                       radv_nir_shader_info_pass(combined_nir[i],
+                                                 pipeline->layout,
+                                                 &keys[pre_stage],
+                                                 &infos[MESA_SHADER_GEOMETRY]);
+               }
+
+               filled_stages |= (1 << pre_stage);
+               filled_stages |= (1 << MESA_SHADER_GEOMETRY);
+       }
+
+       active_stages ^= filled_stages;
+       while (active_stages) {
+               int i = u_bit_scan(&active_stages);
+
+               if (i == MESA_SHADER_TESS_CTRL) {
+                       keys[MESA_SHADER_TESS_CTRL].tcs.num_inputs =
+                               util_last_bit64(infos[MESA_SHADER_VERTEX].vs.ls_outputs_written);
+               }
+
+               if (i == MESA_SHADER_TESS_EVAL) {
+                       keys[MESA_SHADER_TESS_EVAL].tes.num_patches =
+                               infos[MESA_SHADER_TESS_CTRL].tcs.num_patches;
+                       keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs =
+                               util_last_bit64(infos[MESA_SHADER_TESS_CTRL].tcs.outputs_written);
+               }
+
+               radv_nir_shader_info_init(&infos[i]);
+               radv_nir_shader_info_pass(nir[i], pipeline->layout,
+                                         &keys[i], &infos[i]);
+       }
+}
+
  static void
  merge_tess_info(struct shader_info *tes_info,
                  const struct shader_info *tcs_info)
@@ -2464,6 +2555,14 @@ void radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit)
                            (cache_hit ? VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT : 0);
  }
  
+static
+bool radv_aco_supported_stage(gl_shader_stage stage, bool has_gs, bool has_ts)
+{
+       return (stage == MESA_SHADER_VERTEX && !has_gs && !has_ts) ||
+              stage == MESA_SHADER_FRAGMENT ||
+              stage == MESA_SHADER_COMPUTE;
+}
+
  static
  void radv_create_shaders(struct radv_pipeline *pipeline,
                           struct radv_device *device,
@@ -2471,6 +2570,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                           const struct radv_pipeline_key *key,
                           const VkPipelineShaderStageCreateInfo **pStages,
                           const VkPipelineCreateFlags flags,
+                         const VkGraphicsPipelineCreateInfo *pCreateInfo,
                           VkPipelineCreationFeedbackEXT *pipeline_feedback,
                           VkPipelineCreationFeedbackEXT **stage_feedbacks)
  {
@@ -2479,6 +2579,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
         nir_shader *nir[MESA_SHADER_STAGES] = {0};
         struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {NULL};
         struct radv_shader_variant_key keys[MESA_SHADER_STAGES] = {{{{{0}}}}};
+       struct radv_shader_info infos[MESA_SHADER_STAGES] = {0};
         unsigned char hash[20], gs_copy_hash[20];
         bool keep_executable_info = (flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) || device->keep_shader_info;
  
@@ -2524,6 +2625,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                 modules[MESA_SHADER_FRAGMENT] = &fs_m;
         }
  
+       bool has_gs = modules[MESA_SHADER_GEOMETRY];
+       bool has_ts = modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL];
+       bool use_aco = device->physical_device->use_aco;
+
         for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
                 const VkPipelineShaderStageCreateInfo *stage = pStages[i];
  
@@ -2532,10 +2637,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
  
                 radv_start_feedback(stage_feedbacks[i]);
  
+               bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
                 nir[i] = radv_shader_compile_to_nir(device, modules[i],
                                                     stage ? stage->pName : "main", i,
                                                     stage ? stage->pSpecializationInfo : NULL,
-                                                   flags, pipeline->layout);
+                                                   flags, pipeline->layout, aco);
  
                 /* We don't want to alter meta shaders IR directly so clone it
                  * first.
@@ -2562,40 +2668,72 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                                            nir_lower_non_uniform_ssbo_access |
                                            nir_lower_non_uniform_texture_access |
                                            nir_lower_non_uniform_image_access);
-                       NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
+
+                       bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
+                       if (!aco)
+                               NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
                 }
  
                 if (radv_can_dump_shader(device, modules[i], false))
                         nir_print_shader(nir[i], stderr);
         }
  
+       if (nir[MESA_SHADER_FRAGMENT])
+               radv_lower_fs_io(nir[MESA_SHADER_FRAGMENT]);
+
         radv_fill_shader_keys(device, keys, key, nir);
  
+       radv_fill_shader_info(pipeline, keys, infos, nir);
+
+       if ((nir[MESA_SHADER_VERTEX] &&
+            keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg) ||
+           (nir[MESA_SHADER_TESS_EVAL] &&
+            keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg)) {
+               struct gfx10_ngg_info *ngg_info;
+
+               if (nir[MESA_SHADER_GEOMETRY])
+                       ngg_info = &infos[MESA_SHADER_GEOMETRY].ngg_info;
+               else if (nir[MESA_SHADER_TESS_CTRL])
+                       ngg_info = &infos[MESA_SHADER_TESS_EVAL].ngg_info;
+               else
+                       ngg_info = &infos[MESA_SHADER_VERTEX].ngg_info;
+
+               gfx10_get_ngg_info(pCreateInfo, pipeline, nir, infos, ngg_info);
+       } else if (nir[MESA_SHADER_GEOMETRY]) {
+               struct gfx9_gs_info *gs_info =
+                       &infos[MESA_SHADER_GEOMETRY].gs_ring_info;
+
+               gfx9_get_gs_info(pCreateInfo, pipeline, nir, infos, gs_info);
+       }
+
         if (nir[MESA_SHADER_FRAGMENT]) {
                 if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
                         radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
  
+                       bool aco = use_aco && radv_aco_supported_stage(MESA_SHADER_FRAGMENT, has_gs, has_ts);
                         pipeline->shaders[MESA_SHADER_FRAGMENT] =
                                radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
                                                           pipeline->layout, keys + MESA_SHADER_FRAGMENT,
-                                                         keep_executable_info, &binaries[MESA_SHADER_FRAGMENT]);
+                                                         infos + MESA_SHADER_FRAGMENT,
+                                                         keep_executable_info, aco,
+                                                         &binaries[MESA_SHADER_FRAGMENT]);
  
                         radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
                 }
  
                 /* TODO: These are no longer used as keys we should refactor this */
                 keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id =
-                       pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.prim_id_input;
+                       pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input;
                 keys[MESA_SHADER_VERTEX].vs_common_out.export_layer_id =
-                       pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.layer_input;
+                       pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.layer_input;
                 keys[MESA_SHADER_VERTEX].vs_common_out.export_clip_dists =
-                       !!pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.num_input_clips_culls;
+                       !!pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.num_input_clips_culls;
                 keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_prim_id =
-                       pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.prim_id_input;
+                       pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input;
                 keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_layer_id =
-                       pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.layer_input;
+                       pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.layer_input;
                 keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_clip_dists =
-                       !!pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.num_input_clips_culls;
+                       !!pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.num_input_clips_culls;
         }
  
         if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_TESS_CTRL]) {
@@ -2608,14 +2746,14 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
  
                         pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
                                                                                               pipeline->layout,
-                                                                                             &key, keep_executable_info,
-                                                                                             &binaries[MESA_SHADER_TESS_CTRL]);
+                                                                                             &key, &infos[MESA_SHADER_TESS_CTRL], keep_executable_info,
+                                                                                             false, &binaries[MESA_SHADER_TESS_CTRL]);
  
                         radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
                 }
                 modules[MESA_SHADER_VERTEX] = NULL;
                 keys[MESA_SHADER_TESS_EVAL].tes.num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
-               keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.info.tcs.outputs_written);
+               keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.outputs_written);
         }
  
         if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_GEOMETRY]) {
@@ -2627,8 +2765,8 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
  
                         pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
                                                                                              pipeline->layout,
-                                                                                            &keys[pre_stage], keep_executable_info,
-                                                                                            &binaries[MESA_SHADER_GEOMETRY]);
+                                                                                            &keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info,
+                                                                                            false, &binaries[MESA_SHADER_GEOMETRY]);
  
                         radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
                 }
@@ -2638,19 +2776,20 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
         for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
                 if(modules[i] && !pipeline->shaders[i]) {
                         if (i == MESA_SHADER_TESS_CTRL) {
-                               keys[MESA_SHADER_TESS_CTRL].tcs.num_inputs = util_last_bit64(pipeline->shaders[MESA_SHADER_VERTEX]->info.info.vs.ls_outputs_written);
+                               keys[MESA_SHADER_TESS_CTRL].tcs.num_inputs = util_last_bit64(pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.ls_outputs_written);
                         }
                         if (i == MESA_SHADER_TESS_EVAL) {
                                 keys[MESA_SHADER_TESS_EVAL].tes.num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
-                               keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.info.tcs.outputs_written);
+                               keys[MESA_SHADER_TESS_EVAL].tes.tcs_num_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.outputs_written);
                         }
  
                         radv_start_feedback(stage_feedbacks[i]);
  
+                       bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
                         pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1,
                                                                           pipeline->layout,
-                                                                         keys + i, keep_executable_info,
-                                                                         &binaries[i]);
+                                                                         keys + i, infos + i,keep_executable_info,
+                                                                         aco, &binaries[i]);
  
                         radv_stop_feedback(stage_feedbacks[i], false);
                 }
@@ -2660,9 +2799,19 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                 struct radv_shader_binary *gs_copy_binary = NULL;
                 if (!pipeline->gs_copy_shader &&
                     !radv_pipeline_has_ngg(pipeline)) {
+                       struct radv_shader_info info = {};
+                       struct radv_shader_variant_key key = {};
+
+                       key.has_multiview_view_index =
+                               keys[MESA_SHADER_GEOMETRY].has_multiview_view_index;
+
+                       radv_nir_shader_info_pass(nir[MESA_SHADER_GEOMETRY],
+                                                 pipeline->layout, &key,
+                                                 &info);
+
                         pipeline->gs_copy_shader = radv_create_gs_copy_shader(
-                                       device, nir[MESA_SHADER_GEOMETRY], &gs_copy_binary,
-                                       keep_executable_info,
+                                       device, nir[MESA_SHADER_GEOMETRY], &info,
+                                       &gs_copy_binary, keep_executable_info,
                                         keys[MESA_SHADER_GEOMETRY].has_multiview_view_index);
                 }
  
@@ -3044,20 +3193,6 @@ radv_gfx10_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipe
         struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
         VkExtent2D extent = {512, 512};
  
-       unsigned sdp_interface_count;
-
-       switch(pipeline->device->physical_device->rad_info.family) {
-       case CHIP_NAVI10:
-       case CHIP_NAVI12:
-               sdp_interface_count = 16;
-               break;
-       case CHIP_NAVI14:
-               sdp_interface_count = 8;
-               break;
-       default:
-               unreachable("Unhandled GFX10 chip");
-       }
-
         const unsigned db_tag_size = 64;
         const unsigned db_tag_count = 312;
         const unsigned color_tag_size = 1024;
@@ -3066,7 +3201,7 @@ radv_gfx10_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipe
         const unsigned fmask_tag_count = 44;
  
         const unsigned rb_count = pipeline->device->physical_device->rad_info.num_render_backends;
-       const unsigned pipe_count = MAX2(rb_count, sdp_interface_count);
+       const unsigned pipe_count = MAX2(rb_count, pipeline->device->physical_device->rad_info.num_sdp_interfaces);
  
         const unsigned db_tag_part = (db_tag_count * rb_count / pipe_count) * db_tag_size * pipe_count;
         const unsigned color_tag_part = (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count;
@@ -3091,6 +3226,7 @@ radv_gfx10_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipe
                         color_bytes_per_pixel += vk_format_get_blocksize(format);
  
                         if (total_samples > 1) {
+                               assert(samples_log <= 3);
                                 const unsigned fmask_array[] = {0, 1, 1, 4};
                                 fmask_bytes_per_pixel += fmask_array[samples_log];
                         }
@@ -3188,7 +3324,8 @@ radv_pipeline_generate_disabled_binning_state(struct radeon_cmdbuf *ctx_cs,
  static void
  radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs,
                                      struct radv_pipeline *pipeline,
-                                    const VkGraphicsPipelineCreateInfo *pCreateInfo)
+                                    const VkGraphicsPipelineCreateInfo *pCreateInfo,
+                                    const struct radv_blend_state *blend)
  {
         if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
                 return;
@@ -3218,6 +3355,19 @@ radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs,
                         fpovs_per_batch = 63;
                 }
  
+               bool disable_start_of_prim = true;
+               uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
+
+               const struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+
+               if (pipeline->device->dfsm_allowed && ps &&
+                   !ps->info.ps.can_discard &&
+                   !ps->info.ps.writes_memory &&
+                   blend->cb_target_enabled_4bit) {
+                       db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_AUTO);
+                       disable_start_of_prim = (blend->blend_enable_4bit & blend->cb_target_enabled_4bit) != 0;
+               }
+
                 const uint32_t pa_sc_binner_cntl_0 =
                         S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
                         S_028C44_BIN_SIZE_X(bin_size.width == 16) |
@@ -3226,12 +3376,10 @@ radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs,
                         S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
                         S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin - 1) |
                         S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin - 1) |
-                       S_028C44_DISABLE_START_OF_PRIM(1) |
+                       S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
                         S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) |
                         S_028C44_OPTIMAL_BIN_SELECTION(1);
  
-               uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
-
                 pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
                 pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
         } else
@@ -3248,6 +3396,7 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
         const VkPipelineDepthStencilStateCreateInfo *vkds = pCreateInfo->pDepthStencilState;
         RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
         struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+       struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
         struct radv_render_pass_attachment *attachment = NULL;
         uint32_t db_depth_control = 0, db_stencil_control = 0;
         uint32_t db_render_control = 0, db_render_override2 = 0;
@@ -3296,7 +3445,8 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
         db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
                               S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
  
-       if (!pCreateInfo->pRasterizationState->depthClampEnable) {
+       if (!pCreateInfo->pRasterizationState->depthClampEnable &&
+           ps->info.ps.writes_z) {
                 /* From VK_EXT_depth_range_unrestricted spec:
                  *
                  * "The behavior described in Primitive Clipping still applies.
@@ -3483,7 +3633,7 @@ radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs,
  
                 vgt_gs_mode = ac_vgt_gs_mode(gs->info.gs.vertices_out,
                                              pipeline->device->physical_device->rad_info.chip_class);
-       } else if (outinfo->export_prim_id || vs->info.info.uses_prim_id) {
+       } else if (outinfo->export_prim_id || vs->info.uses_prim_id) {
                 vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
                 vgt_primitiveid_en |= S_028A84_PRIMITIVEID_EN(1);
         }
@@ -3601,14 +3751,14 @@ static void
  radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
                               struct radeon_cmdbuf *cs,
                               struct radv_pipeline *pipeline,
-                             struct radv_shader_variant *shader,
-                             const struct radv_ngg_state *ngg_state)
+                             struct radv_shader_variant *shader)
  {
         uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
         gl_shader_stage es_type =
                 radv_pipeline_has_tess(pipeline) ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
         struct radv_shader_variant *es =
                 es_type == MESA_SHADER_TESS_EVAL ? pipeline->shaders[MESA_SHADER_TESS_EVAL] : pipeline->shaders[MESA_SHADER_VERTEX];
+       const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
  
         radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
         radeon_emit(cs, va >> 8);
@@ -3626,7 +3776,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
                 outinfo->writes_layer ||
                 outinfo->writes_viewport_index;
         bool es_enable_prim_id = outinfo->export_prim_id ||
-                                (es && es->info.info.uses_prim_id);
+                                (es && es->info.uses_prim_id);
         bool break_wave_at_eoi = false;
         unsigned ge_cntl;
         unsigned nparams;
@@ -3635,7 +3785,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
                 struct radv_shader_variant *gs =
                         pipeline->shaders[MESA_SHADER_GEOMETRY];
  
-               if (es_enable_prim_id || (gs && gs->info.info.uses_prim_id))
+               if (es_enable_prim_id || (gs && gs->info.uses_prim_id))
                         break_wave_at_eoi = true;
         }
  
@@ -3778,8 +3928,7 @@ static void
  radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *ctx_cs,
                                      struct radeon_cmdbuf *cs,
                                      struct radv_pipeline *pipeline,
-                                    const struct radv_tessellation_state *tess,
-                                    const struct radv_ngg_state *ngg)
+                                    const struct radv_tessellation_state *tess)
  {
         struct radv_shader_variant *vs;
  
@@ -3793,7 +3942,7 @@ radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *ctx_cs,
         else if (vs->info.vs.as_es)
                 radv_pipeline_generate_hw_es(cs, pipeline, vs);
         else if (vs->info.is_ngg)
-               radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, vs, ngg);
+               radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, vs);
         else
                 radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, vs);
  }
@@ -3802,8 +3951,7 @@ static void
  radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs,
                                     struct radeon_cmdbuf *cs,
                                     struct radv_pipeline *pipeline,
-                                   const struct radv_tessellation_state *tess,
-                                   const struct radv_ngg_state *ngg)
+                                   const struct radv_tessellation_state *tess)
  {
         if (!radv_pipeline_has_tess(pipeline))
                 return;
@@ -3815,7 +3963,7 @@ radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs,
  
         if (tes) {
                 if (tes->info.is_ngg) {
-                       radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, tes, ngg);
+                       radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, tes);
                 } else if (tes->info.tes.as_es)
                         radv_pipeline_generate_hw_es(cs, pipeline, tes);
                 else
@@ -3847,9 +3995,9 @@ static void
  radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs,
                              struct radeon_cmdbuf *cs,
                              struct radv_pipeline *pipeline,
-                            struct radv_shader_variant *gs,
-                            const struct radv_gs_state *gs_state)
+                            struct radv_shader_variant *gs)
  {
+       const struct gfx9_gs_info *gs_state = &gs->info.gs_ring_info;
         unsigned gs_max_out_vertices;
         uint8_t *num_components;
         uint8_t max_stream;
@@ -3857,8 +4005,8 @@ radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs,
         uint64_t va;
  
         gs_max_out_vertices = gs->info.gs.vertices_out;
-       max_stream = gs->info.info.gs.max_stream;
-       num_components = gs->info.info.gs.num_stream_output_components;
+       max_stream = gs->info.gs.max_stream;
+       num_components = gs->info.gs.num_stream_output_components;
  
         offset = num_components[0] * gs_max_out_vertices;
  
@@ -3921,9 +4069,7 @@ radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs,
  static void
  radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs,
                                        struct radeon_cmdbuf *cs,
-                                      struct radv_pipeline *pipeline,
-                                      const struct radv_gs_state *gs_state,
-                                      const struct radv_ngg_state *ngg_state)
+                                      struct radv_pipeline *pipeline)
  {
         struct radv_shader_variant *gs;
  
@@ -3932,9 +4078,9 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs,
                 return;
  
         if (gs->info.is_ngg)
-               radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, gs, ngg_state);
+               radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, gs);
         else
-               radv_pipeline_generate_hw_gs(ctx_cs, cs, pipeline, gs, gs_state);
+               radv_pipeline_generate_hw_gs(ctx_cs, cs, pipeline, gs);
  
         radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT,
                               gs->info.gs.vertices_out);
@@ -3972,7 +4118,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
  
         unsigned ps_offset = 0;
  
-       if (ps->info.info.ps.prim_id_input) {
+       if (ps->info.ps.prim_id_input) {
                 unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
                 if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
                         ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
@@ -3980,8 +4126,8 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
                 }
         }
  
-       if (ps->info.info.ps.layer_input ||
-           ps->info.info.needs_multiview_view_index) {
+       if (ps->info.ps.layer_input ||
+           ps->info.needs_multiview_view_index) {
                 unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
                 if (vs_offset != AC_EXP_PARAM_UNDEFINED)
                         ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
@@ -3990,14 +4136,14 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
                 ++ps_offset;
         }
  
-       if (ps->info.info.ps.has_pcoord) {
+       if (ps->info.ps.has_pcoord) {
                 unsigned val;
                 val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
                 ps_input_cntl[ps_offset] = val;
                 ps_offset++;
         }
  
-       if (ps->info.info.ps.num_input_clips_culls) {
+       if (ps->info.ps.num_input_clips_culls) {
                 unsigned vs_offset;
  
                 vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
@@ -4008,17 +4154,17 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
  
                 vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
                 if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
-                   ps->info.info.ps.num_input_clips_culls > 4) {
+                   ps->info.ps.num_input_clips_culls > 4) {
                         ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
                         ++ps_offset;
                 }
         }
  
-       for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
+       for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.ps.input_mask; ++i) {
                 unsigned vs_offset;
                 bool flat_shade;
                 bool float16;
-               if (!(ps->info.fs.input_mask & (1u << i)))
+               if (!(ps->info.ps.input_mask & (1u << i)))
                         continue;
  
                 vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i];
@@ -4028,8 +4174,8 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
                         continue;
                 }
  
-               flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
-               float16 = !!(ps->info.fs.float16_shaded_mask & (1u << ps_offset));
+               flat_shade = !!(ps->info.ps.flat_shaded_mask & (1u << ps_offset));
+               float16 = !!(ps->info.ps.float16_shaded_mask & (1u << ps_offset));
  
                 ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, float16);
                 ++ps_offset;
@@ -4049,29 +4195,29 @@ radv_compute_db_shader_control(const struct radv_device *device,
                                 const struct radv_shader_variant *ps)
  {
         unsigned z_order;
-       if (ps->info.fs.early_fragment_test || !ps->info.info.ps.writes_memory)
+       if (ps->info.ps.early_fragment_test || !ps->info.ps.writes_memory)
                 z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
         else
                 z_order = V_02880C_LATE_Z;
  
         bool disable_rbplus = device->physical_device->rad_info.has_rbplus &&
-                             !device->physical_device->rbplus_allowed;
+                             !device->physical_device->rad_info.rbplus_allowed;
  
         /* It shouldn't be needed to export gl_SampleMask when MSAA is disabled
          * but this appears to break Project Cars (DXVK). See
          * https://bugs.freedesktop.org/show_bug.cgi?id=109401
          */
-       bool mask_export_enable = ps->info.info.ps.writes_sample_mask;
+       bool mask_export_enable = ps->info.ps.writes_sample_mask;
  
-       return  S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) |
-               S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) |
-               S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) |
+       return  S_02880C_Z_EXPORT_ENABLE(ps->info.ps.writes_z) |
+               S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.ps.writes_stencil) |
+               S_02880C_KILL_ENABLE(!!ps->info.ps.can_discard) |
                 S_02880C_MASK_EXPORT_ENABLE(mask_export_enable) |
                 S_02880C_Z_ORDER(z_order) |
-               S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) |
-               S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(ps->info.fs.post_depth_coverage) |
-               S_02880C_EXEC_ON_HIER_FAIL(ps->info.info.ps.writes_memory) |
-               S_02880C_EXEC_ON_NOOP(ps->info.info.ps.writes_memory) |
+               S_02880C_DEPTH_BEFORE_SHADER(ps->info.ps.early_fragment_test) |
+               S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(ps->info.ps.post_depth_coverage) |
+               S_02880C_EXEC_ON_HIER_FAIL(ps->info.ps.writes_memory) |
+               S_02880C_EXEC_ON_NOOP(ps->info.ps.writes_memory) |
                 S_02880C_DUAL_QUAD_DISABLE(disable_rbplus);
  }
  
@@ -4104,15 +4250,15 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs,
                                ps->config.spi_ps_input_addr);
  
         radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL,
-                              S_0286D8_NUM_INTERP(ps->info.fs.num_interp) |
-                              S_0286D8_PS_W32_EN(ps->info.info.wave_size == 32));
+                              S_0286D8_NUM_INTERP(ps->info.ps.num_interp) |
+                              S_0286D8_PS_W32_EN(ps->info.wave_size == 32));
  
         radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl);
  
         radeon_set_context_reg(ctx_cs, R_028710_SPI_SHADER_Z_FORMAT,
-                              ac_get_spi_shader_z_format(ps->info.info.ps.writes_z,
-                                                         ps->info.info.ps.writes_stencil,
-                                                         ps->info.info.ps.writes_sample_mask));
+                              ac_get_spi_shader_z_format(ps->info.ps.writes_z,
+                                                         ps->info.ps.writes_stencil,
+                                                         ps->info.ps.writes_sample_mask));
  
         if (pipeline->device->dfsm_allowed) {
                 /* optimise this? */
@@ -4162,6 +4308,8 @@ radv_compute_vgt_shader_stages_en(const struct radv_pipeline *pipeline)
  
         if (radv_pipeline_has_ngg(pipeline)) {
                 stages |= S_028B54_PRIMGEN_EN(1);
+               if (pipeline->streamout_shader)
+                       stages |= S_028B54_NGG_WAVE_ID_EN(1);
         } else if (radv_pipeline_has_gs(pipeline)) {
                 stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
         }
@@ -4173,16 +4321,16 @@ radv_compute_vgt_shader_stages_en(const struct radv_pipeline *pipeline)
                 uint8_t hs_size = 64, gs_size = 64, vs_size = 64;
  
                 if (radv_pipeline_has_tess(pipeline))
-                       hs_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.info.wave_size;
+                       hs_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.wave_size;
  
                 if (pipeline->shaders[MESA_SHADER_GEOMETRY]) {
-                       vs_size = gs_size = pipeline->shaders[MESA_SHADER_GEOMETRY]->info.info.wave_size;
+                       vs_size = gs_size = pipeline->shaders[MESA_SHADER_GEOMETRY]->info.wave_size;
                         if (pipeline->gs_copy_shader)
-                               vs_size = pipeline->gs_copy_shader->info.info.wave_size;
+                               vs_size = pipeline->gs_copy_shader->info.wave_size;
                 } else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
-                       vs_size = pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.info.wave_size;
+                       vs_size = pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.wave_size;
                 else if (pipeline->shaders[MESA_SHADER_VERTEX])
-                       vs_size = pipeline->shaders[MESA_SHADER_VERTEX]->info.info.wave_size;
+                       vs_size = pipeline->shaders[MESA_SHADER_VERTEX]->info.wave_size;
                 
                 if (radv_pipeline_has_ngg(pipeline))
                         gs_size = vs_size;
@@ -4230,8 +4378,7 @@ radv_compute_cliprect_rule(const VkGraphicsPipelineCreateInfo *pCreateInfo)
  static void
  gfx10_pipeline_generate_ge_cntl(struct radeon_cmdbuf *ctx_cs,
                                 struct radv_pipeline *pipeline,
-                               const struct radv_tessellation_state *tess,
-                               const struct radv_gs_state *gs_state)
+                               const struct radv_tessellation_state *tess)
  {
         bool break_wave_at_eoi = false;
         unsigned primgroup_size;
@@ -4241,6 +4388,8 @@ gfx10_pipeline_generate_ge_cntl(struct radeon_cmdbuf *ctx_cs,
                 primgroup_size = tess->num_patches; /* must be a multiple of NUM_PATCHES */
                 vertgroup_size = 0;
         } else if (radv_pipeline_has_gs(pipeline)) {
+               const struct gfx9_gs_info *gs_state =
+                       &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info;
                 unsigned vgt_gs_onchip_cntl = gs_state->vgt_gs_onchip_cntl;
                 primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(vgt_gs_onchip_cntl);
                 vertgroup_size = G_028A44_ES_VERTS_PER_SUBGRP(vgt_gs_onchip_cntl);
@@ -4250,8 +4399,8 @@ gfx10_pipeline_generate_ge_cntl(struct radeon_cmdbuf *ctx_cs,
         }
  
         if (radv_pipeline_has_tess(pipeline)) {
-               if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.info.uses_prim_id ||
-                   radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.info.uses_prim_id)
+               if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
+                   radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
                         break_wave_at_eoi = true;
         }
  
@@ -4268,8 +4417,6 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline,
                             const struct radv_graphics_pipeline_create_info *extra,
                             const struct radv_blend_state *blend,
                             const struct radv_tessellation_state *tess,
-                           const struct radv_gs_state *gs,
-                           const struct radv_ngg_state *ngg,
                             unsigned prim, unsigned gs_out)
  {
         struct radeon_cmdbuf *ctx_cs = &pipeline->ctx_cs;
@@ -4285,16 +4432,16 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline,
         radv_pipeline_generate_raster_state(ctx_cs, pipeline, pCreateInfo);
         radv_pipeline_generate_multisample_state(ctx_cs, pipeline);
         radv_pipeline_generate_vgt_gs_mode(ctx_cs, pipeline);
-       radv_pipeline_generate_vertex_shader(ctx_cs, cs, pipeline, tess, ngg);
-       radv_pipeline_generate_tess_shaders(ctx_cs, cs, pipeline, tess, ngg);
-       radv_pipeline_generate_geometry_shader(ctx_cs, cs, pipeline, gs, ngg);
+       radv_pipeline_generate_vertex_shader(ctx_cs, cs, pipeline, tess);
+       radv_pipeline_generate_tess_shaders(ctx_cs, cs, pipeline, tess);
+       radv_pipeline_generate_geometry_shader(ctx_cs, cs, pipeline);
         radv_pipeline_generate_fragment_shader(ctx_cs, cs, pipeline);
         radv_pipeline_generate_ps_inputs(ctx_cs, pipeline);
         radv_pipeline_generate_vgt_vertex_reuse(ctx_cs, pipeline);
-       radv_pipeline_generate_binning_state(ctx_cs, pipeline, pCreateInfo);
+       radv_pipeline_generate_binning_state(ctx_cs, pipeline, pCreateInfo, blend);
  
         if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 && !radv_pipeline_has_ngg(pipeline))
-               gfx10_pipeline_generate_ge_cntl(ctx_cs, pipeline, tess, gs);
+               gfx10_pipeline_generate_ge_cntl(ctx_cs, pipeline, tess);
  
         radeon_set_context_reg(ctx_cs, R_0286E8_SPI_TMPRING_SIZE,
                                S_0286E8_WAVES(pipeline->max_waves) |
@@ -4357,15 +4504,15 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline,
         }
  
         ia_multi_vgt_param.ia_switch_on_eoi = false;
-       if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.prim_id_input)
+       if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input)
                 ia_multi_vgt_param.ia_switch_on_eoi = true;
         if (radv_pipeline_has_gs(pipeline) &&
-           pipeline->shaders[MESA_SHADER_GEOMETRY]->info.info.uses_prim_id)
+           pipeline->shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id)
                 ia_multi_vgt_param.ia_switch_on_eoi = true;
         if (radv_pipeline_has_tess(pipeline)) {
                 /* SWITCH_ON_EOI must be set if PrimID is used. */
-               if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.info.uses_prim_id ||
-                   radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.info.uses_prim_id)
+               if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
+                   radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
                         ia_multi_vgt_param.ia_switch_on_eoi = true;
         }
  
@@ -4468,7 +4615,7 @@ radv_pipeline_get_streamout_shader(struct radv_pipeline *pipeline)
                 struct radv_shader_variant *shader =
                         radv_get_shader(pipeline, i);
  
-               if (shader && shader->info.info.so.num_outputs > 0)
+               if (shader && shader->info.so.num_outputs > 0)
                         return shader;
         }
  
@@ -4512,7 +4659,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
         }
  
         struct radv_pipeline_key key = radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &blend, has_view_index);
-       radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags, pipeline_feedback, stage_feedbacks);
+       radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags, pCreateInfo, pipeline_feedback, stage_feedbacks);
  
         pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
         radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo);
@@ -4563,11 +4710,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
          */
         struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
         if ((pipeline->device->physical_device->rad_info.chip_class <= GFX9 ||
-            ps->info.fs.can_discard) &&
+            ps->info.ps.can_discard) &&
             !blend.spi_shader_col_format) {
-               if (!ps->info.info.ps.writes_z &&
-                   !ps->info.info.ps.writes_stencil &&
-                   !ps->info.info.ps.writes_sample_mask)
+               if (!ps->info.ps.writes_z &&
+                   !ps->info.ps.writes_stencil &&
+                   !ps->info.ps.writes_sample_mask)
                         blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
         }
  
@@ -4577,14 +4724,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
                 }
         }
  
-       struct radv_ngg_state ngg = {0};
-       struct radv_gs_state gs = {0};
+       if (radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
+               struct radv_shader_variant *gs =
+                       pipeline->shaders[MESA_SHADER_GEOMETRY];
  
-       if (radv_pipeline_has_ngg(pipeline)) {
-               ngg = calculate_ngg_info(pCreateInfo, pipeline);
-       } else if (radv_pipeline_has_gs(pipeline)) {
-               gs = calculate_gs_info(pCreateInfo, pipeline);
-               calculate_gs_ring_sizes(pipeline, &gs);
+               calculate_gs_ring_sizes(pipeline, &gs->info.gs_ring_info);
         }
  
         struct radv_tessellation_state tess = {0};
@@ -4608,7 +4752,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
         if (loc->sgpr_idx != -1) {
                 pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[MESA_SHADER_VERTEX];
                 pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
-               if (radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.info.vs.needs_draw_id)
+               if (radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_draw_id)
                         pipeline->graphics.vtx_emit_num = 3;
                 else
                         pipeline->graphics.vtx_emit_num = 2;
@@ -4618,7 +4762,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
         pipeline->streamout_shader = radv_pipeline_get_streamout_shader(pipeline);
  
         result = radv_pipeline_scratch_init(device, pipeline);
-       radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra, &blend, &tess, &gs, &ngg, prim, gs_out);
+       radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra, &blend, &tess, prim, gs_out);
  
         return result;
  }
@@ -4692,8 +4836,8 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
         unsigned max_waves_per_sh = 0;
         uint64_t va;
  
-       pipeline->cs.buf = malloc(20 * 4);
-       pipeline->cs.max_dw = 20;
+       pipeline->cs.max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 22 : 20;
+       pipeline->cs.buf = malloc(pipeline->cs.max_dw * 4);
  
         compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
         va = radv_buffer_get_va(compute_shader->bo) + compute_shader->bo_offset;
@@ -4705,6 +4849,9 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
         radeon_set_sh_reg_seq(&pipeline->cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
         radeon_emit(&pipeline->cs, compute_shader->config.rsrc1);
         radeon_emit(&pipeline->cs, compute_shader->config.rsrc2);
+       if (device->physical_device->rad_info.chip_class >= GFX10) {
+               radeon_set_sh_reg(&pipeline->cs, R_00B8A0_COMPUTE_PGM_RSRC3, compute_shader->config.rsrc3);
+       }
  
         radeon_set_sh_reg(&pipeline->cs, R_00B860_COMPUTE_TMPRING_SIZE,
                           S_00B860_WAVES(pipeline->max_waves) |
@@ -4770,7 +4917,7 @@ static VkResult radv_compute_pipeline_create(
                 stage_feedbacks[MESA_SHADER_COMPUTE] = &creation_feedback->pPipelineStageCreationFeedbacks[0];
  
         pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
-       radv_create_shaders(pipeline, device, cache, &(struct radv_pipeline_key) {0}, pStages, pCreateInfo->flags, pipeline_feedback, stage_feedbacks);
+       radv_create_shaders(pipeline, device, cache, &(struct radv_pipeline_key) {0}, pStages, pCreateInfo->flags, NULL, pipeline_feedback, stage_feedbacks);
  
         pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
         pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
@@ -4817,8 +4964,15 @@ static uint32_t radv_get_executable_count(const struct radv_pipeline *pipeline)
  {
         uint32_t ret = 0;
         for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
-               if (pipeline->shaders[i])
-                       ret += i == MESA_SHADER_GEOMETRY ? 2u : 1u;
+               if (!pipeline->shaders[i])
+                       continue;
+
+               if (i == MESA_SHADER_GEOMETRY &&
+                   !radv_pipeline_has_ngg(pipeline)) {
+                       ret += 2u;
+               } else {
+                       ret += 1u;
+               }
                 
         }
         return ret;
@@ -4837,7 +4991,8 @@ radv_get_shader_from_executable_index(const struct radv_pipeline *pipeline, int
  
                 --index;
  
-               if (i == MESA_SHADER_GEOMETRY) {
+               if (i == MESA_SHADER_GEOMETRY &&
+                   !radv_pipeline_has_ngg(pipeline)) {
                         if (!index) {
                                 *stage = i;
                                 return pipeline->gs_copy_shader;
@@ -4928,7 +5083,8 @@ VkResult radv_GetPipelineExecutablePropertiesKHR(
                 desc_copy(pProperties[executable_idx].description, description);
  
                 ++executable_idx;
-               if (i == MESA_SHADER_GEOMETRY) {
+               if (i == MESA_SHADER_GEOMETRY &&
+                   !radv_pipeline_has_ngg(pipeline)) {
                         assert(pipeline->gs_copy_shader);
                         if (executable_idx >= count)
                                 break;
@@ -5013,7 +5169,7 @@ VkResult radv_GetPipelineExecutableStatisticsKHR(
                 desc_copy(s->name, "Code size");
                 desc_copy(s->description, "Code size in bytes");
                 s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
-               s->value.u64 = shader->code_size;
+               s->value.u64 = shader->exec_size;
         }
         ++s;
  
@@ -5093,12 +5249,17 @@ VkResult radv_GetPipelineExecutableInternalRepresentationsKHR(
         }
         ++p;
  
-       /* LLVM IR */
+       /* backend IR */
         if (p < end) {
                 p->isText = true;
-               desc_copy(p->name, "LLVM IR");
-               desc_copy(p->description, "The LLVM IR after some optimizations");
-               if (radv_copy_representation(p->pData, &p->dataSize, shader->llvm_ir_string) != VK_SUCCESS)
+               if (shader->aco_used) {
+                       desc_copy(p->name, "ACO IR");
+                       desc_copy(p->description, "The ACO IR after some optimizations");
+               } else {
+                       desc_copy(p->name, "LLVM IR");
+                       desc_copy(p->description, "The LLVM IR after some optimizations");
+               }
+               if (radv_copy_representation(p->pData, &p->dataSize, shader->ir_string) != VK_SUCCESS)
                         result = VK_INCOMPLETE;
         }
         ++p;