static LLVMValueRef ngg_get_initial_edgeflag(struct si_shader_context *ctx, unsigned index)
{
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
LLVMValueRef tmp;
tmp = LLVMBuildLShr(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id),
LLVMConstInt(ctx->ac.i32, 8 + index, false), "");
{
const struct si_shader_info *info = &ctx->shader->selector->info;
- if (ctx->type == PIPE_SHADER_VERTEX) {
- if (info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD]) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ if (info->base.vs.blit_sgprs_amd) {
/* Blits always use axis-aligned rectangles with 3 vertices. */
*num_vertices = 3;
return LLVMConstInt(ctx->ac.i32, 3, 0);
return LLVMBuildAdd(ctx->ac.builder, num, ctx->ac.i32_1, "");
}
} else {
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
- if (info->properties[TGSI_PROPERTY_TES_POINT_MODE])
+ if (info->base.tess.point_mode)
*num_vertices = 1;
- else if (info->properties[TGSI_PROPERTY_TES_PRIM_MODE] == PIPE_PRIM_LINES)
+ else if (info->base.tess.primitive_mode == GL_LINES)
*num_vertices = 2;
else
*num_vertices = 3;
assert(shader->key.as_ngg && !shader->key.as_es);
- return sel->type != PIPE_SHADER_GEOMETRY && !sel->info.writes_edgeflag;
+ return sel->info.stage != MESA_SHADER_GEOMETRY && !sel->info.writes_edgeflag;
}
void gfx10_ngg_build_sendmsg_gs_alloc_req(struct si_shader_context *ctx)
unsigned reg = so->output[i].register_index;
struct si_shader_output_values out;
- out.semantic_name = info->output_semantic_name[reg];
- out.semantic_index = info->output_semantic_index[reg];
+ out.semantic = info->output_semantic[reg];
for (unsigned comp = 0; comp < 4; comp++) {
tmp = ac_build_gep0(&ctx->ac, vertexptr, LLVMConstInt(ctx->ac.i32, 4 * reg + comp, false));
LLVMValueRef prim_stride_dw_vgpr = LLVMGetUndef(ctx->ac.i32);
int stream_for_buffer[4] = {-1, -1, -1, -1};
unsigned bufmask_for_stream[4] = {};
- bool isgs = ctx->type == PIPE_SHADER_GEOMETRY;
+ bool isgs = ctx->stage == MESA_SHADER_GEOMETRY;
unsigned scratch_emit_base = isgs ? 4 : 0;
LLVMValueRef scratch_emit_basev = isgs ? i32_4 : ctx->ac.i32_0;
unsigned scratch_offset_base = isgs ? 8 : 4;
* to the ES thread of the provoking vertex. All ES threads
* load and export PrimitiveID for their thread.
*/
- if (shader->selector->type == PIPE_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id)
+ if (shader->selector->info.stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id)
lds_vertex_size = MAX2(lds_vertex_size, 1);
if (shader->key.opt.ngg_culling) {
- if (shader->selector->type == PIPE_SHADER_VERTEX) {
+ if (shader->selector->info.stage == MESA_SHADER_VERTEX) {
STATIC_ASSERT(lds_instance_id + 1 == 9);
lds_vertex_size = MAX2(lds_vertex_size, 9);
} else {
- assert(shader->selector->type == PIPE_SHADER_TESS_EVAL);
+ assert(shader->selector->info.stage == MESA_SHADER_TESS_EVAL);
if (shader->selector->info.uses_primid || shader->key.mono.u.vs_export_prim_id) {
STATIC_ASSERT(lds_tes_patch_id + 2 == 11);
assert(shader->key.opt.ngg_culling);
assert(shader->key.as_ngg);
- assert(sel->type == PIPE_SHADER_VERTEX ||
- (sel->type == PIPE_SHADER_TESS_EVAL && !shader->key.as_es));
+ assert(sel->info.stage == MESA_SHADER_VERTEX ||
+ (sel->info.stage == MESA_SHADER_TESS_EVAL && !shader->key.as_es));
LLVMValueRef position[4] = {};
for (unsigned i = 0; i < info->num_outputs; i++) {
- switch (info->output_semantic_name[i]) {
- case TGSI_SEMANTIC_POSITION:
+ switch (info->output_semantic[i]) {
+ case VARYING_SLOT_POS:
for (unsigned j = 0; j < 4; j++) {
position[j] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], "");
}
bool uses_tes_prim_id = false;
LLVMValueRef packed_data = ctx->ac.i32_0;
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
uses_instance_id = sel->info.uses_instanceid ||
shader->key.part.vs.prolog.instance_divisor_is_one ||
shader->key.part.vs.prolog.instance_divisor_is_fetched;
} else {
uses_tes_prim_id = sel->info.uses_primid || shader->key.mono.u.vs_export_prim_id;
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
LLVMBuildStore(builder, ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->tes_u)),
ac_build_gep0(&ctx->ac, es_vtxptr, LLVMConstInt(ctx->ac.i32, lds_tes_u, 0)));
LLVMBuildStore(builder, ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->tes_v)),
LLVMBuildStore(builder, tmp, es_data[i]);
}
- if (ctx->type == PIPE_SHADER_TESS_EVAL) {
+ if (ctx->stage == MESA_SHADER_TESS_EVAL) {
tmp = LLVMBuildLoad(builder,
si_build_gep_i8(ctx, old_es_vtxptr, lds_byte2_tes_rel_patch_id), "");
tmp = LLVMBuildZExt(builder, tmp, ctx->ac.i32, "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, new_gs_tg_info, 2, "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, new_merged_wave_info, 3, "");
- if (ctx->type == PIPE_SHADER_TESS_EVAL)
+ if (ctx->stage == MESA_SHADER_TESS_EVAL)
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 4);
ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers, 8 + SI_SGPR_RW_BUFFERS);
ret = si_insert_input_ptr(ctx, ret, ctx->samplers_and_images, 8 + SI_SGPR_SAMPLERS_AND_IMAGES);
ret = si_insert_input_ptr(ctx, ret, ctx->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
ret = si_insert_input_ptr(ctx, ret, ctx->args.base_vertex, 8 + SI_SGPR_BASE_VERTEX);
ret = si_insert_input_ptr(ctx, ret, ctx->args.start_instance, 8 + SI_SGPR_START_INSTANCE);
ret = si_insert_input_ptr(ctx, ret, ctx->args.draw_id, 8 + SI_SGPR_DRAWID);
8 + SI_SGPR_VS_VB_DESCRIPTOR_FIRST + i * 4);
}
} else {
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
ret = si_insert_input_ptr(ctx, ret, ctx->tcs_offchip_layout, 8 + SI_SGPR_TES_OFFCHIP_LAYOUT);
ret = si_insert_input_ptr(ctx, ret, ctx->tes_offchip_addr, 8 + SI_SGPR_TES_OFFCHIP_ADDR);
}
unsigned vgpr;
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
if (shader->selector->num_vbos_in_user_sgprs) {
vgpr = 8 + SI_SGPR_VS_VB_DESCRIPTOR_FIRST + shader->selector->num_vbos_in_user_sgprs * 4;
} else {
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_invocation_id, vgpr++);
vgpr++; /* gs_vtx45_offset */
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
val = LLVMBuildLoad(builder, es_data[0], "");
ret = LLVMBuildInsertValue(builder, ret, ac_to_float(&ctx->ac, val), vgpr++,
""); /* VGPR5 - VertexID */
vgpr++;
}
} else {
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
unsigned num_vgprs = uses_tes_prim_id ? 4 : 3;
for (unsigned i = 0; i < num_vgprs; i++) {
val = LLVMBuildLoad(builder, es_data[i], "");
/* These two also use LDS. */
if (sel->info.writes_edgeflag ||
- (ctx->type == PIPE_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id))
+ (ctx->stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id))
ac_build_s_barrier(&ctx->ac);
ctx->return_value = ret;
vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
for (unsigned i = 0; i < info->num_outputs; i++) {
- outputs[i].semantic_name = info->output_semantic_name[i];
- outputs[i].semantic_index = info->output_semantic_index[i];
+ outputs[i].semantic = info->output_semantic[i];
for (unsigned j = 0; j < 4; j++) {
outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3;
}
/* Store the edgeflag at the end (if streamout is enabled) */
- if (info->output_semantic_name[i] == TGSI_SEMANTIC_EDGEFLAG && sel->info.writes_edgeflag) {
+ if (info->output_semantic[i] == VARYING_SLOT_EDGE && sel->info.writes_edgeflag) {
LLVMValueRef edgeflag = LLVMBuildLoad(builder, addrs[4 * i], "");
/* The output is a float, but the hw expects a 1-bit integer. */
edgeflag = LLVMBuildFPToUI(ctx->ac.builder, edgeflag, ctx->ac.i32, "");
bool unterminated_es_if_block =
!sel->so.num_outputs && !sel->info.writes_edgeflag &&
!ctx->screen->use_ngg_streamout && /* no query buffer */
- (ctx->type != PIPE_SHADER_VERTEX || !ctx->shader->key.mono.u.vs_export_prim_id);
+ (ctx->stage != MESA_SHADER_VERTEX || !ctx->shader->key.mono.u.vs_export_prim_id);
if (!unterminated_es_if_block)
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
/* Copy Primitive IDs from GS threads to the LDS address corresponding
* to the ES thread of the provoking vertex.
*/
- if (ctx->type == PIPE_SHADER_VERTEX && ctx->shader->key.mono.u.vs_export_prim_id) {
+ if (ctx->stage == MESA_SHADER_VERTEX && ctx->shader->key.mono.u.vs_export_prim_id) {
assert(!unterminated_es_if_block);
/* Streamout and edge flags use LDS. Make it idle, so that we can reuse it. */
}
/* Update query buffer */
- if (ctx->screen->use_ngg_streamout && !info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD]) {
+ if (ctx->screen->use_ngg_streamout && !info->base.vs.blit_sgprs_amd) {
assert(!unterminated_es_if_block);
tmp = si_unpack_param(ctx, ctx->vs_state_bits, 6, 1);
* use the position from the current shader part. Instead,
* load it from LDS.
*/
- if (info->output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
+ if (info->output_semantic[i] == VARYING_SLOT_POS &&
ctx->shader->key.opt.ngg_culling) {
vertex_ptr = ngg_nogs_vertex_ptr(ctx, ac_get_arg(&ctx->ac, ctx->ngg_old_thread_id));
}
if (ctx->shader->key.mono.u.vs_export_prim_id) {
- outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID;
- outputs[i].semantic_index = 0;
+ outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID;
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
/* Wait for GS stores to finish. */
ac_build_s_barrier(&ctx->ac);
tmp = ac_build_gep0(&ctx->ac, tmp, ctx->ac.i32_0);
outputs[i].values[0] = LLVMBuildLoad(builder, tmp, "");
} else {
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
outputs[i].values[0] = si_get_primitive_id(ctx, 0);
}
* is in emit order; that is:
* - during the epilogue, N is the threadidx (relative to the entire threadgroup)
* - during vertex emit, i.e. while the API GS shader invocation is running,
- * N = threadidx * gs_max_out_vertices + emitidx
+ * N = threadidx * gs.vertices_out + emitidx
*
* Goals of the LDS memory layout:
* 1. Eliminate bank conflicts on write for geometry shaders that have all emits
*
* Swizzling is required to satisfy points 1 and 2 simultaneously.
*
- * Vertices are stored in export order (gsthread * gs_max_out_vertices + emitidx).
+ * Vertices are stored in export order (gsthread * gs.vertices_out + emitidx).
* Indices are swizzled in groups of 32, which ensures point 1 without
* disturbing point 2.
*
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef storage = ngg_gs_get_vertex_storage(ctx);
- /* gs_max_out_vertices = 2^(write_stride_2exp) * some odd number */
- unsigned write_stride_2exp = ffs(sel->gs_max_out_vertices) - 1;
+ /* gs.vertices_out = 2^(write_stride_2exp) * some odd number */
+ unsigned write_stride_2exp = ffs(sel->info.base.gs.vertices_out) - 1;
if (write_stride_2exp) {
LLVMValueRef row = LLVMBuildLShr(builder, vertexidx, LLVMConstInt(ctx->ac.i32, 5, false), "");
LLVMValueRef swizzle = LLVMBuildAnd(
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef tmp;
- tmp = LLVMConstInt(ctx->ac.i32, sel->gs_max_out_vertices, false);
+ tmp = LLVMConstInt(ctx->ac.i32, sel->info.base.gs.vertices_out, false);
tmp = LLVMBuildMul(builder, tmp, gsthread, "");
const LLVMValueRef vertexidx = LLVMBuildAdd(builder, tmp, emitidx, "");
return ngg_gs_vertex_ptr(ctx, vertexidx);
*/
const LLVMValueRef can_emit =
LLVMBuildICmp(builder, LLVMIntULT, vertexidx,
- LLVMConstInt(ctx->ac.i32, sel->gs_max_out_vertices, false), "");
+ LLVMConstInt(ctx->ac.i32, sel->info.base.gs.vertices_out, false), "");
tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
tmp = LLVMBuildSelect(builder, can_emit, tmp, vertexidx, "");
/* Determine and store whether this vertex completed a primitive. */
const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], "");
- tmp = LLVMConstInt(ctx->ac.i32, u_vertices_per_prim(sel->gs_output_prim) - 1, false);
+ tmp = LLVMConstInt(ctx->ac.i32, u_vertices_per_prim(sel->info.base.gs.output_primitive) - 1, false);
const LLVMValueRef iscompleteprim = LLVMBuildICmp(builder, LLVMIntUGE, curverts, tmp, "");
/* Since the geometry shader emits triangle strips, we need to
* the correct vertex order.
*/
LLVMValueRef is_odd = ctx->ac.i1false;
- if (stream == 0 && u_vertices_per_prim(sel->gs_output_prim) == 3) {
+ if (stream == 0 && u_vertices_per_prim(sel->info.base.gs.output_primitive) == 3) {
tmp = LLVMBuildAnd(builder, curverts, ctx->ac.i32_1, "");
is_odd = LLVMBuildICmp(builder, LLVMIntEQ, tmp, ctx->ac.i32_1, "");
}
{
const struct si_shader_selector *sel = ctx->shader->selector;
const struct si_shader_info *info = &sel->info;
- const unsigned verts_per_prim = u_vertices_per_prim(sel->gs_output_prim);
+ const unsigned verts_per_prim = u_vertices_per_prim(sel->info.base.gs.output_primitive);
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef i8_0 = LLVMConstInt(ctx->ac.i8, 0, false);
LLVMValueRef tmp, tmp2;
const LLVMValueRef vertexidx = LLVMBuildLoad(builder, ctx->gs_next_vertex[stream], "");
tmp = LLVMBuildICmp(builder, LLVMIntUGE, vertexidx,
- LLVMConstInt(ctx->ac.i32, sel->gs_max_out_vertices, false), "");
+ LLVMConstInt(ctx->ac.i32, sel->info.base.gs.vertices_out, false), "");
ac_build_ifcc(&ctx->ac, tmp, 5101);
ac_build_break(&ctx->ac);
ac_build_endif(&ctx->ac, 5101);
unsigned out_idx = 0;
for (unsigned i = 0; i < info->num_outputs; i++) {
- outputs[i].semantic_name = info->output_semantic_name[i];
- outputs[i].semantic_index = info->output_semantic_index[i];
+ outputs[i].semantic = info->output_semantic[i];
for (unsigned j = 0; j < 4; j++, out_idx++) {
tmp = ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx);
*max_gsprims = MIN2(*max_gsprims, 1 + max_reuse);
}
+/* Return the size in dwords of the NGG scratch area this shader needs.
+ *
+ * A geometry shader with streamout outputs (sel->so.num_outputs != 0) needs
+ * 44 dwords; every other NGG configuration needs 8 dwords.
+ * NOTE(review): the 44/8 split presumably matches the scratch layout used by
+ * the NGG streamout/alloc-req emit code elsewhere in this file (e.g. the
+ * isgs ? 8 : 4 scratch_offset_base logic) — confirm against those users.
+ */
+unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
+{
+   const struct si_shader_selector *sel = shader->selector;
+
+   if (sel->info.stage == MESA_SHADER_GEOMETRY && sel->so.num_outputs)
+      return 44;
+
+   return 8;
+}
+
/**
* Determine subgroup information like maximum number of vertices and prims.
*
const struct si_shader_selector *gs_sel = shader->selector;
const struct si_shader_selector *es_sel =
shader->previous_stage_sel ? shader->previous_stage_sel : gs_sel;
- const enum pipe_shader_type gs_type = gs_sel->type;
- const unsigned gs_num_invocations = MAX2(gs_sel->gs_num_invocations, 1);
+ const gl_shader_stage gs_stage = gs_sel->info.stage;
+ const unsigned gs_num_invocations = MAX2(gs_sel->info.base.gs.invocations, 1);
const unsigned input_prim = si_get_input_prim(gs_sel);
const bool use_adjacency =
input_prim >= PIPE_PRIM_LINES_ADJACENCY && input_prim <= PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY;
const unsigned max_verts_per_prim = u_vertices_per_prim(input_prim);
- const unsigned min_verts_per_prim = gs_type == PIPE_SHADER_GEOMETRY ? max_verts_per_prim : 1;
+ const unsigned min_verts_per_prim = gs_stage == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1;
/* All these are in dwords: */
- /* We can't allow using the whole LDS, because GS waves compete with
- * other shader stages for LDS space.
- *
- * TODO: We should really take the shader's internal LDS use into
- * account. The linker will fail if the size is greater than
- * 8K dwords.
+ /* GE can only use 8K dwords (32KB) of LDS per workgroup.
*/
- const unsigned max_lds_size = 8 * 1024 - 768;
+ const unsigned max_lds_size = 8 * 1024 - gfx10_ngg_get_scratch_dw_size(shader);
const unsigned target_lds_size = max_lds_size;
unsigned esvert_lds_size = 0;
unsigned gsprim_lds_size = 0;
*/
max_esverts_base = MIN2(max_esverts_base, 251 + max_verts_per_prim - 1);
- if (gs_type == PIPE_SHADER_GEOMETRY) {
+ if (gs_stage == MESA_SHADER_GEOMETRY) {
bool force_multi_cycling = false;
- unsigned max_out_verts_per_gsprim = gs_sel->gs_max_out_vertices * gs_num_invocations;
+ unsigned max_out_verts_per_gsprim = gs_sel->info.base.gs.vertices_out * gs_num_invocations;
retry_select_mode:
if (max_out_verts_per_gsprim <= 256 && !force_multi_cycling) {
* tessellation. */
max_vert_out_per_gs_instance = true;
max_gsprims_base = 1;
- max_out_verts_per_gsprim = gs_sel->gs_max_out_vertices;
+ max_out_verts_per_gsprim = gs_sel->info.base.gs.vertices_out;
}
esvert_lds_size = es_sel->esgs_itemsize / 4;
gsprim_lds_size = (gs_sel->gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
if (gsprim_lds_size > target_lds_size && !force_multi_cycling) {
- if (gs_sel->tess_turns_off_ngg || es_sel->type != PIPE_SHADER_TESS_EVAL) {
+ if (gs_sel->tess_turns_off_ngg || es_sel->info.stage != MESA_SHADER_TESS_EVAL) {
force_multi_cycling = true;
goto retry_select_mode;
}
unsigned max_out_vertices =
max_vert_out_per_gs_instance
- ? gs_sel->gs_max_out_vertices
- : gs_type == PIPE_SHADER_GEOMETRY
- ? max_gsprims * gs_num_invocations * gs_sel->gs_max_out_vertices
+ ? gs_sel->info.base.gs.vertices_out
+ : gs_stage == MESA_SHADER_GEOMETRY
+ ? max_gsprims * gs_num_invocations * gs_sel->info.base.gs.vertices_out
: max_esverts;
assert(max_out_vertices <= 256);
unsigned prim_amp_factor = 1;
- if (gs_type == PIPE_SHADER_GEOMETRY) {
+ if (gs_stage == MESA_SHADER_GEOMETRY) {
/* Number of output primitives per GS input primitive after
* GS instancing. */
- prim_amp_factor = gs_sel->gs_max_out_vertices;
+ prim_amp_factor = gs_sel->info.base.gs.vertices_out;
}
/* The GE only checks against the maximum number of ES verts after