radeonsi: remove redundant si_shader_info::uses_derivatives
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_nir.c
index 00371224368dc60b66ae8b577494594245df9236..46dd6e973fb2ac0b83935ff1124642b36d18b4d0 100644 (file)
@@ -48,14 +48,14 @@ static const nir_deref_instr *tex_get_texture_deref(nir_tex_instr *instr)
 static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr,
                           bool is_input)
 {
-   unsigned interp = TGSI_INTERPOLATE_CONSTANT; /* load_input uses flat shading */
+   unsigned interp = INTERP_MODE_FLAT; /* load_input uses flat shading */
 
    if (intr->intrinsic == nir_intrinsic_load_interpolated_input) {
       nir_intrinsic_instr *baryc = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr);
 
       if (baryc) {
          if (nir_intrinsic_infos[baryc->intrinsic].index_map[NIR_INTRINSIC_INTERP_MODE] > 0)
-            interp = tgsi_get_interp_mode(nir_intrinsic_interp_mode(baryc), false);
+            interp = nir_intrinsic_interp_mode(baryc);
          else
             unreachable("unknown barycentric intrinsic");
       } else {
@@ -64,16 +64,18 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
    }
 
    unsigned mask, bit_size;
-   bool dual_slot;
+   bool dual_slot, is_output_load;
 
    if (nir_intrinsic_infos[intr->intrinsic].index_map[NIR_INTRINSIC_WRMASK] > 0) {
       mask = nir_intrinsic_write_mask(intr); /* store */
       bit_size = nir_src_bit_size(intr->src[0]);
       dual_slot = bit_size == 64 && nir_src_num_components(intr->src[0]) >= 3;
+      is_output_load = false;
    } else {
       mask = nir_ssa_def_components_read(&intr->dest.ssa); /* load */
       bit_size = intr->dest.ssa.bit_size;
       dual_slot = bit_size == 64 && intr->dest.ssa.num_components >= 3;
+      is_output_load = !is_input;
    }
 
    /* Convert the 64-bit component mask to a 32-bit component mask. */
@@ -98,27 +100,25 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
 
    mask <<= nir_intrinsic_component(intr);
 
-   unsigned name, index;
-   if (info->processor == PIPE_SHADER_VERTEX && is_input) {
-      /* VS doesn't have semantics. */
-      name = 0;
-      index = 0;
-   } else if (info->processor == PIPE_SHADER_FRAGMENT && !is_input) {
-      tgsi_get_gl_frag_result_semantic(nir_intrinsic_io_semantics(intr).location,
-                                       &name, &index);
-      /* Adjust for dual source blending. */
-      if (nir_intrinsic_io_semantics(intr).dual_source_blend_index)
-         index++;
-   } else {
-      tgsi_get_gl_varying_semantic(nir_intrinsic_io_semantics(intr).location,
-                                   true, &name, &index);
-   }
-
    nir_src offset = *nir_get_io_offset_src(intr);
    bool indirect = !nir_src_is_const(offset);
    if (!indirect)
       assert(nir_src_as_uint(offset) == 0);
 
+   unsigned semantic = 0;
+   /* VS doesn't have semantics. */
+   if (info->stage != MESA_SHADER_VERTEX || !is_input)
+      semantic = nir_intrinsic_io_semantics(intr).location;
+
+   if (info->stage == MESA_SHADER_FRAGMENT && !is_input) {
+      /* Never use FRAG_RESULT_COLOR directly. */
+      if (semantic == FRAG_RESULT_COLOR) {
+         semantic = FRAG_RESULT_DATA0;
+         info->color0_writes_all_cbufs = true;
+      }
+      semantic += nir_intrinsic_io_semantics(intr).dual_source_blend_index;
+   }
+
    unsigned driver_location = nir_intrinsic_base(intr);
    unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : (1 + dual_slot);
 
@@ -129,31 +129,39 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
          unsigned loc = driver_location + i;
          unsigned slot_mask = (dual_slot && i % 2 ? mask >> 4 : mask) & 0xf;
 
-         info->input_semantic_name[loc] = name;
-         info->input_semantic_index[loc] = index + i;
+         info->input_semantic[loc] = semantic + i;
          info->input_interpolate[loc] = interp;
 
          if (slot_mask) {
             info->input_usage_mask[loc] |= slot_mask;
             info->num_inputs = MAX2(info->num_inputs, loc + 1);
 
-            if (name == TGSI_SEMANTIC_PRIMID)
+            if (semantic == VARYING_SLOT_PRIMITIVE_ID)
                info->uses_primid = true;
          }
       }
    } else {
       /* Outputs. */
       assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask));
+      assert(semantic + num_slots < ARRAY_SIZE(info->output_semantic_to_slot));
 
       for (unsigned i = 0; i < num_slots; i++) {
          unsigned loc = driver_location + i;
          unsigned slot_mask = (dual_slot && i % 2 ? mask >> 4 : mask) & 0xf;
 
-         info->output_semantic_name[loc] = name;
-         info->output_semantic_index[loc] = index + i;
+         info->output_semantic[loc] = semantic + i;
+         info->output_semantic_to_slot[semantic + i] = loc;
 
-         if (slot_mask) {
-            if (info->processor == PIPE_SHADER_GEOMETRY) {
+         if (is_output_load) {
+            /* Output loads have only a few things that we need to track. */
+            info->output_readmask[loc] |= slot_mask;
+
+            if (info->stage == MESA_SHADER_FRAGMENT &&
+                nir_intrinsic_io_semantics(intr).fb_fetch_output)
+               info->uses_fbfetch = true;
+         } else if (slot_mask) {
+            /* Output stores. */
+            if (info->stage == MESA_SHADER_GEOMETRY) {
                unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams <<
                                      (nir_intrinsic_component(intr) * 2);
                unsigned new_mask = slot_mask & ~info->output_usagemask[loc];
@@ -171,44 +179,48 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
             info->output_usagemask[loc] |= slot_mask;
             info->num_outputs = MAX2(info->num_outputs, loc + 1);
 
-            switch (name) {
-            case TGSI_SEMANTIC_PRIMID:
-               info->writes_primid = true;
-               break;
-            case TGSI_SEMANTIC_VIEWPORT_INDEX:
-               info->writes_viewport_index = true;
-               break;
-            case TGSI_SEMANTIC_LAYER:
-               info->writes_layer = true;
-               break;
-            case TGSI_SEMANTIC_PSIZE:
-               info->writes_psize = true;
-               break;
-            case TGSI_SEMANTIC_CLIPVERTEX:
-               info->writes_clipvertex = true;
-               break;
-            case TGSI_SEMANTIC_COLOR:
-               info->colors_written |= 1 << (index + i);
-
-               if (info->processor == PIPE_SHADER_FRAGMENT &&
-                   nir_intrinsic_io_semantics(intr).location == FRAG_RESULT_COLOR)
-                  info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] = true;
-               break;
-            case TGSI_SEMANTIC_STENCIL:
-               info->writes_stencil = true;
-               break;
-            case TGSI_SEMANTIC_SAMPLEMASK:
-               info->writes_samplemask = true;
-               break;
-            case TGSI_SEMANTIC_EDGEFLAG:
-               info->writes_edgeflag = true;
-               break;
-            case TGSI_SEMANTIC_POSITION:
-               if (info->processor == PIPE_SHADER_FRAGMENT)
+            if (info->stage == MESA_SHADER_FRAGMENT) {
+               switch (semantic) {
+               case FRAG_RESULT_DEPTH:
                   info->writes_z = true;
-               else
+                  break;
+               case FRAG_RESULT_STENCIL:
+                  info->writes_stencil = true;
+                  break;
+               case FRAG_RESULT_SAMPLE_MASK:
+                  info->writes_samplemask = true;
+                  break;
+               default:
+                  if (semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) {
+                     unsigned index = semantic - FRAG_RESULT_DATA0;
+                     info->colors_written |= 1 << (index + i);
+                  }
+                  break;
+               }
+            } else {
+               switch (semantic) {
+               case VARYING_SLOT_PRIMITIVE_ID:
+                  info->writes_primid = true;
+                  break;
+               case VARYING_SLOT_VIEWPORT:
+                  info->writes_viewport_index = true;
+                  break;
+               case VARYING_SLOT_LAYER:
+                  info->writes_layer = true;
+                  break;
+               case VARYING_SLOT_PSIZ:
+                  info->writes_psize = true;
+                  break;
+               case VARYING_SLOT_CLIP_VERTEX:
+                  info->writes_clipvertex = true;
+                  break;
+               case VARYING_SLOT_EDGE:
+                  info->writes_edgeflag = true;
+                  break;
+               case VARYING_SLOT_POS:
                   info->writes_position = true;
-               break;
+                  break;
+               }
             }
          }
       }
@@ -218,42 +230,15 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
 static void scan_instruction(const struct nir_shader *nir, struct si_shader_info *info,
                              nir_instr *instr)
 {
-   if (instr->type == nir_instr_type_alu) {
-      nir_alu_instr *alu = nir_instr_as_alu(instr);
-
-      switch (alu->op) {
-      case nir_op_fddx:
-      case nir_op_fddy:
-      case nir_op_fddx_fine:
-      case nir_op_fddy_fine:
-      case nir_op_fddx_coarse:
-      case nir_op_fddy_coarse:
-         info->uses_derivatives = true;
-         break;
-      default:
-         break;
-      }
-   } else if (instr->type == nir_instr_type_tex) {
+   if (instr->type == nir_instr_type_tex) {
       nir_tex_instr *tex = nir_instr_as_tex(instr);
       const nir_deref_instr *deref = tex_get_texture_deref(tex);
       nir_variable *var = deref ? nir_deref_instr_get_variable(deref) : NULL;
 
-      if (!var) {
-         info->samplers_declared |= u_bit_consecutive(tex->sampler_index, 1);
-      } else {
+      if (var) {
          if (deref->mode != nir_var_uniform || var->data.bindless)
             info->uses_bindless_samplers = true;
       }
-
-      switch (tex->op) {
-      case nir_texop_tex:
-      case nir_texop_txb:
-      case nir_texop_lod:
-         info->uses_derivatives = true;
-         break;
-      default:
-         break;
-      }
    } else if (instr->type == nir_instr_type_intrinsic) {
       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 
@@ -277,7 +262,7 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
          break;
       case nir_intrinsic_load_local_group_size:
          /* The block size is translated to IMM with a fixed block size. */
-         if (info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0)
+         if (info->base.cs.local_size[0] == 0)
             info->uses_block_size = true;
          break;
       case nir_intrinsic_load_local_invocation_id:
@@ -293,15 +278,6 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
          }
          break;
       }
-      case nir_intrinsic_load_vertex_id:
-         info->uses_vertexid = 1;
-         break;
-      case nir_intrinsic_load_vertex_id_zero_base:
-         info->uses_vertexid_nobase = 1;
-         break;
-      case nir_intrinsic_load_base_vertex:
-         info->uses_basevertex = 1;
-         break;
       case nir_intrinsic_load_draw_id:
          info->uses_drawid = 1;
          break;
@@ -322,12 +298,10 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
          break;
       case nir_intrinsic_bindless_image_store:
          info->uses_bindless_images = true;
-         info->writes_memory = true;
-         info->num_memory_instructions++; /* we only care about stores */
+         info->num_memory_stores++;
          break;
       case nir_intrinsic_image_deref_store:
-         info->writes_memory = true;
-         info->num_memory_instructions++; /* we only care about stores */
+         info->num_memory_stores++;
          break;
       case nir_intrinsic_bindless_image_atomic_add:
       case nir_intrinsic_bindless_image_atomic_imin:
@@ -340,8 +314,7 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
       case nir_intrinsic_bindless_image_atomic_exchange:
       case nir_intrinsic_bindless_image_atomic_comp_swap:
          info->uses_bindless_images = true;
-         info->writes_memory = true;
-         info->num_memory_instructions++; /* we only care about stores */
+         info->num_memory_stores++;
          break;
       case nir_intrinsic_image_deref_atomic_add:
       case nir_intrinsic_image_deref_atomic_imin:
@@ -355,8 +328,7 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
       case nir_intrinsic_image_deref_atomic_comp_swap:
       case nir_intrinsic_image_deref_atomic_inc_wrap:
       case nir_intrinsic_image_deref_atomic_dec_wrap:
-         info->writes_memory = true;
-         info->num_memory_instructions++; /* we only care about stores */
+         info->num_memory_stores++;
          break;
       case nir_intrinsic_store_ssbo:
       case nir_intrinsic_ssbo_atomic_add:
@@ -369,8 +341,7 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
       case nir_intrinsic_ssbo_atomic_xor:
       case nir_intrinsic_ssbo_atomic_exchange:
       case nir_intrinsic_ssbo_atomic_comp_swap:
-         info->writes_memory = true;
-         info->num_memory_instructions++; /* we only care about stores */
+         info->num_memory_stores++;
          break;
       case nir_intrinsic_load_color0:
       case nir_intrinsic_load_color1: {
@@ -396,9 +367,6 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
                info->uses_linear_centroid = true;
             else
                info->uses_linear_center = true;
-
-            if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
-               info->uses_linear_opcode_interp_sample = true;
          } else {
             if (intr->intrinsic == nir_intrinsic_load_barycentric_sample)
                info->uses_persp_sample = true;
@@ -406,10 +374,9 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
                info->uses_persp_centroid = true;
             else
                info->uses_persp_center = true;
-
-            if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
-               info->uses_persp_opcode_interp_sample = true;
          }
+         if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
+            info->uses_interp_at_sample = true;
          break;
       }
       case nir_intrinsic_load_input:
@@ -418,28 +385,12 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
       case nir_intrinsic_load_interpolated_input:
          scan_io_usage(info, intr, true);
          break;
+      case nir_intrinsic_load_output:
+      case nir_intrinsic_load_per_vertex_output:
       case nir_intrinsic_store_output:
       case nir_intrinsic_store_per_vertex_output:
          scan_io_usage(info, intr, false);
          break;
-      case nir_intrinsic_load_output: {
-         unsigned location = nir_intrinsic_io_semantics(intr).location;
-
-         if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
-            if (location == VARYING_SLOT_TESS_LEVEL_INNER ||
-                location == VARYING_SLOT_TESS_LEVEL_OUTER)
-               info->reads_tessfactor_outputs = true;
-            else
-               info->reads_perpatch_outputs = true;
-         } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
-            if (nir_intrinsic_io_semantics(intr).fb_fetch_output)
-               info->uses_fbfetch = true;
-         }
-         break;
-      }
-      case nir_intrinsic_load_per_vertex_output:
-         info->reads_pervertex_outputs = true;
-         break;
       case nir_intrinsic_load_deref:
       case nir_intrinsic_store_deref:
       case nir_intrinsic_interp_deref_at_centroid:
@@ -457,71 +408,25 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
 {
    nir_function *func;
 
-   info->processor = pipe_shader_type_from_mesa(nir->info.stage);
-
-   info->properties[TGSI_PROPERTY_NEXT_SHADER] = pipe_shader_type_from_mesa(nir->info.next_stage);
-
-   if (nir->info.stage == MESA_SHADER_VERTEX) {
-      info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] = nir->info.vs.window_space_position;
-      info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD] = nir->info.vs.blit_sgprs_amd;
-   }
-
-   if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
-      info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT] = nir->info.tess.tcs_vertices_out;
-   }
+   info->base = nir->info;
+   info->stage = nir->info.stage;
 
    if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
-      if (nir->info.tess.primitive_mode == GL_ISOLINES)
-         info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = PIPE_PRIM_LINES;
-      else
-         info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = nir->info.tess.primitive_mode;
-
-      STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
-      STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 == PIPE_TESS_SPACING_FRACTIONAL_ODD);
-      STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 == PIPE_TESS_SPACING_FRACTIONAL_EVEN);
-
-      info->properties[TGSI_PROPERTY_TES_SPACING] = (nir->info.tess.spacing + 1) % 3;
-      info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW] = !nir->info.tess.ccw;
-      info->properties[TGSI_PROPERTY_TES_POINT_MODE] = nir->info.tess.point_mode;
-   }
-
-   if (nir->info.stage == MESA_SHADER_GEOMETRY) {
-      info->properties[TGSI_PROPERTY_GS_INPUT_PRIM] = nir->info.gs.input_primitive;
-      info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM] = nir->info.gs.output_primitive;
-      info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] = nir->info.gs.vertices_out;
-      info->properties[TGSI_PROPERTY_GS_INVOCATIONS] = nir->info.gs.invocations;
+      if (info->base.tess.primitive_mode == GL_ISOLINES)
+         info->base.tess.primitive_mode = GL_LINES;
    }
 
    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
-      info->properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] =
-         nir->info.fs.early_fragment_tests | nir->info.fs.post_depth_coverage;
-      info->properties[TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE] = nir->info.fs.post_depth_coverage;
-
-      if (nir->info.fs.pixel_center_integer) {
-         info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] = TGSI_FS_COORD_PIXEL_CENTER_INTEGER;
-      }
+      /* post_depth_coverage implies early_fragment_tests */
+      info->base.fs.early_fragment_tests |= info->base.fs.post_depth_coverage;
 
-      if (nir->info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
-         switch (nir->info.fs.depth_layout) {
-         case FRAG_DEPTH_LAYOUT_ANY:
-            info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_ANY;
-            break;
-         case FRAG_DEPTH_LAYOUT_GREATER:
-            info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_GREATER;
-            break;
-         case FRAG_DEPTH_LAYOUT_LESS:
-            info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_LESS;
-            break;
-         case FRAG_DEPTH_LAYOUT_UNCHANGED:
-            info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_UNCHANGED;
-            break;
-         default:
-            unreachable("Unknow depth layout");
-         }
+      info->color_interpolate[0] = nir->info.fs.color0_interp;
+      info->color_interpolate[1] = nir->info.fs.color1_interp;
+      for (unsigned i = 0; i < 2; i++) {
+         if (info->color_interpolate[i] == INTERP_MODE_NONE)
+            info->color_interpolate[i] = INTERP_MODE_COLOR;
       }
 
-      info->color_interpolate[0] = tgsi_get_interp_mode(nir->info.fs.color0_interp, true);
-      info->color_interpolate[1] = tgsi_get_interp_mode(nir->info.fs.color1_interp, true);
       info->color_interpolate_loc[0] = nir->info.fs.color0_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
                                        nir->info.fs.color0_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
                                                                       TGSI_INTERPOLATE_LOC_CENTER;
@@ -530,34 +435,14 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
                                                                       TGSI_INTERPOLATE_LOC_CENTER;
    }
 
-   if (gl_shader_stage_is_compute(nir->info.stage)) {
-      info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] = nir->info.cs.local_size[0];
-      info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] = nir->info.cs.local_size[1];
-      info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH] = nir->info.cs.local_size[2];
-      info->properties[TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD] =
-         nir->info.cs.user_data_components_amd;
-   }
-
    info->constbuf0_num_slots = nir->num_uniforms;
-   info->shader_buffers_declared = u_bit_consecutive(0, nir->info.num_ssbos);
-   info->const_buffers_declared = u_bit_consecutive(0, nir->info.num_ubos);
-   info->images_declared = u_bit_consecutive(0, nir->info.num_images);
-   info->msaa_images_declared = nir->info.msaa_images;
-   info->image_buffers = nir->info.image_buffers;
-   info->samplers_declared = nir->info.textures_used;
-
-   info->num_written_clipdistance = nir->info.clip_distance_array_size;
-   info->num_written_culldistance = nir->info.cull_distance_array_size;
-   info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
-   info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance);
-
-   if (info->processor == PIPE_SHADER_FRAGMENT)
-      info->uses_kill = nir->info.fs.uses_discard;
 
    if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
       info->tessfactors_are_def_in_all_invocs = ac_are_tessfactors_def_in_all_invocs(nir);
    }
 
+   memset(info->output_semantic_to_slot, -1, sizeof(info->output_semantic_to_slot));
+
    func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
    nir_foreach_block (block, func->impl) {
       nir_foreach_instr (instr, block)
@@ -568,14 +453,17 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
       for (unsigned i = 0; i < 2; i++) {
          if ((info->colors_read >> (i * 4)) & 0xf) {
-            info->input_semantic_name[info->num_inputs] = TGSI_SEMANTIC_COLOR;
-            info->input_semantic_index[info->num_inputs] = i;
+            info->input_semantic[info->num_inputs] = VARYING_SLOT_COL0 + i;
             info->input_interpolate[info->num_inputs] = info->color_interpolate[i];
             info->input_usage_mask[info->num_inputs] = info->colors_read >> (i * 4);
             info->num_inputs++;
          }
       }
    }
+
+   /* Trim output read masks based on write masks. */
+   for (unsigned i = 0; i < info->num_outputs; i++)
+      info->output_readmask[i] &= info->output_usagemask[i];
 }
 
 static void si_nir_opts(struct nir_shader *nir, bool first)
@@ -639,8 +527,7 @@ static void si_nir_opts(struct nir_shader *nir, bool first)
          assert(lower_flrp);
          bool lower_flrp_progress = false;
 
-         NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp, lower_flrp, false /* always_precise */,
-                  nir->options->lower_ffma);
+         NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp, lower_flrp, false /* always_precise */);
          if (lower_flrp_progress) {
             NIR_PASS(progress, nir, nir_opt_constant_folding);
             progress = true;
@@ -752,6 +639,23 @@ static void si_lower_io(struct nir_shader *nir)
    NIR_PASS_V(nir, nir_opt_constant_folding);
    NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in);
    NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_out);
+
+   /* Remove dead derefs, so that nir_validate doesn't fail. */
+   NIR_PASS_V(nir, nir_opt_dce);
+
+   /* Remove input and output nir_variables, because we don't need them
+    * anymore. Also remove uniforms, because those should have been lowered
+    * to UBOs already.
+    */
+   unsigned modes = nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
+   nir_foreach_variable_with_modes_safe(var, nir, modes) {
+      if (var->data.mode == nir_var_uniform &&
+          (glsl_type_get_image_count(var->type) ||
+           glsl_type_get_sampler_count(var->type)))
+         continue;
+
+      exec_node_remove(&var->node);
+   }
 }
 
 /**