i965: enable component packing for vs and fs
author Timothy Arceri <timothy.arceri@collabora.com>
Mon, 23 May 2016 06:48:05 +0000 (16:48 +1000)
committer Timothy Arceri <timothy.arceri@collabora.com>
Wed, 20 Jul 2016 23:10:53 +0000 (09:10 +1000)
Rather than trying to work out the total number of components
used at a location, we simply treat all outputs as vec4s. This
removes the need for complex code looping over varyings to match
packed locations and the need for storing the total number of
components used at each location.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
src/mesa/drivers/dri/i965/brw_nir.c

index 574475f071a6c994474efa3aec6ec72dfa9f9dee..fc1e1c4a2c56ea314c36cccf6bb75393c7688abe 100644 (file)
@@ -317,7 +317,6 @@ public:
    fs_reg frag_stencil;
    fs_reg sample_mask;
    fs_reg outputs[VARYING_SLOT_MAX];
-   unsigned output_components[VARYING_SLOT_MAX];
    fs_reg dual_src_output;
    bool do_dual_src;
    int first_non_payload_grf;
index 4d4c94a55bde7daaea69bb3e9b54ae5906ec9104..65bca6d6dbc0e48909e5b18d2846bbf2cc85a5f0 100644 (file)
@@ -67,13 +67,12 @@ fs_visitor::nir_setup_single_output_varying(fs_reg *reg,
       }
    } else {
       assert(type->is_scalar() || type->is_vector());
-      unsigned num_elements = type->vector_elements;
+      unsigned num_iter = 1;
       if (type->is_double())
-         num_elements *= 2;
-      for (unsigned count = 0; count < num_elements; count += 4) {
+         num_iter = 2;
+      for (unsigned count = 0; count < num_iter; count++) {
          this->outputs[*location] = *reg;
-         this->output_components[*location] = MIN2(4, num_elements - count);
-         *reg = offset(*reg, bld, this->output_components[*location]);
+         *reg = offset(*reg, bld, 4);
          (*location)++;
       }
    }
@@ -114,7 +113,6 @@ fs_visitor::nir_setup_outputs()
             /* Writing gl_FragColor outputs to all color regions. */
             for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
                this->outputs[i] = reg;
-               this->output_components[i] = 4;
             }
          } else if (var->data.location == FRAG_RESULT_DEPTH) {
             this->frag_depth = reg;
@@ -123,8 +121,6 @@ fs_visitor::nir_setup_outputs()
          } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
             this->sample_mask = reg;
          } else {
-            int vector_elements = var->type->without_array()->vector_elements;
-
             /* gl_FragData or a user-defined FS output */
             assert(var->data.location >= FRAG_RESULT_DATA0 &&
                    var->data.location < FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS);
@@ -132,8 +128,7 @@ fs_visitor::nir_setup_outputs()
             /* General color output. */
             for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
                int output = var->data.location - FRAG_RESULT_DATA0 + i;
-               this->outputs[output] = offset(reg, bld, vector_elements * i);
-               this->output_components[output] = vector_elements;
+               this->outputs[output] = offset(reg, bld, 4 * i);
             }
          }
          break;
@@ -2360,6 +2355,7 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
 
    case nir_intrinsic_load_input: {
       fs_reg src = fs_reg(ATTR, instr->const_index[0], dest.type);
+      unsigned first_component = nir_intrinsic_component(instr);
       unsigned num_components = instr->num_components;
       enum brw_reg_type type = dest.type;
 
@@ -2368,7 +2364,7 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
       src = offset(src, bld, const_offset->u32[0]);
 
       for (unsigned j = 0; j < num_components; j++) {
-         bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+         bld.MOV(offset(dest, bld, j), offset(src, bld, j + first_component));
       }
 
       if (type == BRW_REGISTER_TYPE_DF) {
@@ -4103,6 +4099,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       new_dest = offset(new_dest, bld, const_offset->u32[0]);
 
       unsigned num_components = instr->num_components;
+      unsigned first_component = nir_intrinsic_component(instr);
       unsigned bit_size = instr->src[0].is_ssa ?
          instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size;
       if (bit_size == 64) {
@@ -4116,7 +4113,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       }
 
       for (unsigned j = 0; j < num_components; j++) {
-         bld.MOV(offset(new_dest, bld, j), offset(src, bld, j));
+         bld.MOV(offset(new_dest, bld, j + first_component),
+                 offset(src, bld, j));
       }
       break;
    }
index 156a6306b77451e520ee34cba94a2526f99e4e57..6d843749f442f36d6f59b011ffede7769cdc9dec 100644 (file)
@@ -459,8 +459,7 @@ fs_visitor::emit_fb_writes()
             src0_alpha = offset(outputs[0], bld, 3);
 
          inst = emit_single_fb_write(abld, this->outputs[target], reg_undef,
-                                     src0_alpha,
-                                     this->output_components[target]);
+                                     src0_alpha, 4);
          inst->target = target;
       }
    }
@@ -545,9 +544,7 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
    const fs_builder abld = bld.annotate("user clip distances");
 
    this->outputs[VARYING_SLOT_CLIP_DIST0] = vgrf(glsl_type::vec4_type);
-   this->output_components[VARYING_SLOT_CLIP_DIST0] = 4;
    this->outputs[VARYING_SLOT_CLIP_DIST1] = vgrf(glsl_type::vec4_type);
-   this->output_components[VARYING_SLOT_CLIP_DIST1] = 4;
 
    for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
       fs_reg u = userplane[i];
@@ -724,10 +721,8 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
                sources[length++] = reg;
             }
          } else {
-            for (unsigned i = 0; i < output_components[varying]; i++)
+            for (unsigned i = 0; i < 4; i++)
                sources[length++] = offset(this->outputs[varying], bld, i);
-            for (unsigned i = output_components[varying]; i < 4; i++)
-               sources[length++] = brw_imm_d(0);
          }
          break;
       }
@@ -901,7 +896,6 @@ fs_visitor::init()
    this->nir_ssa_values = NULL;
 
    memset(&this->payload, 0, sizeof(this->payload));
-   memset(this->output_components, 0, sizeof(this->output_components));
    this->source_depth_to_render_target = false;
    this->runtime_check_aads_emit = false;
    this->first_non_payload_grf = 0;
index fe7653137eaa3382d9edd67978e8065f2b881222..388ae9e0386fd2421e6984cd05a3c9e4ca53bb94 100644 (file)
@@ -302,8 +302,8 @@ brw_nir_lower_vue_outputs(nir_shader *nir,
    if (is_scalar) {
       nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
                                VARYING_SLOT_VAR0,
-                               type_size_scalar);
-      nir_lower_io(nir, nir_var_shader_out, type_size_scalar);
+                               type_size_vec4_times_4);
+      nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
    } else {
       nir_foreach_variable(var, &nir->outputs)
          var->data.driver_location = var->data.location;
@@ -340,8 +340,8 @@ void
 brw_nir_lower_fs_outputs(nir_shader *nir)
 {
    nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
-                            FRAG_RESULT_DATA0, type_size_scalar);
-   nir_lower_io(nir, nir_var_shader_out, type_size_scalar);
+                            FRAG_RESULT_DATA0, type_size_vec4_times_4);
+   nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
 }
 
 void