i965/vec4: add support for packing vs/gs/tes outputs
authorTimothy Arceri <timothy.arceri@collabora.com>
Thu, 23 Jun 2016 02:49:53 +0000 (12:49 +1000)
committerTimothy Arceri <timothy.arceri@collabora.com>
Thu, 21 Jul 2016 02:06:11 +0000 (12:06 +1000)
Here we create a new output_generic_reg array with the ability to
store the dst_reg for each component of user defined varyings.
This is needed as the previous code only stored the dst_reg based
on the varying location which meant packed varyings would overwrite
each other.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Alejandro PiƱeiro <apinheiro@igalia.com>
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index d544e711da9015b160b0886188ffc6bfc6f3352a..1505ba6ecb1a9f5141d3c7484ce4bac56f7a531d 100644 (file)
@@ -114,6 +114,8 @@ public:
     * for the ir->location's used.
     */
    dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
+   dst_reg output_generic_reg[MAX_VARYINGS_INCL_PATCH][4];
+   unsigned output_generic_num_components[MAX_VARYINGS_INCL_PATCH][4];
    const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
    int uniforms;
 
@@ -269,6 +271,7 @@ public:
    void emit_ndc_computation();
    void emit_psiz_and_flags(dst_reg reg);
    vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
+   void emit_generic_urb_slot(dst_reg reg, int varying, int component);
    virtual void emit_urb_slot(dst_reg reg, int varying);
 
    void emit_shader_time_begin();
index f786f5839a28174a715edb9b9e2ed3e9902cc4e5..c29411847e53442ae602725dbb1ff0659ca3a6ff 100644 (file)
@@ -416,7 +416,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
                         instr->num_components);
 
-      output_reg[varying] = dst_reg(src);
+      if (varying >= VARYING_SLOT_VAR0) {
+         unsigned c = nir_intrinsic_component(instr);
+         unsigned v = varying - VARYING_SLOT_VAR0;
+         output_generic_reg[v][c] = dst_reg(src);
+         output_generic_num_components[v][c] = instr->num_components;
+      } else {
+         output_reg[varying] = dst_reg(src);
+      }
       break;
    }
 
index b87d0a6939eb0d38b8c837ebc01c8ac457ca5568..76b2a05700f6b06d65070f159515186a70c53bb8 100644 (file)
@@ -1272,12 +1272,34 @@ vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
    assert(varying < VARYING_SLOT_MAX);
    assert(output_reg[varying].type == reg.type);
    current_annotation = output_reg_annotation[varying];
-   if (output_reg[varying].file != BAD_FILE)
+   if (output_reg[varying].file != BAD_FILE) {
       return emit(MOV(reg, src_reg(output_reg[varying])));
-   else
+   else
       return NULL;
 }
 
+void
+vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying, int component)
+{
+   assert(varying < VARYING_SLOT_MAX);
+   assert(varying >= VARYING_SLOT_VAR0);
+   varying = varying - VARYING_SLOT_VAR0;
+
+   unsigned num_comps = output_generic_num_components[varying][component];
+   if (num_comps == 0)
+      return;
+
+   assert(output_generic_reg[varying][component].type == reg.type);
+   current_annotation = output_reg_annotation[varying];
+   if (output_generic_reg[varying][component].file != BAD_FILE) {
+      src_reg src = src_reg(output_generic_reg[varying][component]);
+      src.swizzle = BRW_SWZ_COMP_OUTPUT(component);
+      reg.writemask =
+         brw_writemask_for_component_packing(num_comps, component);
+      emit(MOV(reg, src));
+   }
+}
+
 void
 vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
 {
@@ -1317,7 +1339,13 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
       /* No need to write to this slot */
       break;
    default:
-      emit_generic_urb_slot(reg, varying);
+      if (varying >= VARYING_SLOT_VAR0) {
+         for (int i = 0; i < 4; i++) {
+            emit_generic_urb_slot(reg, varying, i);
+         }
+      } else {
+         emit_generic_urb_slot(reg, varying);
+      }
       break;
    }
 }
@@ -1765,6 +1793,9 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
    this->current_annotation = NULL;
    memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
 
+   memset(this->output_generic_num_components, 0,
+          sizeof(this->output_generic_num_components));
+
    this->virtual_grf_start = NULL;
    this->virtual_grf_end = NULL;
    this->live_intervals = NULL;