i965/vec4: Handle component qualifiers on non-generic varyings.
author Kenneth Graunke <kenneth@whitecape.org>
Mon, 17 Oct 2016 18:14:10 +0000 (11:14 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Tue, 22 Nov 2016 08:29:24 +0000 (00:29 -0800)
ARB_enhanced_layouts only requires component qualifier support for
generic varyings, so that was all the vec4 backend knew how to handle.

This patch extends the backend to handle it for all varyings, so we
can use store_output intrinsics with a component set for things like
clip/cull distances.  We may want to use that for other VUE header
fields in the future as well.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp

index 62c6007129ac58575c07fde306c2685b6eb3b8ac..dc69ea971745caee07cd79421d6c2f3fe0e43259 100644 (file)
@@ -113,10 +113,9 @@ public:
    /* Regs for vertex results.  Generated at ir_variable visiting time
     * for the ir->location's used.
     */
-   dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
-   dst_reg output_generic_reg[MAX_VARYINGS_INCL_PATCH][4];
-   unsigned output_generic_num_components[MAX_VARYINGS_INCL_PATCH][4];
-   const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
+   dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
+   unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
+   const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
    int uniforms;
 
    src_reg shader_start_time;
@@ -270,8 +269,7 @@ public:
 
    void emit_ndc_computation();
    void emit_psiz_and_flags(dst_reg reg);
-   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
-   void emit_generic_urb_slot(dst_reg reg, int varying, int component);
+   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
    virtual void emit_urb_slot(dst_reg reg, int varying);
 
    void emit_shader_time_begin();
index fc4eb3ac1d78e75da2ba57be0419294a1ba8c698..0d54907cadf021a22567f881ee63be4b8c91ec4f 100644 (file)
@@ -416,14 +416,9 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
                         instr->num_components);
 
-      if (varying >= VARYING_SLOT_VAR0) {
-         unsigned c = nir_intrinsic_component(instr);
-         unsigned v = varying - VARYING_SLOT_VAR0;
-         output_generic_reg[v][c] = dst_reg(src);
-         output_generic_num_components[v][c] = instr->num_components;
-      } else {
-         output_reg[varying] = dst_reg(src);
-      }
+      unsigned c = nir_intrinsic_component(instr);
+      output_reg[varying][c] = dst_reg(src);
+      output_num_components[varying][c] = instr->num_components;
       break;
    }
 
index 954f147fe7e83dc50c6813a221d63ec69ff68370..8c7901f2d41786974646eeef45eda3eecd84ee16 100644 (file)
@@ -1154,15 +1154,16 @@ vec4_visitor::gs_end_primitive()
 void
 vec4_visitor::emit_ndc_computation()
 {
-   if (output_reg[VARYING_SLOT_POS].file == BAD_FILE)
+   if (output_reg[VARYING_SLOT_POS][0].file == BAD_FILE)
       return;
 
    /* Get the position */
-   src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
+   src_reg pos = src_reg(output_reg[VARYING_SLOT_POS][0]);
 
    /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
    dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
-   output_reg[BRW_VARYING_SLOT_NDC] = ndc;
+   output_reg[BRW_VARYING_SLOT_NDC][0] = ndc;
+   output_num_components[BRW_VARYING_SLOT_NDC][0] = 4;
 
    current_annotation = "NDC";
    dst_reg ndc_w = ndc;
@@ -1182,7 +1183,7 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
 {
    if (devinfo->gen < 6 &&
        ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
-        output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE ||
+        output_reg[VARYING_SLOT_CLIP_DIST0][0].file != BAD_FILE ||
         devinfo->has_negative_rhw_bug)) {
       dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
       dst_reg header1_w = header1;
@@ -1191,23 +1192,23 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
       emit(MOV(header1, brw_imm_ud(0u)));
 
       if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
-        src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
+        src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ][0]);
 
         current_annotation = "Point size";
         emit(MUL(header1_w, psiz, brw_imm_f((float)(1 << 11))));
         emit(AND(header1_w, src_reg(header1_w), brw_imm_d(0x7ff << 8)));
       }
 
-      if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
+      if (output_reg[VARYING_SLOT_CLIP_DIST0][0].file != BAD_FILE) {
          current_annotation = "Clipping flags";
          dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
          dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
 
-         emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
+         emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
          emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0));
          emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
 
-         emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
+         emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
          emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0));
          emit(SHL(flags1, src_reg(flags1), brw_imm_d(4)));
          emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
@@ -1223,15 +1224,15 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
        * clipped against all fixed planes.
        */
       if (devinfo->has_negative_rhw_bug &&
-          output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) {
-         src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
+          output_reg[BRW_VARYING_SLOT_NDC][0].file != BAD_FILE) {
+         src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC][0]);
          ndc_w.swizzle = BRW_SWIZZLE_WWWW;
          emit(CMP(dst_null_f(), ndc_w, brw_imm_f(0.0f), BRW_CONDITIONAL_L));
          vec4_instruction *inst;
          inst = emit(OR(header1_w, src_reg(header1_w), brw_imm_ud(1u << 6)));
          inst->predicate = BRW_PREDICATE_NORMAL;
-         output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F;
-         inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], brw_imm_f(0.0f)));
+         output_reg[BRW_VARYING_SLOT_NDC][0].type = BRW_REGISTER_TYPE_F;
+         inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC][0], brw_imm_f(0.0f)));
          inst->predicate = BRW_PREDICATE_NORMAL;
       }
 
@@ -1243,7 +1244,7 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
       if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
          dst_reg reg_w = reg;
          reg_w.writemask = WRITEMASK_W;
-         src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ]);
+         src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ][0]);
          reg_as_src.type = reg_w.type;
          reg_as_src.swizzle = brw_swizzle_for_size(1);
          emit(MOV(reg_w, reg_as_src));
@@ -1252,58 +1253,45 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
          dst_reg reg_y = reg;
          reg_y.writemask = WRITEMASK_Y;
          reg_y.type = BRW_REGISTER_TYPE_D;
-         output_reg[VARYING_SLOT_LAYER].type = reg_y.type;
-         emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER])));
+         output_reg[VARYING_SLOT_LAYER][0].type = reg_y.type;
+         emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER][0])));
       }
       if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
          dst_reg reg_z = reg;
          reg_z.writemask = WRITEMASK_Z;
          reg_z.type = BRW_REGISTER_TYPE_D;
-         output_reg[VARYING_SLOT_VIEWPORT].type = reg_z.type;
-         emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT])));
+         output_reg[VARYING_SLOT_VIEWPORT][0].type = reg_z.type;
+         emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT][0])));
       }
    }
 }
 
 vec4_instruction *
-vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
-{
-   assert(varying < VARYING_SLOT_MAX);
-   assert(output_reg[varying].type == reg.type);
-   current_annotation = output_reg_annotation[varying];
-   if (output_reg[varying].file != BAD_FILE) {
-      return emit(MOV(reg, src_reg(output_reg[varying])));
-   } else
-      return NULL;
-}
-
-void
 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying, int component)
 {
    assert(varying < VARYING_SLOT_MAX);
-   assert(varying >= VARYING_SLOT_VAR0);
-   varying = varying - VARYING_SLOT_VAR0;
 
-   unsigned num_comps = output_generic_num_components[varying][component];
+   unsigned num_comps = output_num_components[varying][component];
    if (num_comps == 0)
-      return;
+      return NULL;
 
-   assert(output_generic_reg[varying][component].type == reg.type);
+   assert(output_reg[varying][component].type == reg.type);
    current_annotation = output_reg_annotation[varying];
-   if (output_generic_reg[varying][component].file != BAD_FILE) {
-      src_reg src = src_reg(output_generic_reg[varying][component]);
+   if (output_reg[varying][component].file != BAD_FILE) {
+      src_reg src = src_reg(output_reg[varying][component]);
       src.swizzle = BRW_SWZ_COMP_OUTPUT(component);
       reg.writemask =
          brw_writemask_for_component_packing(num_comps, component);
-      emit(MOV(reg, src));
+      return emit(MOV(reg, src));
    }
+   return NULL;
 }
 
 void
 vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
 {
    reg.type = BRW_REGISTER_TYPE_F;
-   output_reg[varying].type = reg.type;
+   output_reg[varying][0].type = reg.type;
 
    switch (varying) {
    case VARYING_SLOT_PSIZ:
@@ -1315,13 +1303,13 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
    }
    case BRW_VARYING_SLOT_NDC:
       current_annotation = "NDC";
-      if (output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE)
-         emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
+      if (output_reg[BRW_VARYING_SLOT_NDC][0].file != BAD_FILE)
+         emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC][0])));
       break;
    case VARYING_SLOT_POS:
       current_annotation = "gl_Position";
-      if (output_reg[VARYING_SLOT_POS].file != BAD_FILE)
-         emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
+      if (output_reg[VARYING_SLOT_POS][0].file != BAD_FILE)
+         emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS][0])));
       break;
    case VARYING_SLOT_EDGE:
       /* This is present when doing unfilled polygons.  We're supposed to copy
@@ -1338,12 +1326,8 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
       /* No need to write to this slot */
       break;
    default:
-      if (varying >= VARYING_SLOT_VAR0) {
-         for (int i = 0; i < 4; i++) {
-            emit_generic_urb_slot(reg, varying, i);
-         }
-      } else {
-         emit_generic_urb_slot(reg, varying);
+      for (int i = 0; i < 4; i++) {
+         emit_generic_urb_slot(reg, varying, i);
       }
       break;
    }
@@ -1795,8 +1779,7 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
    this->current_annotation = NULL;
    memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
 
-   memset(this->output_generic_num_components, 0,
-          sizeof(this->output_generic_num_components));
+   memset(this->output_num_components, 0, sizeof(this->output_num_components));
 
    this->virtual_grf_start = NULL;
    this->virtual_grf_end = NULL;
index 058ee3a7dadf63e737ec9b1e0732671c16e505e3..a300f76876737174db7884051059754c95da8b69 100644 (file)
@@ -104,7 +104,7 @@ void
 vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying)
 {
    reg.type = BRW_REGISTER_TYPE_F;
-   output_reg[varying].type = reg.type;
+   output_reg[varying][0].type = reg.type;
 
    switch (varying) {
    case VARYING_SLOT_COL0:
@@ -115,7 +115,7 @@ vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying)
        * and we only support GS in core profile.  So, this must be a vertex
        * shader.
        */
-      vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
+      vec4_instruction *inst = emit_generic_urb_slot(reg, varying, 0);
       if (inst && key->clamp_vertex_color)
          inst->saturate = true;
       break;
@@ -150,7 +150,7 @@ vec4_vs_visitor::emit_clip_distances(dst_reg reg, int offset)
         ++i) {
       reg.writemask = 1 << i;
       emit(DP4(reg,
-               src_reg(output_reg[clip_vertex]),
+               src_reg(output_reg[clip_vertex][0]),
                src_reg(this->userplane[i + offset])));
    }
 }
@@ -180,11 +180,15 @@ vec4_vs_visitor::emit_thread_end()
    if (key->nr_userclip_plane_consts > 0) {
       current_annotation = "user clip distances";
 
-      output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
-      output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
+      output_reg[VARYING_SLOT_CLIP_DIST0][0] =
+         dst_reg(this, glsl_type::vec4_type);
+      output_reg[VARYING_SLOT_CLIP_DIST1][0] =
+         dst_reg(this, glsl_type::vec4_type);
+      output_num_components[VARYING_SLOT_CLIP_DIST0][0] = 4;
+      output_num_components[VARYING_SLOT_CLIP_DIST1][0] = 4;
 
-      emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
-      emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
+      emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0][0], 0);
+      emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1][0], 4);
    }
 
    /* For VS, we always end the thread by emitting a single vertex.
index 329a1119f3fd8fc5b3084d1e42e2431177c7cf13..7254729f41c944a8a177b51be3d343ef2bd04991 100644 (file)
@@ -412,7 +412,7 @@ gen6_gs_visitor::emit_thread_end()
 
                /* Copy this slot to the appropriate message register */
                dst_reg reg = dst_reg(MRF, mrf);
-               reg.type = output_reg[varying].type;
+               reg.type = output_reg[varying][0].type;
                data.type = reg.type;
                vec4_instruction *inst = emit(MOV(reg, data));
                inst->force_writemask_all = true;
@@ -688,7 +688,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
          int offset = get_vertex_output_offset_for_varying(vertex, varying);
          emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_d(offset)));
          memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg));
-         data.type = output_reg[varying].type;
+         data.type = output_reg[varying][0].type;
 
          /* PSIZ, LAYER and VIEWPORT are packed in different channels of the
           * same slot, so make sure we write the appropriate channel