i965/gen6/gs: Handle the case where a geometry shader emits no output.

author Iago Toral Quiroga <itoral@igalia.com>

Mon, 21 Jul 2014 09:48:42 +0000 (11:48 +0200)

committer Iago Toral Quiroga <itoral@igalia.com>

Fri, 19 Sep 2014 13:01:15 +0000 (15:01 +0200)
author Iago Toral Quiroga <itoral@igalia.com>
Mon, 21 Jul 2014 09:48:42 +0000 (11:48 +0200)
committer Iago Toral Quiroga <itoral@igalia.com>
Fri, 19 Sep 2014 13:01:15 +0000 (15:01 +0200)
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp

index 77e1957be94d148ea94647adfe2a2b81c402f1a2..34cd59be52caebdc879a47047230a6aa35d2fcca 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -166,7 +166,7 @@ gen6_gs_visitor::visit(ir_end_primitive *)
  
     /* Otherwise we know that the last vertex we have processed was the last
      * vertex in the primitive and we need to set its PrimEnd flag, so do this
-    * unless we haven't emitted that vertex at all.
+    * unless we haven't emitted that vertex at all (i.e. vertex_count == 0).
      *
      * Notice that we have already incremented vertex_count when we processed
      * the last emit_vertex, so we need to take that into account in the
@@ -176,6 +176,10 @@ gen6_gs_visitor::visit(ir_end_primitive *)
     unsigned num_output_vertices = c->gp->program.VerticesOut;
     emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices + 1),
              BRW_CONDITIONAL_L));
+   vec4_instruction *inst = emit(CMP(dst_null_d(),
+                                     this->vertex_count, 0u,
+                                     BRW_CONDITIONAL_NEQ));
+   inst->predicate = BRW_PREDICATE_NORMAL;
     emit(IF(BRW_PREDICATE_NORMAL));
     {
        /* vertex_output_offset is already pointing at the first entry of the
@@ -224,47 +228,40 @@ gen6_gs_visitor::emit_urb_write_header(int mrf)
  }
  
  void
-gen6_gs_visitor::emit_urb_write_opcode(bool complete, src_reg vertex,
-                                       int base_mrf, int mlen, int urb_offset)
+gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf,
+                                       int last_mrf, int urb_offset)
  {
     vec4_instruction *inst = NULL;
  
-   /* If the vertex is not complete we don't have to do anything special */
     if (!complete) {
+      /* If the vertex is not complete we don't have to do anything special */
        inst = emit(GS_OPCODE_URB_WRITE);
        inst->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
-      inst->base_mrf = base_mrf;
-      inst->mlen = mlen;
-      inst->offset = urb_offset;
-      return;
-   }
-
-   /* Otherwise, if this is not the last vertex we are going to write,
-    * we have to request a new VUE handle for the next vertex.
-    *
-    * Notice that the vertex parameter has been pre-incremented in
-    * emit_thread_end() to make this comparison easier.
-    */
-   emit(CMP(dst_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_L));
-   emit(IF(BRW_PREDICATE_NORMAL));
-   {
+   } else {
+      /* Otherwise we always request to allocate a new VUE handle. If this is
+       * the last write before the EOT message and the new handle never gets
+       * used it will be dereferenced when we send the EOT message. This is
+       * necessary to avoid different setups for the EOT message (one for the
+       * case when there is no output and another for the case when there is)
+       * which would require ending the program with an IF/ELSE/ENDIF block,
+       * something we do not want.
+       */
        inst = emit(GS_OPCODE_URB_WRITE_ALLOCATE);
        inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
-      inst->base_mrf = base_mrf;
-      inst->mlen = mlen;
-      inst->offset = urb_offset;
        inst->dst = dst_reg(MRF, base_mrf);
        inst->src[0] = this->temp;
     }
-   emit(BRW_OPCODE_ELSE);
-   {
-      inst = emit(GS_OPCODE_URB_WRITE);
-      inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
-      inst->base_mrf = base_mrf;
-      inst->mlen = mlen;
-      inst->offset = urb_offset;
-   }
-   emit(BRW_OPCODE_ENDIF);
+
+   inst->base_mrf = base_mrf;
+   /* URB data written (does not include the message header reg) must
+    * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
+    * section 5.4.3.2.2: URB_INTERLEAVED.
+    */
+   int mlen = last_mrf - base_mrf;
+   if ((mlen % 2) != 1)
+      mlen++;
+   inst->mlen = mlen;
+   inst->offset = urb_offset;
  }
  
  void
@@ -303,113 +300,113 @@ gen6_gs_visitor::emit_thread_end()
     int max_usable_mrf = 13;
  
     /* Issue the FF_SYNC message and obtain the initial VUE handle. */
-   this->current_annotation = "gen6 thread end: ff_sync";
-   vec4_instruction *inst =
-      emit(GS_OPCODE_FF_SYNC, dst_reg(this->temp), this->prim_count);
-   inst->base_mrf = base_mrf;
-
-   /* Loop over all buffered vertices and emit URB write messages */
-   this->current_annotation = "gen6 thread end: urb writes init";
-   src_reg vertex(this, glsl_type::uint_type);
-   emit(MOV(dst_reg(vertex), 0u));
-   emit(MOV(dst_reg(this->vertex_output_offset), 0u));
-
-   this->current_annotation = "gen6 thread end: urb writes";
-   emit(BRW_OPCODE_DO);
+   emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G));
+   emit(IF(BRW_PREDICATE_NORMAL));
     {
-      emit(CMP(dst_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_GE));
-      inst = emit(BRW_OPCODE_BREAK);
-      inst->predicate = BRW_PREDICATE_NORMAL;
+      this->current_annotation = "gen6 thread end: ff_sync";
+      vec4_instruction *inst =
+         emit(GS_OPCODE_FF_SYNC, dst_reg(this->temp), this->prim_count);
+      inst->base_mrf = base_mrf;
  
-      /* First we prepare the message header */
-      emit_urb_write_header(base_mrf);
+      /* Loop over all buffered vertices and emit URB write messages */
+      this->current_annotation = "gen6 thread end: urb writes init";
+      src_reg vertex(this, glsl_type::uint_type);
+      emit(MOV(dst_reg(vertex), 0u));
+      emit(MOV(dst_reg(this->vertex_output_offset), 0u));
  
-      /* Then add vertex data to the message in interleaved fashion */
-      int slot = 0;
-      bool complete = false;
-      do {
-         int mrf = base_mrf + 1;
+      this->current_annotation = "gen6 thread end: urb writes";
+      emit(BRW_OPCODE_DO);
+      {
+         emit(CMP(dst_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_GE));
+         inst = emit(BRW_OPCODE_BREAK);
+         inst->predicate = BRW_PREDICATE_NORMAL;
  
-         /* URB offset is in URB row increments, and each of our MRFs is half
-          * of one of those, since we're doing interleaved writes.
-          */
-         int urb_offset = slot / 2;
+         /* First we prepare the message header */
+         emit_urb_write_header(base_mrf);
  
-         for (; slot < prog_data->vue_map.num_slots; ++slot) {
-            int varying = prog_data->vue_map.slot_to_varying[slot];
-            current_annotation = output_reg_annotation[varying];
+         /* Then add vertex data to the message in interleaved fashion */
+         int slot = 0;
+         bool complete = false;
+         do {
+            int mrf = base_mrf + 1;
  
-            /* Compute offset of this slot for the current vertex
-             * in vertex_output
+            /* URB offset is in URB row increments, and each of our MRFs is half
+             * of one of those, since we're doing interleaved writes.
               */
-            src_reg data(this->vertex_output);
-            data.reladdr = ralloc(mem_ctx, src_reg);
-            memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg));
-
-            if (varying == VARYING_SLOT_PSIZ) {
-               /* We did not buffer PSIZ, emit it directly here */
-               emit_urb_slot(dst_reg(MRF, mrf), varying);
-            } else {
-               /* Copy this slot to the appropriate message register */
-               dst_reg reg = dst_reg(MRF, mrf);
-               reg.type = output_reg[varying].type;
-               data.type = reg.type;
-               vec4_instruction *inst = emit(MOV(reg, data));
-               inst->force_writemask_all = true;
+            int urb_offset = slot / 2;
+
+            for (; slot < prog_data->vue_map.num_slots; ++slot) {
+               int varying = prog_data->vue_map.slot_to_varying[slot];
+               current_annotation = output_reg_annotation[varying];
+
+               /* Compute offset of this slot for the current vertex
+                * in vertex_output
+                */
+               src_reg data(this->vertex_output);
+               data.reladdr = ralloc(mem_ctx, src_reg);
+               memcpy(data.reladdr, &this->vertex_output_offset,
+                      sizeof(src_reg));
+
+               if (varying == VARYING_SLOT_PSIZ) {
+                  /* We did not buffer PSIZ, emit it directly here */
+                  emit_urb_slot(dst_reg(MRF, mrf), varying);
+               } else {
+                  /* Copy this slot to the appropriate message register */
+                  dst_reg reg = dst_reg(MRF, mrf);
+                  reg.type = output_reg[varying].type;
+                  data.type = reg.type;
+                  vec4_instruction *inst = emit(MOV(reg, data));
+                  inst->force_writemask_all = true;
+               }
+
+               mrf++;
+               emit(ADD(dst_reg(this->vertex_output_offset),
+                        this->vertex_output_offset, 1u));
+
+               /* If this was max_usable_mrf, we can't fit anything more into
+                * this URB WRITE.
+                */
+               if (mrf > max_usable_mrf) {
+                  slot++;
+                  break;
+               }
              }
  
-            mrf++;
-            emit(ADD(dst_reg(this->vertex_output_offset),
-                     this->vertex_output_offset, 1u));
+            complete = slot >= prog_data->vue_map.num_slots;
+            emit_urb_write_opcode(complete, base_mrf, mrf, urb_offset);
+         } while (!complete);
  
-            /* If this was max_usable_mrf, we can't fit anything more into this
-             * URB WRITE.
-             */
-            if (mrf > max_usable_mrf) {
-               slot++;
-               break;
-            }
-         }
-
-         complete = slot >= prog_data->vue_map.num_slots;
-
-         /* When we emit the URB_WRITE below we need to do different things
-          * depending on whether this is the last vertex we are going to
-          * write. That means that we will need to check if
-          * vertex >= vertex_count - 1. However, by increasing vertex early
-          * we transform that comparison into vertex >= vertex_count, which
-          * is more convenient.
+         /* Skip over the flags data item so that vertex_output_offset points
+          * to the first data item of the next vertex, so that we can start
+          * writing the next vertex.
            */
-         if (complete)
-            emit(ADD(dst_reg(vertex), vertex, 1u));
+         emit(ADD(dst_reg(this->vertex_output_offset),
+                  this->vertex_output_offset, 1u));
  
-         /* URB data written (does not include the message header reg) must
-          * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
-          * section 5.4.3.2.2: URB_INTERLEAVED.
-          */
-         int mlen = mrf - base_mrf;
-         if ((mlen % 2) != 1)
-            mlen++;
-         emit_urb_write_opcode(complete, vertex, base_mrf, mlen, urb_offset);
-      } while (!complete);
-
-      /* Skip over the flags data item so that vertex_output_offset points to
-       * the first data item of the next vertex, so that we can start writing
-       * the next vertex.
-       */
-       emit(ADD(dst_reg(this->vertex_output_offset),
-                this->vertex_output_offset, 1u));
+         emit(ADD(dst_reg(vertex), vertex, 1u));
+      }
+      emit(BRW_OPCODE_WHILE);
     }
-   emit(BRW_OPCODE_WHILE);
+   emit(BRW_OPCODE_ENDIF);
  
     /* Finally, emit EOT message.
      *
-    * In gen6 it looks like we have to set the complete flag too, otherwise
-    * the GPU hangs.
+    * In gen6 we need to end the thread differently depending on whether we have
+    * emitted at least one vertex or not. In case we did, the EOT message must
+    * always include the COMPLETE flag or else the GPU hangs. If we have not
+    * produced any output we can't use the COMPLETE flag.
+    *
+    * However, this would lead us to end the program with an ENDIF opcode,
+    * which we want to avoid. Instead, we always request a new VUE handle
+    * every time we do a URB WRITE, even for the last vertex we emit. That
+    * way, whether we have emitted at least one vertex or none at all, we
+    * always finish the thread without writing to the URB, and a single EOT
+    * message with the COMPLETE and UNUSED flags set works for both
+    * cases.
      */
     this->current_annotation = "gen6 thread end: EOT";
-   inst = emit(GS_OPCODE_THREAD_END);
-   inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
+   vec4_instruction *inst = emit(GS_OPCODE_THREAD_END);
+   inst->urb_write_flags = BRW_URB_WRITE_COMPLETE | BRW_URB_WRITE_UNUSED;
     inst->base_mrf = base_mrf;
     inst->mlen = 1;
  }
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h

index 68fe88ddd42ff043f507d5839edc08487579239b..7af6405a28202295fb0ce00f4d5094afa8c15710 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
@@ -49,9 +49,8 @@ protected:
     virtual void visit(ir_end_primitive *);
     virtual void emit_urb_write_header(int mrf);
     virtual void emit_urb_write_opcode(bool complete,
-                                      src_reg vertex,
                                        int base_mrf,
-                                      int mlen,
+                                      int last_mrf,
                                        int urb_offset);
  
  private:
author	Iago Toral Quiroga <itoral@igalia.com>
	Mon, 21 Jul 2014 09:48:42 +0000 (11:48 +0200)
committer	Iago Toral Quiroga <itoral@igalia.com>
	Fri, 19 Sep 2014 13:01:15 +0000 (15:01 +0200)
src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp		patch | blob | history
src/mesa/drivers/dri/i965/gen6_gs_visitor.h		patch | blob | history