draw/gs: make sure geometry shaders don't overflow

author Zack Rusin <zackr@vmware.com>

Wed, 17 Apr 2013 03:31:22 +0000 (20:31 -0700)

committer Zack Rusin <zackr@vmware.com>

Wed, 17 Apr 2013 06:38:47 +0000 (23:38 -0700)
author Zack Rusin <zackr@vmware.com>
Wed, 17 Apr 2013 03:31:22 +0000 (20:31 -0700)
committer Zack Rusin <zackr@vmware.com>
Wed, 17 Apr 2013 06:38:47 +0000 (23:38 -0700)
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c

index 987c01a79192c355aade6cfbac77af0788de83ab..2f94eaeda4fa574c7f09eb5cf54a296629ffced5 100644 (file)
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -280,6 +280,7 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
     int max_prims_per_invocation = 0;
     char *output_ptr = (char*)shader->gs_output;
     int i, j, prim_idx;
+   unsigned next_prim_boundary = shader->primitive_boundary;
  
     for (i = 0; i < shader->vector_length; ++i) {
        int prims = shader->llvm_emitted_primitives[i];
@@ -290,19 +291,42 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
        total_verts += shader->llvm_emitted_vertices[i];
     }
  
-
     output_ptr += shader->emitted_vertices * shader->vertex_size;
     for (i = 0; i < shader->vector_length - 1; ++i) {
        int current_verts = shader->llvm_emitted_vertices[i];
-
-      if (current_verts != shader->max_output_vertices) {
-         memcpy(output_ptr + (vertex_count + current_verts) * shader->vertex_size,
-                output_ptr + (vertex_count + shader->max_output_vertices) * shader->vertex_size,
-                shader->vertex_size * (total_verts - vertex_count));
+      int next_verts = shader->llvm_emitted_vertices[i + 1];
+#if 0
+      int j; 
+      for (j = 0; j < current_verts; ++j) {
+         struct vertex_header *vh = (struct vertex_header *)
+            (output_ptr + shader->vertex_size * (i * next_prim_boundary + j));
+         debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count,
+                      vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]);
+         
+      }
+#endif
+      debug_assert(current_verts <= shader->max_output_vertices);
+      debug_assert(next_verts <= shader->max_output_vertices);
+      if (next_verts) {
+         memmove(output_ptr + (vertex_count + current_verts) * shader->vertex_size,
+                 output_ptr + ((i + 1) * next_prim_boundary) * shader->vertex_size,
+                 shader->vertex_size * next_verts);
        }
        vertex_count += current_verts;
     }
  
+#if 0
+   {
+      int i;
+      for (i = 0; i < total_verts; ++i) {
+         struct vertex_header *vh = (struct vertex_header *)(output_ptr + shader->vertex_size * i);
+         debug_printf("%d) [%f, %f, %f, %f]\n", i,
+                      vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]);
+         
+      }
+   }
+#endif
+
     prim_idx = 0;
     for (i = 0; i < shader->vector_length; ++i) {
        int num_prims = shader->llvm_emitted_primitives[i];
@@ -513,10 +537,12 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
  
     output_verts->vertex_size = vertex_size;
     output_verts->stride = output_verts->vertex_size;
+   /* we allocate exactly one extra vertex per primitive to allow the GS to emit
+    * overflown vertices into some area where they won't harm anyone */
     output_verts->verts =
        (struct vertex_header *)MALLOC(output_verts->vertex_size *
                                       max_out_prims *
-                                     shader->max_output_vertices);
+                                     shader->primitive_boundary);
  
  #if 0
     debug_printf("%s count = %d (in prims # = %d)\n",
@@ -724,6 +750,16 @@ draw_create_geometry_shader(struct draw_context *draw,
                 TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES)
           gs->max_output_vertices = gs->info.properties[i].data[0];
     }
+   /* Primitive boundary is bigger than max_output_vertices by one, because
+    * the specification says that the geometry shader should exit if the 
+    * number of emitted vertices is bigger or equal to max_output_vertices and
+    * we can't do that because we're running in the SoA mode, which means that
+    * our storing routines will keep getting called on channels that have
+    * overflown.
+    * So we need some scratch area where we can keep writing the overflown 
+    * vertices without overwriting anything important or crashing.
+    */
+   gs->primitive_boundary = gs->max_output_vertices + 1;
  
     for (i = 0; i < gs->info.num_outputs; i++) {
        if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h

index 7c841396aa2f24c90f8ae9a39bd99b7f6b8ffca7..ca744cebfc923bf687dd84d1046eae991864c95d 100644 (file)
--- a/src/gallium/auxiliary/draw/draw_gs.h
+++ b/src/gallium/auxiliary/draw/draw_gs.h
@@ -68,6 +68,7 @@ struct draw_geometry_shader {
     unsigned position_output;
  
     unsigned max_output_vertices;
+   unsigned primitive_boundary;
     unsigned input_primitive;
     unsigned output_primitive;
  
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c

index 0e34978319891751b3fe999907b2790d0a699287..33fe40d5e7f5d7663dd58b73b2f2778c3fe4b9a6 100644 (file)
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1287,8 +1287,8 @@ draw_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
     LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
                                                    lp_int_type(gs_type), 0);
     LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
-   LLVMValueRef max_output_vertices =
-      lp_build_const_int32(gallivm, variant->shader->base.max_output_vertices);
+   LLVMValueRef next_prim_offset =
+      lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
     LLVMValueRef io = variant->io_ptr;
     unsigned i;
     const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
@@ -1297,7 +1297,7 @@ draw_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
        LLVMValueRef ind = lp_build_const_int32(gallivm, i);
        LLVMValueRef currently_emitted =
           LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
-      indices[i] = LLVMBuildMul(builder, ind, max_output_vertices, "");
+      indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
        indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
     }
  
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h

index f1b1d79eab70c16507c26add292db7a3ff740e09..0fbb8aabbb02f81119db4d36ba3174f30858e33d 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -396,6 +396,7 @@ struct lp_build_tgsi_soa_context
     LLVMValueRef emitted_prims_vec_ptr;
     LLVMValueRef total_emitted_vertices_vec_ptr;
     LLVMValueRef emitted_vertices_vec_ptr;
+   LLVMValueRef max_output_vertices_vec;
  
     LLVMValueRef consts_ptr;
     const LLVMValueRef *pos;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c

index 9822f72c6a3ece0e12bea0d81cad15f5f6dbf245..28eb57b4b51397b2a435d992e606a03dcb1deef7 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -828,7 +828,6 @@ emit_fetch_gs_input(
        vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
     }
  
-
     res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                      vertex_index, attrib_index,
                                      swizzle_index);
@@ -2257,6 +2256,20 @@ clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
     LLVMBuildStore(builder, current_vec, ptr);
  }
  
+static LLVMValueRef
+clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
+                                  LLVMValueRef current_mask_vec,
+                                  LLVMValueRef total_emitted_vertices_vec)
+{
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
+   LLVMValueRef max_mask = lp_build_cmp(uint_bld, PIPE_FUNC_LESS,
+                                        total_emitted_vertices_vec,
+                                        bld->max_output_vertices_vec);
+
+   return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
+}
+
  static void
  emit_vertex(
     const struct lp_build_tgsi_action * action,
@@ -2270,6 +2283,8 @@ emit_vertex(
        LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
        LLVMValueRef total_emitted_vertices_vec =
           LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
+      masked_ones = clamp_mask_to_max_output_vertices(bld, masked_ones,
+                                                      total_emitted_vertices_vec);
        gather_outputs(bld);
        bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                   bld->outputs,
@@ -2812,12 +2827,29 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
     bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
  
     if (gs_iface) {
+      /* There's no specific value for this because it should always
+       * be set, but apps using ext_geometry_shader4 quite often
+       * were forgetting so we're using MAX_VERTEX_VARYING from
+       * that spec even though we could debug_assert if it's not
+       * set, but that's a lot uglier. */
+      uint max_output_vertices = 32;
+      uint i = 0;
        /* inputs are always indirect with gs */
        bld.indirect_files |= (1 << TGSI_FILE_INPUT);
        bld.gs_iface = gs_iface;
        bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
        bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
        bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
+
+      for (i = 0; i < info->num_properties; ++i) {
+         if (info->properties[i].name ==
+             TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
+            max_output_vertices = info->properties[i].data[0];
+         }
+      }
+      bld.max_output_vertices_vec =
+         lp_build_const_int_vec(gallivm, bld.bld_base.uint_bld.type,
+                                max_output_vertices);
     }
  
     lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
author	Zack Rusin <zackr@vmware.com>
	Wed, 17 Apr 2013 03:31:22 +0000 (20:31 -0700)
committer	Zack Rusin <zackr@vmware.com>
	Wed, 17 Apr 2013 06:38:47 +0000 (23:38 -0700)
src/gallium/auxiliary/draw/draw_gs.c		patch \| blob \| history
src/gallium/auxiliary/draw/draw_gs.h		patch \| blob \| history
src/gallium/auxiliary/draw/draw_llvm.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c		patch \| blob \| history