draw/gs: make sure geometry shaders don't overflow
author Zack Rusin <zackr@vmware.com>
Wed, 17 Apr 2013 03:31:22 +0000 (20:31 -0700)
committer Zack Rusin <zackr@vmware.com>
Wed, 17 Apr 2013 06:38:47 +0000 (23:38 -0700)
The specification says that the geometry shader should exit once the
number of emitted vertices is greater than or equal to
max_output_vertices. We can't do that because we're running in SoA mode,
which means that our storing routines will keep getting called on
channels that have overflowed (they will be masked out, but we can't
skip them).
So we need a scratch area where we can keep writing the overflowed
vertices without overwriting anything important or crashing.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
src/gallium/auxiliary/draw/draw_gs.c
src/gallium/auxiliary/draw/draw_gs.h
src/gallium/auxiliary/draw/draw_llvm.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c

index 987c01a79192c355aade6cfbac77af0788de83ab..2f94eaeda4fa574c7f09eb5cf54a296629ffced5 100644 (file)
@@ -280,6 +280,7 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
    int max_prims_per_invocation = 0;
    char *output_ptr = (char*)shader->gs_output;
    int i, j, prim_idx;
+   unsigned next_prim_boundary = shader->primitive_boundary;
 
    for (i = 0; i < shader->vector_length; ++i) {
       int prims = shader->llvm_emitted_primitives[i];
@@ -290,19 +291,42 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
       total_verts += shader->llvm_emitted_vertices[i];
    }
 
-
    output_ptr += shader->emitted_vertices * shader->vertex_size;
    for (i = 0; i < shader->vector_length - 1; ++i) {
       int current_verts = shader->llvm_emitted_vertices[i];
-
-      if (current_verts != shader->max_output_vertices) {
-         memcpy(output_ptr + (vertex_count + current_verts) * shader->vertex_size,
-                output_ptr + (vertex_count + shader->max_output_vertices) * shader->vertex_size,
-                shader->vertex_size * (total_verts - vertex_count));
+      int next_verts = shader->llvm_emitted_vertices[i + 1];
+#if 0
+      int j; 
+      for (j = 0; j < current_verts; ++j) {
+         struct vertex_header *vh = (struct vertex_header *)
+            (output_ptr + shader->vertex_size * (i * next_prim_boundary + j));
+         debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count,
+                      vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]);
+         
+      }
+#endif
+      debug_assert(current_verts <= shader->max_output_vertices);
+      debug_assert(next_verts <= shader->max_output_vertices);
+      if (next_verts) {
+         memmove(output_ptr + (vertex_count + current_verts) * shader->vertex_size,
+                 output_ptr + ((i + 1) * next_prim_boundary) * shader->vertex_size,
+                 shader->vertex_size * next_verts);
       }
       vertex_count += current_verts;
    }
 
+#if 0
+   {
+      int i;
+      for (i = 0; i < total_verts; ++i) {
+         struct vertex_header *vh = (struct vertex_header *)(output_ptr + shader->vertex_size * i);
+         debug_printf("%d) [%f, %f, %f, %f]\n", i,
+                      vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]);
+         
+      }
+   }
+#endif
+
    prim_idx = 0;
    for (i = 0; i < shader->vector_length; ++i) {
       int num_prims = shader->llvm_emitted_primitives[i];
@@ -513,10 +537,12 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
 
    output_verts->vertex_size = vertex_size;
    output_verts->stride = output_verts->vertex_size;
+   /* we allocate exactly one extra vertex per primitive to allow the GS to emit
+    * overflown vertices into some area where they won't harm anyone */
    output_verts->verts =
       (struct vertex_header *)MALLOC(output_verts->vertex_size *
                                      max_out_prims *
-                                     shader->max_output_vertices);
+                                     shader->primitive_boundary);
 
 #if 0
    debug_printf("%s count = %d (in prims # = %d)\n",
@@ -724,6 +750,16 @@ draw_create_geometry_shader(struct draw_context *draw,
                TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES)
          gs->max_output_vertices = gs->info.properties[i].data[0];
    }
+   /* Primitive boundary is bigger than max_output_vertices by one, because
+    * the specification says that the geometry shader should exit if the 
+    * number of emitted vertices is bigger or equal to max_output_vertices and
+    * we can't do that because we're running in the SoA mode, which means that
+    * our storing routines will keep getting called on channels that have
+    * overflown.
+    * So we need some scratch area where we can keep writing the overflown 
+    * vertices without overwriting anything important or crashing.
+    */
+   gs->primitive_boundary = gs->max_output_vertices + 1;
 
    for (i = 0; i < gs->info.num_outputs; i++) {
       if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
index 7c841396aa2f24c90f8ae9a39bd99b7f6b8ffca7..ca744cebfc923bf687dd84d1046eae991864c95d 100644 (file)
@@ -68,6 +68,7 @@ struct draw_geometry_shader {
    unsigned position_output;
 
    unsigned max_output_vertices;
+   unsigned primitive_boundary;
    unsigned input_primitive;
    unsigned output_primitive;
 
index 0e34978319891751b3fe999907b2790d0a699287..33fe40d5e7f5d7663dd58b73b2f2778c3fe4b9a6 100644 (file)
@@ -1287,8 +1287,8 @@ draw_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
    LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
                                                   lp_int_type(gs_type), 0);
    LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
-   LLVMValueRef max_output_vertices =
-      lp_build_const_int32(gallivm, variant->shader->base.max_output_vertices);
+   LLVMValueRef next_prim_offset =
+      lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
    LLVMValueRef io = variant->io_ptr;
    unsigned i;
    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
@@ -1297,7 +1297,7 @@ draw_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
       LLVMValueRef currently_emitted =
          LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
-      indices[i] = LLVMBuildMul(builder, ind, max_output_vertices, "");
+      indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
       indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
    }
 
index f1b1d79eab70c16507c26add292db7a3ff740e09..0fbb8aabbb02f81119db4d36ba3174f30858e33d 100644 (file)
@@ -396,6 +396,7 @@ struct lp_build_tgsi_soa_context
    LLVMValueRef emitted_prims_vec_ptr;
    LLVMValueRef total_emitted_vertices_vec_ptr;
    LLVMValueRef emitted_vertices_vec_ptr;
+   LLVMValueRef max_output_vertices_vec;
 
    LLVMValueRef consts_ptr;
    const LLVMValueRef *pos;
index 9822f72c6a3ece0e12bea0d81cad15f5f6dbf245..28eb57b4b51397b2a435d992e606a03dcb1deef7 100644 (file)
@@ -828,7 +828,6 @@ emit_fetch_gs_input(
       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
    }
 
-
    res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                     vertex_index, attrib_index,
                                     swizzle_index);
@@ -2257,6 +2256,20 @@ clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
    LLVMBuildStore(builder, current_vec, ptr);
 }
 
+static LLVMValueRef
+clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
+                                  LLVMValueRef current_mask_vec,
+                                  LLVMValueRef total_emitted_vertices_vec)
+{
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
+   LLVMValueRef max_mask = lp_build_cmp(uint_bld, PIPE_FUNC_LESS,
+                                        total_emitted_vertices_vec,
+                                        bld->max_output_vertices_vec);
+
+   return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
+}
+
 static void
 emit_vertex(
    const struct lp_build_tgsi_action * action,
@@ -2270,6 +2283,8 @@ emit_vertex(
       LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
       LLVMValueRef total_emitted_vertices_vec =
          LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
+      masked_ones = clamp_mask_to_max_output_vertices(bld, masked_ones,
+                                                      total_emitted_vertices_vec);
       gather_outputs(bld);
       bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                  bld->outputs,
@@ -2812,12 +2827,29 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
    bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
 
    if (gs_iface) {
+      /* There's no specific value for this because it should always
+       * be set, but apps using ext_geometry_shader4 quite often
+       * were forgetting so we're using MAX_VERTEX_VARYING from
+       * that spec even though we could debug_assert if it's not
+       * set, but that's a lot uglier. */
+      uint max_output_vertices = 32;
+      uint i = 0;
       /* inputs are always indirect with gs */
       bld.indirect_files |= (1 << TGSI_FILE_INPUT);
       bld.gs_iface = gs_iface;
       bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
       bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
       bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
+
+      for (i = 0; i < info->num_properties; ++i) {
+         if (info->properties[i].name ==
+             TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
+            max_output_vertices = info->properties[i].data[0];
+         }
+      }
+      bld.max_output_vertices_vec =
+         lp_build_const_int_vec(gallivm, bld.bld_base.uint_bld.type,
+                                max_output_vertices);
    }
 
    lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);