int max_prims_per_invocation = 0;
char *output_ptr = (char*)shader->gs_output;
int i, j, prim_idx;
+ unsigned next_prim_boundary = shader->primitive_boundary;
for (i = 0; i < shader->vector_length; ++i) {
int prims = shader->llvm_emitted_primitives[i];
total_verts += shader->llvm_emitted_vertices[i];
}
-
output_ptr += shader->emitted_vertices * shader->vertex_size;
for (i = 0; i < shader->vector_length - 1; ++i) {
int current_verts = shader->llvm_emitted_vertices[i];
-
- if (current_verts != shader->max_output_vertices) {
- memcpy(output_ptr + (vertex_count + current_verts) * shader->vertex_size,
- output_ptr + (vertex_count + shader->max_output_vertices) * shader->vertex_size,
- shader->vertex_size * (total_verts - vertex_count));
+ int next_verts = shader->llvm_emitted_vertices[i + 1];
+#if 0
+ int j;
+ for (j = 0; j < current_verts; ++j) {
+ struct vertex_header *vh = (struct vertex_header *)
+ (output_ptr + shader->vertex_size * (i * next_prim_boundary + j));
+ debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count,
+ vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]);
+
+ }
+#endif
+ debug_assert(current_verts <= shader->max_output_vertices);
+ debug_assert(next_verts <= shader->max_output_vertices);
+ if (next_verts) {
+ memmove(output_ptr + (vertex_count + current_verts) * shader->vertex_size,
+ output_ptr + ((i + 1) * next_prim_boundary) * shader->vertex_size,
+ shader->vertex_size * next_verts);
}
vertex_count += current_verts;
}
+#if 0
+ {
+ int i;
+ for (i = 0; i < total_verts; ++i) {
+ struct vertex_header *vh = (struct vertex_header *)(output_ptr + shader->vertex_size * i);
+ debug_printf("%d) [%f, %f, %f, %f]\n", i,
+ vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]);
+
+ }
+ }
+#endif
+
prim_idx = 0;
for (i = 0; i < shader->vector_length; ++i) {
int num_prims = shader->llvm_emitted_primitives[i];
output_verts->vertex_size = vertex_size;
output_verts->stride = output_verts->vertex_size;
+ /* we allocate exactly one extra vertex per primitive to allow the GS to emit
+ * overflowed vertices into some area where they won't harm anyone */
output_verts->verts =
(struct vertex_header *)MALLOC(output_verts->vertex_size *
max_out_prims *
- shader->max_output_vertices);
+ shader->primitive_boundary);
#if 0
debug_printf("%s count = %d (in prims # = %d)\n",
TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES)
gs->max_output_vertices = gs->info.properties[i].data[0];
}
+ /* Primitive boundary is bigger than max_output_vertices by one, because
+ * the specification says that the geometry shader should exit if the
+ * number of emitted vertices is greater than or equal to
+ * max_output_vertices, and we can't do that because we're running in SoA
+ * mode, which means that our storing routines will keep getting called on
+ * channels that have overflowed.
+ * So we need some scratch area where we can keep writing the overflowed
+ * vertices without overwriting anything important or crashing.
+ */
+ gs->primitive_boundary = gs->max_output_vertices + 1;
for (i = 0; i < gs->info.num_outputs; i++) {
if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
unsigned position_output;
unsigned max_output_vertices;
+ unsigned primitive_boundary;
unsigned input_primitive;
unsigned output_primitive;
LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
lp_int_type(gs_type), 0);
LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
- LLVMValueRef max_output_vertices =
- lp_build_const_int32(gallivm, variant->shader->base.max_output_vertices);
+ LLVMValueRef next_prim_offset =
+ lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
LLVMValueRef io = variant->io_ptr;
unsigned i;
const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
LLVMValueRef ind = lp_build_const_int32(gallivm, i);
LLVMValueRef currently_emitted =
LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
- indices[i] = LLVMBuildMul(builder, ind, max_output_vertices, "");
+ indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
}
LLVMValueRef emitted_prims_vec_ptr;
LLVMValueRef total_emitted_vertices_vec_ptr;
LLVMValueRef emitted_vertices_vec_ptr;
+ LLVMValueRef max_output_vertices_vec;
LLVMValueRef consts_ptr;
const LLVMValueRef *pos;
vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
}
-
res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
vertex_index, attrib_index,
swizzle_index);
LLVMBuildStore(builder, current_vec, ptr);
}
+/**
+ * Restrict an execution mask to the lanes that are still below the
+ * maximum output vertex count.
+ *
+ * Builds a PIPE_FUNC_LESS comparison (in the uint build context) of
+ * total_emitted_vertices_vec against bld->max_output_vertices_vec and
+ * ANDs the comparison result into current_mask_vec.
+ *
+ * Returns the combined mask value.
+ */
+static LLVMValueRef
+clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
+                                  LLVMValueRef current_mask_vec,
+                                  LLVMValueRef total_emitted_vertices_vec)
+{
+   struct lp_build_context *cmp_bld = &bld->bld_base.uint_bld;
+   LLVMValueRef below_limit;
+
+   /* Per-lane predicate: emitted count < max output vertices. */
+   below_limit = lp_build_cmp(cmp_bld, PIPE_FUNC_LESS,
+                              total_emitted_vertices_vec,
+                              bld->max_output_vertices_vec);
+
+   /* Keep only the lanes that were already active and pass the predicate. */
+   return LLVMBuildAnd(bld->bld_base.base.gallivm->builder,
+                       current_mask_vec, below_limit, "");
+}
+
static void
emit_vertex(
const struct lp_build_tgsi_action * action,
LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
LLVMValueRef total_emitted_vertices_vec =
LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
+ masked_ones = clamp_mask_to_max_output_vertices(bld, masked_ones,
+ total_emitted_vertices_vec);
gather_outputs(bld);
bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
bld->outputs,
bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
if (gs_iface) {
+ /* There's no specific default value for this because the property
+ * should always be set. Apps using ext_geometry_shader4 quite often
+ * forgot to set it, though, so we fall back to MAX_VERTEX_VARYING
+ * from that spec. We could debug_assert when it's not set instead,
+ * but that's a lot uglier. */
+ uint max_output_vertices = 32;
+ uint i = 0;
/* inputs are always indirect with gs */
bld.indirect_files |= (1 << TGSI_FILE_INPUT);
bld.gs_iface = gs_iface;
bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
+
+ for (i = 0; i < info->num_properties; ++i) {
+ if (info->properties[i].name ==
+ TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
+ max_output_vertices = info->properties[i].data[0];
+ }
+ }
+ bld.max_output_vertices_vec =
+ lp_build_const_int_vec(gallivm, bld.bld_base.uint_bld.type,
+ max_output_vertices);
}
lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);