#include "lp_bld_sample.h"
#include "lp_bld_struct.h"
+#define DUMP_GS_EMITS 0
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
mask->cond_stack_size = 0;
mask->loop_stack_size = 0;
mask->call_stack_size = 0;
+ mask->switch_stack_size = 0;
mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
- mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
+ mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
+ mask->cond_mask = mask->switch_mask =
LLVMConstAllOnes(mask->int_vec_type);
mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
} else
mask->exec_mask = mask->cond_mask;
+ if (mask->switch_stack_size) {
+ mask->exec_mask = LLVMBuildAnd(builder,
+ mask->exec_mask,
+ mask->switch_mask,
+ "switchmask");
+ }
+
if (mask->call_stack_size || mask->ret_in_main) {
mask->exec_mask = LLVMBuildAnd(builder,
mask->exec_mask,
mask->has_mask = (mask->cond_stack_size > 0 ||
mask->loop_stack_size > 0 ||
mask->call_stack_size > 0 ||
+ mask->switch_stack_size > 0 ||
mask->ret_in_main);
}
assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
+ mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
+ mask->break_type;
+ mask->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
+
mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
lp_exec_mask_update(mask);
}
/**
 * Emit a BRK instruction.
 *
 * Depending on mask->break_type this either breaks out of the innermost
 * loop (updating break_mask) or out of the innermost switch (updating
 * switch_mask).  For switches, an unconditional break (next opcode is
 * ENDSWITCH or CASE) lets us zero the whole switch mask; inside a deferred
 * default it instead jumps straight back to the recorded pc.
 */
static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      /* Loop break: lanes currently executing leave the loop, i.e. are
       * removed from break_mask. */
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      /* Switch break.  Peek at the next instruction: a break immediately
       * followed by ENDSWITCH or CASE is unconditional for all active
       * lanes (no code between the break and the next label). */
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);


      if (mask->switch_in_default) {
         /*
          * stop default execution but only if this is an unconditional switch.
          * (The condition here is not perfect since dead code after break is
          * allowed but should be sufficient since false negatives are just
          * unoptimized - so we don't have to pre-evaluate that).
          */
         if(break_always && mask->switch_pc) {
            /* Jump back to where the deferred default was entered from. */
            bld_base->pc = mask->switch_pc;
            return;
         }
      }

      if (break_always) {
         /* All lanes leave the switch - no need to compute a per-lane mask. */
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         /* Conditional break: only lanes currently executing leave. */
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }
   lp_exec_mask_update(mask);
}
LLVMValueRef cond_mask = LLVMBuildAnd(builder,
mask->exec_mask,
cond, "cond_mask");
- cond_mask = LLVMBuildNot(builder, cond, "break_cond");
+ cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
- mask->break_mask = LLVMBuildAnd(builder,
- mask->break_mask,
- cond_mask, "breakc_full");
+ if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
+ mask->break_mask = LLVMBuildAnd(builder,
+ mask->break_mask,
+ cond_mask, "breakc_full");
+ }
+ else {
+ mask->switch_mask = LLVMBuildAnd(builder,
+ mask->switch_mask,
+ cond_mask, "breakc_switch");
+ }
lp_exec_mask_update(mask);
}
mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
+ mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];
lp_exec_mask_update(mask);
}
+static void lp_exec_switch(struct lp_exec_mask *mask,
+ LLVMValueRef switchval)
+{
+ mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
+ mask->break_type;
+ mask->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
+
+ mask->switch_stack[mask->switch_stack_size].switch_val = mask->switch_val;
+ mask->switch_stack[mask->switch_stack_size].switch_mask = mask->switch_mask;
+ mask->switch_stack[mask->switch_stack_size].switch_mask_default = mask->switch_mask_default;
+ mask->switch_stack[mask->switch_stack_size].switch_in_default = mask->switch_in_default;
+ mask->switch_stack[mask->switch_stack_size].switch_pc = mask->switch_pc;
+ mask->switch_stack_size++;
+
+ mask->switch_val = switchval;
+ mask->switch_mask = LLVMConstNull(mask->int_vec_type);
+ mask->switch_mask_default = LLVMConstNull(mask->int_vec_type);
+ mask->switch_in_default = false;
+ mask->switch_pc = 0;
+
+ lp_exec_mask_update(mask);
+}
+
/**
 * Emit an ENDSWITCH instruction.
 *
 * If a default statement was deferred (recorded in switch_pc by
 * lp_exec_default) and not yet executed, jump back and execute it now with
 * the default mask; otherwise pop the saved switch state of the enclosing
 * switch and restore the break type.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   /* check if there's deferred default if so do it now */
   if (mask->switch_pc && !mask->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      /* Default lanes = enclosing mask minus all lanes any case claimed. */
      prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      mask->switch_in_default = true;

      lp_exec_mask_update(mask);

      /* switch_pc points just past the DEFAULT opcode that deferred. */
      assert(bld_base->instructions[mask->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      /* Rewind execution to the (skipped) default body. */
      bld_base->pc = mask->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      mask->switch_pc = tmp_pc - 1;

      return;
   }

   else if (mask->switch_pc && mask->switch_in_default) {
      /* We just came back from executing the deferred default body. */
      assert(bld_base->pc == mask->switch_pc + 1);
   }

   /* Pop the enclosing switch's saved state. */
   mask->switch_stack_size--;
   mask->switch_val = mask->switch_stack[mask->switch_stack_size].switch_val;
   mask->switch_mask = mask->switch_stack[mask->switch_stack_size].switch_mask;
   mask->switch_mask_default = mask->switch_stack[mask->switch_stack_size].switch_mask_default;
   mask->switch_in_default = mask->switch_stack[mask->switch_stack_size].switch_in_default;
   mask->switch_pc = mask->switch_stack[mask->switch_stack_size].switch_pc;

   /* Restore whether BRK targets a loop or a switch in the outer scope. */
   mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];

   lp_exec_mask_update(mask);
}
+
/**
 * Emit a CASE instruction.
 *
 * Lanes whose switch value equals \p caseval join the currently executing
 * set (fallthrough from the previous case is preserved via the OR with the
 * existing switch_mask).  The matched lanes are also accumulated into
 * switch_mask_default so the default case can later be computed as "every
 * lane no case claimed".
 */
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   LLVMValueRef casemask, prevmask;

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!mask->switch_in_default) {
      /* Clamp to the mask of the enclosing scope. */
      prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, mask->switch_val);
      /* Record that these lanes were handled by some case. */
      mask->switch_mask_default = LLVMBuildOr(builder, casemask,
                                              mask->switch_mask_default, "sw_default_mask");
      /* Keep lanes already executing (fallthrough) plus the new matches. */
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}
+
+/*
+ * Analyse default statement in a switch.
+ * \return true if default is last statement, false otherwise
+ * \param default_pc_start contains pc of instruction to jump to
+ * if default wasn't last but there's no
+ * fallthrough into default.
+ */
+static boolean default_analyse_is_last(struct lp_exec_mask *mask,
+ struct lp_build_tgsi_context * bld_base,
+ int *default_pc_start)
+{
+ unsigned pc = bld_base->pc;
+ unsigned curr_switch_stack = mask->switch_stack_size;
+
+ /* skip over case statements which are together with default */
+ while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
+ pc++;
+ }
+
+ while (pc != -1 && pc < bld_base->num_instructions) {
+ unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
+ switch (opcode) {
+ case TGSI_OPCODE_CASE:
+ if (curr_switch_stack == mask->switch_stack_size) {
+ *default_pc_start = pc - 1;
+ return false;
+ }
+ break;
+ case TGSI_OPCODE_SWITCH:
+ curr_switch_stack++;
+ break;
+ case TGSI_OPCODE_ENDSWITCH:
+ if (curr_switch_stack == mask->switch_stack_size) {
+ *default_pc_start = pc - 1;
+ return true;
+ }
+ curr_switch_stack--;
+ break;
+ }
+ pc++;
+ }
+ /* should never arrive here */
+ assert(0);
+ return true;
+}
+
+static void lp_exec_default(struct lp_exec_mask *mask,
+ struct lp_build_tgsi_context * bld_base)
+{
+ LLVMBuilderRef builder = mask->bld->gallivm->builder;
+
+ int default_exec_pc;
+ boolean default_is_last;
+
+ /*
+ * This is a messy opcode, because it may not be always at the end and
+ * there can be fallthrough in and out of it.
+ */
+
+ default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
+ /*
+ * If it is last statement in switch (note that case statements appearing
+ * "at the same time" as default don't change that) everything is just fine,
+ * update switch mask and go on. This means we can handle default with
+ * fallthrough INTO it without overhead, if it is last.
+ */
+ if (default_is_last) {
+ LLVMValueRef prevmask, defaultmask;
+ prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
+ defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
+ defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
+ mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
+ mask->switch_in_default = true;
+
+ lp_exec_mask_update(mask);
+ }
+ else {
+ /*
+ * Technically, "case" immediately before default isn't really a
+ * fallthrough, however we still have to count them as such as we
+ * already have updated the masks.
+ * If that happens in practice could add a switch optimizer pass
+ * which just gets rid of all case statements appearing together with
+ * default (or could do switch analysis at switch start time instead).
+ */
+ unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
+ boolean ft_into = (opcode != TGSI_OPCODE_BRK ||
+ opcode != TGSI_OPCODE_SWITCH);
+ /*
+ * If it is not last statement and there was no fallthrough into it,
+ * we record the PC and continue execution at next case (again, those
+ * case encountered at the same time don't count). At endswitch
+ * time, we update switchmask, and go back executing the code we skipped
+ * until the next break (possibly re-executing some code with changed mask
+ * if there was a fallthrough out of default).
+ * Finally, if it is not last statement and there was a fallthrough into it,
+ * do the same as with the former case, except instead of skipping the code
+ * just execute it without updating the mask, then go back and re-execute.
+ */
+ mask->switch_pc = bld_base->pc;
+ if (!ft_into) {
+ bld_base->pc = default_exec_pc;
+ }
+ }
+}
+
+
/* stores val into an address pointed to by dst.
* mask->exec_mask is used to figure out which bits of val
* should be stored into the address
if (mask->cond_stack_size == 0 &&
mask->loop_stack_size == 0 &&
+ mask->switch_stack_size == 0 &&
mask->call_stack_size == 0) {
/* returning from main() */
*pc = -1;
vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
}
-
res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
+ reg->Dimension.Indirect,
vertex_index, attrib_index,
swizzle_index);
break;
}
+ /* If the destination is untyped then the source can be anything,
+ * but LLVM won't like if the types don't match so lets cast
+ * to the correct destination type as expected by LLVM. */
+ if (dtype == TGSI_TYPE_UNTYPED &&
+ !lp_check_vec_type(bld_store->type, LLVMTypeOf(value))) {
+ value = LLVMBuildBitCast(builder, value, bld_store->vec_type,
+ "src_casted");
+ }
+
switch( inst->Instruction.Saturate ) {
case TGSI_SAT_NONE:
break;
}
else {
LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
- chan_index);
+ chan_index);
lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, out_ptr);
}
break;
opcode == TGSI_OPCODE_CAL ||
opcode == TGSI_OPCODE_CALLNZ ||
opcode == TGSI_OPCODE_IF ||
- opcode == TGSI_OPCODE_IFC ||
+ opcode == TGSI_OPCODE_UIF ||
opcode == TGSI_OPCODE_BGNLOOP ||
opcode == TGSI_OPCODE_SWITCH)
return FALSE;
return one_vec;
}
+static void
+increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
+ LLVMValueRef ptr,
+ LLVMValueRef mask)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+
+ LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
+
+ current_vec = LLVMBuildAdd(builder, current_vec, mask, "");
+
+ LLVMBuildStore(builder, current_vec, ptr);
+}
+
+static void
+clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
+ LLVMValueRef ptr,
+ LLVMValueRef mask)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+
+ LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
+ LLVMValueRef full_mask = lp_build_cmp(&bld_base->uint_bld,
+ PIPE_FUNC_NOTEQUAL,
+ mask,
+ bld_base->uint_bld.zero);
+
+ current_vec = lp_build_select(&bld_base->uint_bld,
+ full_mask,
+ bld_base->uint_bld.zero,
+ current_vec);
+
+ LLVMBuildStore(builder, current_vec, ptr);
+}
+
+static LLVMValueRef
+clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
+ LLVMValueRef current_mask_vec,
+ LLVMValueRef total_emitted_vertices_vec)
+{
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+ struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
+ LLVMValueRef max_mask = lp_build_cmp(uint_bld, PIPE_FUNC_LESS,
+ total_emitted_vertices_vec,
+ bld->max_output_vertices_vec);
+
+ return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
+}
+
static void
emit_vertex(
const struct lp_build_tgsi_action * action,
if (bld->gs_iface->emit_vertex) {
LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
+ LLVMValueRef total_emitted_vertices_vec =
+ LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
+ masked_ones = clamp_mask_to_max_output_vertices(bld, masked_ones,
+ total_emitted_vertices_vec);
gather_outputs(bld);
bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
bld->outputs,
- bld->total_emitted_vertices_vec);
- bld->emitted_vertices_vec =
- LLVMBuildAdd(builder, bld->emitted_vertices_vec, masked_ones, "");
- bld->total_emitted_vertices_vec =
- LLVMBuildAdd(builder, bld->total_emitted_vertices_vec, masked_ones, "");
- bld->pending_end_primitive = TRUE;
+ total_emitted_vertices_vec);
+ increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
+ masked_ones);
+ increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
+ masked_ones);
+#if DUMP_GS_EMITS
+ lp_build_print_value(bld->bld_base.base.gallivm,
+ " +++ emit vertex masked ones = ",
+ masked_ones);
+ lp_build_print_value(bld->bld_base.base.gallivm,
+ " +++ emit vertex emitted = ",
+ total_emitted_vertices_vec);
+#endif
}
}
/**
 * Finish the primitive being built in the lanes selected by \p masked_ones:
 * notify the GS interface, bump the per-lane emitted-primitive counter and
 * reset the per-lane emitted-vertex counter for those lanes.
 *
 * \param masked_ones  per-lane one/zero vector selecting the lanes that
 *                     actually end a primitive here.
 */
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef masked_ones)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      /* Debug-only tracing of the emit state (compile-time disabled). */
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           masked_ones);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      /* One more primitive completed in the selected lanes... */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                masked_ones);
      /* ...and their vertex count restarts for the next primitive. */
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   masked_ones);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}
+
static void
end_primitive(
const struct lp_build_tgsi_action * action,
struct lp_build_emit_data * emit_data)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
if (bld->gs_iface->end_primitive) {
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
- bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
- bld->emitted_vertices_vec,
- bld->emitted_prims_vec);
- bld->emitted_prims_vec =
- LLVMBuildAdd(builder, bld->emitted_prims_vec, masked_ones, "");
- bld->emitted_vertices_vec = bld_base->uint_bld.zero;
- bld->pending_end_primitive = FALSE;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ LLVMValueRef emitted_verts = LLVMBuildLoad(
+ builder, bld->emitted_vertices_vec_ptr, "");
+ LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
+ emitted_verts,
+ uint_bld->zero);
+ /* We need to combine the current execution mask with the mask
+ telling us which, if any, execution slots actually have
+ unemitted primitives, this way we make sure that end_primitives
+ executes only on the paths that have unflushed vertices */
+ masked_ones = LLVMBuildAnd(builder, masked_ones, emitted_mask, "");
+
+ end_primitive_masked(bld_base, masked_ones);
}
}
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- lp_exec_break(&bld->exec_mask);
+ lp_exec_break(&bld->exec_mask, bld_base);
}
static void
lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}
+static void
+uif_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMValueRef tmp;
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+
+ tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
+ emit_data->args[0], uint_bld->zero);
+ lp_exec_mask_cond_push(&bld->exec_mask, tmp);
+}
+
+static void
+case_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ lp_exec_case(&bld->exec_mask, emit_data->args[0]);
+}
+
+static void
+default_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ lp_exec_default(&bld->exec_mask, bld_base);
+}
+
+static void
+switch_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
+}
+
+static void
+endswitch_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ lp_exec_endswitch(&bld->exec_mask, bld_base);
+}
+
static void
bgnloop_emit(
const struct lp_build_tgsi_action * action,
if (bld->gs_iface) {
struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
- bld->emitted_prims_vec = uint_bld->zero;
- bld->emitted_vertices_vec = uint_bld->zero;
- bld->total_emitted_vertices_vec = uint_bld->zero;
+ bld->emitted_prims_vec_ptr =
+ lp_build_alloca(gallivm,
+ uint_bld->vec_type,
+ "emitted_prims_ptr");
+ bld->emitted_vertices_vec_ptr =
+ lp_build_alloca(gallivm,
+ uint_bld->vec_type,
+ "emitted_vertices_ptr");
+ bld->total_emitted_vertices_vec_ptr =
+ lp_build_alloca(gallivm,
+ uint_bld->vec_type,
+ "total_emitted_vertices_ptr");
+
+ LLVMBuildStore(gallivm->builder, uint_bld->zero,
+ bld->emitted_prims_vec_ptr);
+ LLVMBuildStore(gallivm->builder, uint_bld->zero,
+ bld->emitted_vertices_vec_ptr);
+ LLVMBuildStore(gallivm->builder, uint_bld->zero,
+ bld->total_emitted_vertices_vec_ptr);
}
}
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
if (0) {
/* for debugging */
/* If we have indirect addressing in outputs we need to copy our alloca array
* to the outputs slots specified by the caller */
if (bld->gs_iface) {
- /* flush the accumulated vertices as a primitive */
- if (bld->pending_end_primitive) {
- end_primitive(NULL, bld_base, NULL);
- bld->pending_end_primitive = FALSE;
- }
+ LLVMValueRef total_emitted_vertices_vec;
+ LLVMValueRef emitted_prims_vec;
+ /* implicit end_primitives, needed in case there are any unflushed
+ vertices in the cache */
+ end_primitive(NULL, bld_base, NULL);
+
+ total_emitted_vertices_vec =
+ LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
+ emitted_prims_vec =
+ LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
bld->gs_iface->gs_epilogue(bld->gs_iface,
&bld->bld_base,
- bld->total_emitted_vertices_vec,
- bld->emitted_prims_vec);
+ total_emitted_vertices_vec,
+ emitted_prims_vec);
} else {
gather_outputs(bld);
}
struct lp_build_mask_context *mask,
LLVMValueRef consts_ptr,
const struct lp_bld_tgsi_system_values *system_values,
- const LLVMValueRef *pos,
const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
struct lp_build_sampler_soa *sampler,
lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
bld.mask = mask;
- bld.pos = pos;
bld.inputs = inputs;
bld.outputs = outputs;
bld.consts_ptr = consts_ptr;
bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
if (gs_iface) {
+ /* There's no specific value for this because it should always
+ * be set, but apps using ext_geometry_shader4 quite often
+ * were forgetting so we're using MAX_VERTEX_VARYING from
+ * that spec even though we could debug_assert if it's not
+ * set, but that's a lot uglier. */
+ uint max_output_vertices = 32;
+ uint i = 0;
/* inputs are always indirect with gs */
bld.indirect_files |= (1 << TGSI_FILE_INPUT);
bld.gs_iface = gs_iface;
- bld.pending_end_primitive = FALSE;
bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
+
+ for (i = 0; i < info->num_properties; ++i) {
+ if (info->properties[i].name ==
+ TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
+ max_output_vertices = info->properties[i].data[0];
+ }
+ }
+ bld.max_output_vertices_vec =
+ lp_build_const_int_vec(gallivm, bld.bld_base.uint_bld.type,
+ max_output_vertices);
}
- lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
+ lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
bld.system_values = *system_values;