int cond_stack_size;
LLVMValueRef cond_mask;
- LLVMValueRef break_stack[LP_MAX_TGSI_NESTING];
- int break_stack_size;
- LLVMValueRef break_mask;
-
- LLVMValueRef cont_stack[LP_MAX_TGSI_NESTING];
- int cont_stack_size;
+ LLVMBasicBlockRef loop_block;
LLVMValueRef cont_mask;
-
- LLVMBasicBlockRef loop_stack[LP_MAX_TGSI_NESTING];
+ LLVMValueRef break_mask;
+ LLVMValueRef break_var;
+ struct {
+ LLVMBasicBlockRef loop_block;
+ LLVMValueRef cont_mask;
+ LLVMValueRef break_mask;
+ LLVMValueRef break_var;
+ } loop_stack[LP_MAX_TGSI_NESTING];
int loop_stack_size;
- LLVMBasicBlockRef loop_block;
-
LLVMValueRef exec_mask;
};
{
struct lp_build_context base;
+ /* Builder for integer masks and indices */
+ struct lp_build_context int_bld;
+
LLVMValueRef consts_ptr;
const LLVMValueRef *pos;
const LLVMValueRef (*inputs)[NUM_CHANNELS];
LLVMValueRef (*outputs)[NUM_CHANNELS];
- struct lp_build_sampler_soa *sampler;
+ const struct lp_build_sampler_soa *sampler;
LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
+ LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
/* we allocate an array of temps if we have indirect
* addressing and then the temps above is unused */
mask->has_mask = FALSE;
mask->cond_stack_size = 0;
mask->loop_stack_size = 0;
- mask->break_stack_size = 0;
- mask->cont_stack_size = 0;
mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
+ mask->break_mask = mask->cont_mask = mask->cond_mask =
+ LLVMConstAllOnes(mask->int_vec_type);
}
static void lp_exec_mask_update(struct lp_exec_mask *mask)
LLVMValueRef val)
{
assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
+ if (mask->cond_stack_size == 0) {
+ assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
+ }
mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
- mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
- mask->int_vec_type, "");
+ assert(LLVMTypeOf(val) == mask->int_vec_type);
+ mask->cond_mask = val;
lp_exec_mask_update(mask);
}
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
- LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
- LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
- mask->cond_mask, "");
-
- /* means that we didn't have any mask before and that
- * we were fully enabled */
- if (mask->cond_stack_size <= 1) {
- prev_mask = LLVMConstAllOnes(mask->int_vec_type);
+ LLVMValueRef prev_mask;
+ LLVMValueRef inv_mask;
+
+ assert(mask->cond_stack_size);
+ prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
+ if (mask->cond_stack_size == 1) {
+ assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
}
+ inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
+
mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
inv_mask,
prev_mask, "");
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
+ assert(mask->cond_stack_size);
mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
lp_exec_mask_update(mask);
}
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
+ if (mask->loop_stack_size == 0) {
+ assert(mask->loop_block == NULL);
+ assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
+ assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
+ assert(mask->break_var == NULL);
+ }
- if (mask->cont_stack_size == 0)
- mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type);
- if (mask->break_stack_size == 0)
- mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
- if (mask->cond_stack_size == 0)
- mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
+ assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
- assert(mask->break_stack_size < LP_MAX_TGSI_NESTING);
- assert(mask->cont_stack_size < LP_MAX_TGSI_NESTING);
- assert(mask->break_stack_size < LP_MAX_TGSI_NESTING);
+ mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
+ mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
+ mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
+ mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
+ ++mask->loop_stack_size;
+
+ mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
+ LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
- mask->break_stack[mask->break_stack_size++] = mask->break_mask;
- mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
- mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
LLVMBuildBr(mask->bld->builder, mask->loop_block);
LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
+ mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");
+
lp_exec_mask_update(mask);
}
assert(mask->break_mask);
+ /*
+ * Restore the cont_mask, but don't pop
+ */
+ assert(mask->loop_stack_size);
+ mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
+ lp_exec_mask_update(mask);
+
+ /*
+ * Unlike the continue mask, the break_mask must be preserved across loop
+ * iterations
+ */
+ LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
+
/* i1cond = (mask == 0) */
i1cond = LLVMBuildICmp(
mask->bld->builder,
LLVMIntNE,
- LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""),
+ LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
LLVMConstNull(reg_type), "");
endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
- mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
- /* pop the cont mask */
- if (mask->cont_stack_size) {
- mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
- }
- /* pop the break mask */
- if (mask->break_stack_size) {
- mask->break_mask = mask->break_stack[--mask->break_stack_size];
- }
+ assert(mask->loop_stack_size);
+ --mask->loop_stack_size;
+ mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
+ mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
+ mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
+ mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
lp_exec_mask_update(mask);
}
* (0 means don't store this bit, 1 means do store).
*/
static void lp_exec_mask_store(struct lp_exec_mask *mask,
+ LLVMValueRef pred,
LLVMValueRef val,
LLVMValueRef dst)
{
+ /* Mix the predicate and execution mask */
if (mask->has_mask) {
+ if (pred) {
+ pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
+ } else {
+ pred = mask->exec_mask;
+ }
+ }
+
+ if (pred) {
LLVMValueRef real_val, dst_val;
dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
real_val = lp_build_select(mask->bld,
- mask->exec_mask,
+ pred,
val, dst_val);
LLVMBuildStore(mask->bld->builder, real_val, dst);
static LLVMValueRef
get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
unsigned index,
- unsigned swizzle,
+ unsigned chan,
boolean is_indirect,
LLVMValueRef addr)
{
+ assert(chan < 4);
if (!bld->has_indirect_addressing) {
- return bld->temps[index][swizzle];
+ return bld->temps[index][chan];
} else {
LLVMValueRef lindex =
- LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
+ LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0);
if (is_indirect)
lindex = lp_build_add(&bld->base, lindex, addr);
return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
const struct tgsi_full_src_register *reg = &inst->Src[index];
unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
LLVMValueRef res;
- LLVMValueRef addr;
+ LLVMValueRef addr = NULL;
switch (swizzle) {
case TGSI_SWIZZLE_X:
}
+/**
+ * Predicate.
+ */
+static void
+emit_fetch_predicate(
+ struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst,
+ LLVMValueRef *pred)
+{
+ unsigned index;
+ unsigned char swizzles[4];
+ LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
+ LLVMValueRef value;
+ unsigned chan;
+
+ if (!inst->Instruction.Predicate) {
+ FOR_EACH_CHANNEL( chan ) {
+ pred[chan] = NULL;
+ }
+ return;
+ }
+
+ swizzles[0] = inst->Predicate.SwizzleX;
+ swizzles[1] = inst->Predicate.SwizzleY;
+ swizzles[2] = inst->Predicate.SwizzleZ;
+ swizzles[3] = inst->Predicate.SwizzleW;
+
+ index = inst->Predicate.Index;
+ assert(index < LP_MAX_TGSI_PREDS);
+
+ FOR_EACH_CHANNEL( chan ) {
+ unsigned swizzle = swizzles[chan];
+
+ /*
+ * Only fetch the predicate register channels that are actually listed
+ * in the swizzles
+ */
+ if (!unswizzled[swizzle]) {
+ value = LLVMBuildLoad(bld->base.builder,
+ bld->preds[index][swizzle], "");
+
+ /*
+ * Convert the value to an integer mask.
+ *
+ * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
+ * is needlessly causing two comparisons due to storing the intermediate
+ * result as float vector instead of an integer mask vector.
+ */
+ value = lp_build_compare(bld->base.builder,
+ bld->base.type,
+ PIPE_FUNC_NOTEQUAL,
+ value,
+ bld->base.zero);
+ if (inst->Predicate.Negate) {
+ value = LLVMBuildNot(bld->base.builder, value, "");
+ }
+
+ unswizzled[swizzle] = value;
+ } else {
+ value = unswizzled[swizzle];
+ }
+
+ pred[chan] = value;
+ }
+}
+
+
/**
* Register store.
*/
const struct tgsi_full_instruction *inst,
unsigned index,
unsigned chan_index,
+ LLVMValueRef pred,
LLVMValueRef value)
{
const struct tgsi_full_dst_register *reg = &inst->Dst[index];
- LLVMValueRef addr;
+ LLVMValueRef addr = NULL;
switch( inst->Instruction.Saturate ) {
case TGSI_SAT_NONE:
switch( reg->Register.File ) {
case TGSI_FILE_OUTPUT:
- lp_exec_mask_store(&bld->exec_mask, value,
+ lp_exec_mask_store(&bld->exec_mask, pred, value,
bld->outputs[reg->Register.Index][chan_index]);
break;
chan_index,
reg->Register.Indirect,
addr);
- lp_exec_mask_store(&bld->exec_mask, value, temp_ptr);
+ lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
break;
}
case TGSI_FILE_ADDRESS:
- lp_exec_mask_store(&bld->exec_mask, value,
+ lp_exec_mask_store(&bld->exec_mask, pred, value,
bld->addr[reg->Indirect.Index][chan_index]);
break;
case TGSI_FILE_PREDICATE:
- /* FIXME */
+ lp_exec_mask_store(&bld->exec_mask, pred, value,
+ bld->preds[index][chan_index]);
break;
default:
break;
case TGSI_FILE_PREDICATE:
- _debug_printf("warning: predicate registers not yet implemented\n");
+ assert(idx < LP_MAX_TGSI_PREDS);
+ for (i = 0; i < NUM_CHANNELS; i++)
+ bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
break;
default:
*/
assert(info->num_dst <= 1);
- if(info->num_dst) {
+ if (info->num_dst) {
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
dst0[chan_index] = bld->base.undef;
}
}
if(info->num_dst) {
+ LLVMValueRef pred[NUM_CHANNELS];
+
+ emit_fetch_predicate( bld, inst, pred );
+
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
+ emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
}
}
const LLVMValueRef (*inputs)[NUM_CHANNELS],
LLVMValueRef (*outputs)[NUM_CHANNELS],
struct lp_build_sampler_soa *sampler,
- struct tgsi_shader_info *info)
+ const struct tgsi_shader_info *info)
{
struct lp_build_tgsi_soa_context bld;
struct tgsi_parse_context parse;
/* Setup build context */
memset(&bld, 0, sizeof bld);
lp_build_context_init(&bld.base, builder, type);
+ lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
bld.mask = mask;
bld.pos = pos;
bld.inputs = inputs;
LLVMValueRef function = LLVMGetBasicBlockParent(block);
debug_printf("11111111111111111111111111111 \n");
tgsi_dump(tokens, 0);
- LLVMDumpValue(function);
+ lp_debug_dump_value(function);
debug_printf("2222222222222222222222222222 \n");
}
tgsi_parse_free( &parse );