#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_tgsi.h"
+#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
-#define LP_MAX_TEMPS 256
-#define LP_MAX_IMMEDIATES 256
-
-
#define FOR_EACH_CHANNEL( CHAN )\
for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
#define QUAD_BOTTOM_LEFT 2
#define QUAD_BOTTOM_RIGHT 3
-#define LP_TGSI_MAX_NESTING 16
/**
 * State shared by the lp_exec_mask_* helpers: the current condition, continue
 * and break masks, plus the stacks used to save them across nested
 * conditionals and loops.
 */
struct lp_exec_mask {
struct lp_build_context *bld;
LLVMTypeRef int_vec_type; /* set from lp_build_int_vec_type(bld->type) at init */
- LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING];
+ LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; /* cond_mask values saved by lp_exec_mask_cond_push() */
int cond_stack_size; /* number of live entries in cond_stack */
LLVMValueRef cond_mask; /* condition mask currently in effect; all ones at init */
- LLVMValueRef break_stack[LP_TGSI_MAX_NESTING];
- int break_stack_size;
- LLVMValueRef break_mask;
-
- LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING];
- int cont_stack_size;
+ LLVMBasicBlockRef loop_block; /* "bgnloop" block of the innermost open loop */
LLVMValueRef cont_mask; /* continue mask; all ones at init */
-
- LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING];
+ LLVMValueRef break_mask; /* break mask; reloaded from break_var at the top of the loop block */
+ LLVMValueRef break_var; /* alloca of int_vec_type holding break_mask for the innermost open loop */
+ struct { /* state of the enclosing loop, saved by lp_exec_bgnloop() */
+ LLVMBasicBlockRef loop_block;
+ LLVMValueRef cont_mask;
+ LLVMValueRef break_mask;
+ LLVMValueRef break_var;
+ } loop_stack[LP_MAX_TGSI_NESTING];
int loop_stack_size; /* number of live entries in loop_stack */
- LLVMBasicBlockRef loop_block;
-
LLVMValueRef exec_mask; /* mask applied to stores by lp_exec_mask_store(); presumably recomputed by lp_exec_mask_update() -- confirm */
};
{
struct lp_build_context base;
+ /* Builder for integer masks and indices */
+ struct lp_build_context int_bld;
+
LLVMValueRef consts_ptr;
const LLVMValueRef *pos;
const LLVMValueRef (*inputs)[NUM_CHANNELS];
LLVMValueRef (*outputs)[NUM_CHANNELS];
- struct lp_build_sampler_soa *sampler;
+ const struct lp_build_sampler_soa *sampler;
- LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
- LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
- LLVMValueRef addr[LP_MAX_TEMPS][NUM_CHANNELS];
+ LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
+ LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
+ LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
+ LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
/* When indirect addressing is used we allocate an array of temps
 * instead, and the temps member above is then unused */
mask->has_mask = FALSE;
mask->cond_stack_size = 0;
mask->loop_stack_size = 0;
- mask->break_stack_size = 0;
- mask->cont_stack_size = 0;
mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
+ mask->break_mask = mask->cont_mask = mask->cond_mask =
+ LLVMConstAllOnes(mask->int_vec_type);
}
static void lp_exec_mask_update(struct lp_exec_mask *mask)
/**
 * Enter a conditional: save the current condition mask on cond_stack and
 * make val the new condition mask, then refresh the execution mask.
 *
 * val must already have type mask->int_vec_type (asserted below); the added
 * code assigns it directly instead of bitcasting it.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
LLVMValueRef val)
{
+ assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); /* room for one more saved mask */
+ if (mask->cond_stack_size == 0) {
+ assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type)); /* outside any conditional the mask is expected to be all ones */
+ }
mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
- mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
- mask->int_vec_type, "");
+ assert(LLVMTypeOf(val) == mask->int_vec_type);
+ mask->cond_mask = val;
lp_exec_mask_update(mask);
}
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
- LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
- LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
- mask->cond_mask, "");
-
- /* means that we didn't have any mask before and that
- * we were fully enabled */
- if (mask->cond_stack_size <= 1) {
- prev_mask = LLVMConstAllOnes(mask->int_vec_type);
+ LLVMValueRef prev_mask;
+ LLVMValueRef inv_mask;
+
+ assert(mask->cond_stack_size);
+ prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
+ if (mask->cond_stack_size == 1) {
+ assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
}
+ inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
+
mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
inv_mask,
prev_mask, "");
/**
 * Leave a conditional: restore the condition mask saved by the matching
 * lp_exec_mask_cond_push() and refresh the execution mask.
 */
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
+ assert(mask->cond_stack_size); /* a pop without a matching push is a caller error */
mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
lp_exec_mask_update(mask);
}
/**
 * Begin a loop.
 *
 * Saves the enclosing loop's block, continue mask, break mask and break
 * variable on loop_stack, allocates a fresh break_var initialised with the
 * current break_mask, creates the "bgnloop" block and branches into it, and
 * reloads break_mask from break_var at the top of that block.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
+ if (mask->loop_stack_size == 0) { /* outermost loop: loop state must still hold its initial values */
+ assert(mask->loop_block == NULL);
+ assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
+ assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
+ assert(mask->break_var == NULL);
+ }
+
+ assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
- if (mask->cont_stack_size == 0)
- mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type);
- if (mask->break_stack_size == 0)
- mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
- if (mask->cond_stack_size == 0)
- mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
+ mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block; /* push the enclosing loop's state as one entry */
+ mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
+ mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
+ mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
+ ++mask->loop_stack_size;
+
+ mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, ""); /* per-loop slot for the break mask */
+ LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var); /* seed it before entering the loop block */
- mask->break_stack[mask->break_stack_size++] = mask->break_mask;
- mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
- mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
LLVMBuildBr(mask->bld->builder, mask->loop_block);
LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
+ mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, ""); /* load inside the loop block, presumably so values stored to break_var later are seen on re-entry */
+
lp_exec_mask_update(mask);
}
assert(mask->break_mask);
+ /*
+ * Restore the cont_mask, but don't pop
+ */
+ assert(mask->loop_stack_size);
+ mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
+ lp_exec_mask_update(mask);
+
+ /*
+ * Unlike the continue mask, the break_mask must be preserved across loop
+ * iterations
+ */
+ LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
+
/* i1cond = (mask != 0) */
i1cond = LLVMBuildICmp(
mask->bld->builder,
LLVMIntNE,
- LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""),
+ LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
LLVMConstNull(reg_type), "");
endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
- mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
- /* pop the cont mask */
- if (mask->cont_stack_size) {
- mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
- }
- /* pop the break mask */
- if (mask->break_stack_size) {
- mask->break_mask = mask->break_stack[--mask->break_stack_size];
- }
+ assert(mask->loop_stack_size);
+ --mask->loop_stack_size;
+ mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
+ mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
+ mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
+ mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
lp_exec_mask_update(mask);
}
* (0 means don't store this bit, 1 means do store).
*/
static void lp_exec_mask_store(struct lp_exec_mask *mask,
+ LLVMValueRef pred,
LLVMValueRef val,
LLVMValueRef dst)
{
+ /* Mix the predicate and execution mask */
if (mask->has_mask) {
+ if (pred) {
+ pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
+ } else {
+ pred = mask->exec_mask;
+ }
+ }
+
+ if (pred) {
LLVMValueRef real_val, dst_val;
dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
real_val = lp_build_select(mask->bld,
- mask->exec_mask,
+ pred,
val, dst_val);
LLVMBuildStore(mask->bld->builder, real_val, dst);
static LLVMValueRef
get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
unsigned index,
- unsigned swizzle,
+ unsigned chan,
boolean is_indirect,
LLVMValueRef addr)
{
+ assert(chan < 4);
if (!bld->has_indirect_addressing) {
- return bld->temps[index][swizzle];
+ return bld->temps[index][chan];
} else {
LLVMValueRef lindex =
- LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
+ LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0);
if (is_indirect)
lindex = lp_build_add(&bld->base, lindex, addr);
return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
const struct tgsi_full_src_register *reg = &inst->Src[index];
unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
LLVMValueRef res;
- LLVMValueRef addr;
+ LLVMValueRef addr = NULL;
switch (swizzle) {
case TGSI_SWIZZLE_X:
}
+/**
+ * Predicate.
+ *
+ * Fetch the predicate of an instruction as one mask per destination channel.
+ *
+ * When the instruction is not predicated, every pred[chan] is set to NULL.
+ * Otherwise pred[chan] is derived from channel swizzles[chan] of predicate
+ * register inst->Predicate.Index: the stored value is loaded, compared
+ * against zero with PIPE_FUNC_NOTEQUAL, and inverted when
+ * inst->Predicate.Negate is set.
+ *
+ * \param bld   translation context; supplies the builder and the preds array
+ * \param inst  instruction whose Predicate token is decoded
+ * \param pred  output array, written for every channel visited by
+ *              FOR_EACH_CHANNEL
+ */
+static void
+emit_fetch_predicate(
+ struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst,
+ LLVMValueRef *pred)
+{
+ unsigned index;
+ unsigned char swizzles[4];
+ LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL}; /* per-source-channel cache of already converted masks */
+ LLVMValueRef value;
+ unsigned chan;
+
+ if (!inst->Instruction.Predicate) {
+ FOR_EACH_CHANNEL( chan ) {
+ pred[chan] = NULL;
+ }
+ return;
+ }
+
+ swizzles[0] = inst->Predicate.SwizzleX;
+ swizzles[1] = inst->Predicate.SwizzleY;
+ swizzles[2] = inst->Predicate.SwizzleZ;
+ swizzles[3] = inst->Predicate.SwizzleW;
+
+ index = inst->Predicate.Index;
+ assert(index < LP_MAX_TGSI_PREDS);
+
+ FOR_EACH_CHANNEL( chan ) {
+ unsigned swizzle = swizzles[chan];
+
+ /*
+ * Only fetch the predicate register channels that are actually listed
+ * in the swizzles, and fetch each of them at most once: a channel
+ * referenced by several swizzles reuses the cached mask.
+ */
+ if (!unswizzled[swizzle]) {
+ value = LLVMBuildLoad(bld->base.builder,
+ bld->preds[index][swizzle], "");
+
+ /*
+ * Convert the value to an integer mask.
+ *
+ * TODO: Short-circuit this comparison -- a D3D setp_xx instruction
+ * needlessly causes two comparisons because the intermediate
+ * result is stored as a float vector instead of an integer mask vector.
+ */
+ value = lp_build_compare(bld->base.builder,
+ bld->base.type,
+ PIPE_FUNC_NOTEQUAL,
+ value,
+ bld->base.zero);
+ if (inst->Predicate.Negate) {
+ value = LLVMBuildNot(bld->base.builder, value, "");
+ }
+
+ unswizzled[swizzle] = value; /* cached after negation, so reuse needs no further work */
+ } else {
+ value = unswizzled[swizzle];
+ }
+
+ pred[chan] = value;
+ }
+}
+
+
/**
* Register store.
*/
const struct tgsi_full_instruction *inst,
unsigned index,
unsigned chan_index,
+ LLVMValueRef pred,
LLVMValueRef value)
{
const struct tgsi_full_dst_register *reg = &inst->Dst[index];
- LLVMValueRef addr;
+ LLVMValueRef addr = NULL;
switch( inst->Instruction.Saturate ) {
case TGSI_SAT_NONE:
switch( reg->Register.File ) {
case TGSI_FILE_OUTPUT:
- lp_exec_mask_store(&bld->exec_mask, value,
+ lp_exec_mask_store(&bld->exec_mask, pred, value,
bld->outputs[reg->Register.Index][chan_index]);
break;
chan_index,
reg->Register.Indirect,
addr);
- lp_exec_mask_store(&bld->exec_mask, value, temp_ptr);
+ lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
break;
}
case TGSI_FILE_ADDRESS:
- lp_exec_mask_store(&bld->exec_mask, value,
+ lp_exec_mask_store(&bld->exec_mask, pred, value,
bld->addr[reg->Indirect.Index][chan_index]);
break;
case TGSI_FILE_PREDICATE:
- /* FIXME */
+ /*
+ * Select the predicate register by the destination's register number,
+ * matching how the preds array is filled at declaration time and read
+ * when fetching a predicate; 'index' is only the operand slot used to
+ * pick inst->Dst[index].
+ */
+ lp_exec_mask_store(&bld->exec_mask, pred, value,
+ bld->preds[reg->Register.Index][chan_index]);
break;
default:
* High-level instruction translators.
*/
+enum tex_modifier { /* selects how emit_tex() derives LOD, projection and derivatives */
+ TEX_MODIFIER_NONE = 0, /* TEX: no LOD arguments; derivatives computed with emit_ddx/emit_ddy */
+ TEX_MODIFIER_PROJECTED, /* TXP: coordinates multiplied by the reciprocal of src0 channel 3 */
+ TEX_MODIFIER_LOD_BIAS, /* TXB: lod_bias fetched from src0 channel 3 */
+ TEX_MODIFIER_EXPLICIT_LOD, /* TXL: explicit_lod fetched from src0 channel 3 */
+ TEX_MODIFIER_EXPLICIT_DERIV /* TXD: ddx/ddy fetched from src1/src2, sampler unit taken from src3 */
+};
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
- boolean apply_lodbias,
- boolean projected,
+ enum tex_modifier modifier,
LLVMValueRef *texel)
{
- const uint unit = inst->Src[1].Register.Index;
- LLVMValueRef lodbias;
+ unsigned unit;
+ LLVMValueRef lod_bias, explicit_lod;
LLVMValueRef oow = NULL;
LLVMValueRef coords[3];
+ LLVMValueRef ddx[3];
+ LLVMValueRef ddy[3];
unsigned num_coords;
unsigned i;
return;
}
- if(apply_lodbias)
- lodbias = emit_fetch( bld, inst, 0, 3 );
- else
- lodbias = bld->base.zero;
+ if (modifier == TEX_MODIFIER_LOD_BIAS) {
+ lod_bias = emit_fetch( bld, inst, 0, 3 );
+ explicit_lod = NULL;
+ }
+ else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
+ lod_bias = NULL;
+ explicit_lod = emit_fetch( bld, inst, 0, 3 );
+ }
+ else {
+ lod_bias = NULL;
+ explicit_lod = NULL;
+ }
- if (projected) {
+ if (modifier == TEX_MODIFIER_PROJECTED) {
oow = emit_fetch( bld, inst, 0, 3 );
oow = lp_build_rcp(&bld->base, oow);
}
for (i = 0; i < num_coords; i++) {
coords[i] = emit_fetch( bld, inst, 0, i );
- if (projected)
+ if (modifier == TEX_MODIFIER_PROJECTED)
coords[i] = lp_build_mul(&bld->base, coords[i], oow);
}
for (i = num_coords; i < 3; i++) {
coords[i] = bld->base.undef;
}
+ if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
+ for (i = 0; i < num_coords; i++) {
+ ddx[i] = emit_fetch( bld, inst, 1, i );
+ ddy[i] = emit_fetch( bld, inst, 2, i );
+ }
+ unit = inst->Src[3].Register.Index;
+ } else {
+ for (i = 0; i < num_coords; i++) {
+ ddx[i] = emit_ddx( bld, coords[i] );
+ ddy[i] = emit_ddy( bld, coords[i] );
+ }
+ unit = inst->Src[1].Register.Index;
+ }
+ for (i = num_coords; i < 3; i++) {
+ ddx[i] = bld->base.undef;
+ ddy[i] = bld->base.undef;
+ }
+
bld->sampler->emit_fetch_texel(bld->sampler,
bld->base.builder,
bld->base.type,
- unit, num_coords, coords, lodbias,
+ unit, num_coords, coords,
+ ddx, ddy,
+ lod_bias, explicit_lod,
texel);
}
for (idx = first; idx <= last; ++idx) {
switch (decl->Declaration.File) {
case TGSI_FILE_TEMPORARY:
+ assert(idx < LP_MAX_TGSI_TEMPS);
if (bld->has_indirect_addressing) {
LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
last*4 + 4, 0);
break;
case TGSI_FILE_ADDRESS:
+ assert(idx < LP_MAX_TGSI_ADDRS);
for (i = 0; i < NUM_CHANNELS; i++)
bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
vec_type, "");
break;
case TGSI_FILE_PREDICATE:
- _debug_printf("warning: predicate registers not yet implemented\n");
+ assert(idx < LP_MAX_TGSI_PREDS);
+ for (i = 0; i < NUM_CHANNELS; i++)
+ bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
+ vec_type, "");
break;
default:
*/
assert(info->num_dst <= 1);
- if(info->num_dst) {
+ if (info->num_dst) {
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
dst0[chan_index] = bld->base.undef;
}
break;
case TGSI_OPCODE_TEX:
- emit_tex( bld, inst, FALSE, FALSE, dst0 );
+ emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
break;
case TGSI_OPCODE_TXD:
- /* FIXME */
- return FALSE;
+ emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
break;
case TGSI_OPCODE_UP2H:
break;
case TGSI_OPCODE_TXB:
- emit_tex( bld, inst, TRUE, FALSE, dst0 );
+ emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
break;
case TGSI_OPCODE_NRM:
break;
case TGSI_OPCODE_TXL:
- emit_tex( bld, inst, TRUE, FALSE, dst0 );
+ emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
break;
case TGSI_OPCODE_TXP:
- emit_tex( bld, inst, FALSE, TRUE, dst0 );
+ emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
break;
case TGSI_OPCODE_BRK:
}
if(info->num_dst) {
+ LLVMValueRef pred[NUM_CHANNELS];
+
+ emit_fetch_predicate( bld, inst, pred );
+
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
+ emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
}
}
const LLVMValueRef (*inputs)[NUM_CHANNELS],
LLVMValueRef (*outputs)[NUM_CHANNELS],
struct lp_build_sampler_soa *sampler,
- struct tgsi_shader_info *info)
+ const struct tgsi_shader_info *info)
{
struct lp_build_tgsi_soa_context bld;
struct tgsi_parse_context parse;
/* Setup build context */
memset(&bld, 0, sizeof bld);
lp_build_context_init(&bld.base, builder, type);
+ lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
bld.mask = mask;
bld.pos = pos;
bld.inputs = inputs;
{
const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
assert(size <= 4);
- assert(num_immediates < LP_MAX_IMMEDIATES);
+ assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
for( i = 0; i < size; ++i )
bld.immediates[num_immediates][i] =
lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
LLVMValueRef function = LLVMGetBasicBlockParent(block);
debug_printf("11111111111111111111111111111 \n");
tgsi_dump(tokens, 0);
- LLVMDumpValue(function);
+ lp_debug_dump_value(function);
debug_printf("2222222222222222222222222222 \n");
}
tgsi_parse_free( &parse );