X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fgallivm%2Flp_bld_tgsi_soa.c;h=7f0f058c2225d75dd53fca86bec148f486f690e6;hp=53a5ce0cd877b609869adfe74ad8e67f6e0f5c7a;hb=b2ddb93ff3b8c88682634ccdef247967e31fab84;hpb=18a4a83ddab7655253fdb71d37393a32adcda488 diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 53a5ce0cd87..7f0f058c222 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -45,19 +45,20 @@ #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" -#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_scan.h" #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_arit.h" +#include "lp_bld_bitarit.h" +#include "lp_bld_gather.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_flow.h" +#include "lp_bld_quad.h" #include "lp_bld_tgsi.h" +#include "lp_bld_limits.h" #include "lp_bld_debug.h" - - -#define LP_MAX_TEMPS 256 -#define LP_MAX_IMMEDIATES 256 +#include "lp_bld_printf.h" #define FOR_EACH_CHANNEL( CHAN )\ @@ -77,13 +78,10 @@ #define CHAN_Y 1 #define CHAN_Z 2 #define CHAN_W 3 +#define NUM_CHANNELS 4 -#define QUAD_TOP_LEFT 0 -#define QUAD_TOP_RIGHT 1 -#define QUAD_BOTTOM_LEFT 2 -#define QUAD_BOTTOM_RIGHT 3 +#define LP_MAX_INSTRUCTIONS 256 -#define LP_TGSI_MAX_NESTING 16 struct lp_exec_mask { struct lp_build_context *bld; @@ -92,22 +90,28 @@ struct lp_exec_mask { LLVMTypeRef int_vec_type; - LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING]; + LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; int cond_stack_size; LLVMValueRef cond_mask; - LLVMValueRef break_stack[LP_TGSI_MAX_NESTING]; - int break_stack_size; - LLVMValueRef break_mask; - - LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING]; - int cont_stack_size; + LLVMBasicBlockRef loop_block; LLVMValueRef cont_mask; - - LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING]; + LLVMValueRef break_mask; + LLVMValueRef break_var; + struct { + LLVMBasicBlockRef loop_block; + LLVMValueRef cont_mask; + LLVMValueRef break_mask; + LLVMValueRef break_var; + } loop_stack[LP_MAX_TGSI_NESTING]; int loop_stack_size; - LLVMBasicBlockRef loop_block; + LLVMValueRef ret_mask; + struct { + int pc; + LLVMValueRef ret_mask; + } call_stack[LP_MAX_TGSI_NESTING]; + int call_stack_size; LLVMValueRef exec_mask; }; @@ -116,42 +120,51 @@ struct lp_build_tgsi_soa_context { struct lp_build_context base; + /* Builder for vector integer masks and indices */ + struct lp_build_context uint_bld; + + /* Builder for scalar elements of shader's data type (float) */ + struct lp_build_context elem_bld; + LLVMValueRef consts_ptr; const LLVMValueRef *pos; const LLVMValueRef (*inputs)[NUM_CHANNELS]; LLVMValueRef (*outputs)[NUM_CHANNELS]; - struct lp_build_sampler_soa *sampler; + const struct lp_build_sampler_soa *sampler; - LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; - LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; + LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS]; + LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS]; + LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; + LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS]; - struct lp_build_mask_context *mask; - struct lp_exec_mask exec_mask; -}; + /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is + * set in the indirect_files field. + * The temps[] array above is unused then. + */ + LLVMValueRef temps_array; -static const unsigned char -swizzle_left[4] = { - QUAD_TOP_LEFT, QUAD_TOP_LEFT, - QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT -}; + /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is + * set in the indirect_files field. + * The outputs[] array above is unused then. + */ + LLVMValueRef outputs_array; -static const unsigned char -swizzle_right[4] = { - QUAD_TOP_RIGHT, QUAD_TOP_RIGHT, - QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT -}; + /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is + * set in the indirect_files field. + * The inputs[] array above is unused then. + */ + LLVMValueRef inputs_array; -static const unsigned char -swizzle_top[4] = { - QUAD_TOP_LEFT, QUAD_TOP_RIGHT, - QUAD_TOP_LEFT, QUAD_TOP_RIGHT -}; + const struct tgsi_shader_info *info; + /** bitmask indicating which register files are accessed indirectly */ + unsigned indirect_files; -static const unsigned char -swizzle_bottom[4] = { - QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT, - QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT + struct lp_build_mask_context *mask; + struct lp_exec_mask exec_mask; + + struct tgsi_full_instruction *instructions; + uint max_instructions; }; static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) @@ -160,10 +173,11 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context mask->has_mask = FALSE; mask->cond_stack_size = 0; mask->loop_stack_size = 0; - mask->break_stack_size = 0; - mask->cont_stack_size = 0; + mask->call_stack_size = 0; mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); + mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask = + LLVMConstAllOnes(mask->int_vec_type); } static void lp_exec_mask_update(struct lp_exec_mask *mask) @@ -183,33 +197,47 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask) } else mask->exec_mask = mask->cond_mask; + if (mask->call_stack_size) { + mask->exec_mask = LLVMBuildAnd(mask->bld->builder, + mask->exec_mask, + mask->ret_mask, + "callmask"); + } mask->has_mask = (mask->cond_stack_size > 0 || - mask->loop_stack_size > 0); + mask->loop_stack_size > 0 || + mask->call_stack_size > 0); } static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, LLVMValueRef val) { + assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); + if (mask->cond_stack_size == 0) { + assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type)); + } mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; - mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val, - mask->int_vec_type, ""); - + assert(LLVMTypeOf(val) == mask->int_vec_type); + mask->cond_mask = LLVMBuildAnd(mask->bld->builder, + mask->cond_mask, + val, + ""); lp_exec_mask_update(mask); } static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) { - LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; - LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder, - mask->cond_mask, ""); - - /* means that we didn't have any mask before and that - * we were fully enabled */ - if (mask->cond_stack_size <= 1) { - prev_mask = LLVMConstAllOnes(mask->int_vec_type); + LLVMValueRef prev_mask; + LLVMValueRef inv_mask; + + assert(mask->cond_stack_size); + prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; + if (mask->cond_stack_size == 1) { + assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type)); } + inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, ""); + mask->cond_mask = LLVMBuildAnd(mask->bld->builder, inv_mask, prev_mask, ""); @@ -218,27 +246,37 @@ static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) { + assert(mask->cond_stack_size); mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; lp_exec_mask_update(mask); } static void lp_exec_bgnloop(struct lp_exec_mask *mask) { + if (mask->loop_stack_size == 0) { + assert(mask->loop_block == NULL); + assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type)); + assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type)); + assert(mask->break_var == NULL); + } + + assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING); - if (mask->cont_stack_size == 0) - mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type); - if (mask->break_stack_size == 0) - mask->break_mask = LLVMConstAllOnes(mask->int_vec_type); - if (mask->cond_stack_size == 0) - mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type); + mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block; + mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask; + mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask; + mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var; + ++mask->loop_stack_size; + + mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, ""); + LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var); - mask->break_stack[mask->break_stack_size++] = mask->break_mask; - mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; - mask->loop_stack[mask->loop_stack_size++] = mask->loop_block; mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); LLVMBuildBr(mask->bld->builder, mask->loop_block); LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block); + mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, ""); + lp_exec_mask_update(mask); } @@ -248,16 +286,9 @@ static void lp_exec_break(struct lp_exec_mask *mask) mask->exec_mask, "break"); - /* mask->break_stack_size > 1 implies that we encountered a break - * statemant already and if that's the case we want to make sure - * our mask is a combination of the previous break and the current - * execution mask */ - if (mask->break_stack_size > 1) { - mask->break_mask = LLVMBuildAnd(mask->bld->builder, - mask->break_mask, - exec_mask, "break_full"); - } else - mask->break_mask = exec_mask; + mask->break_mask = LLVMBuildAnd(mask->bld->builder, + mask->break_mask, + exec_mask, "break_full"); lp_exec_mask_update(mask); } @@ -268,12 +299,9 @@ static void lp_exec_continue(struct lp_exec_mask *mask) mask->exec_mask, ""); - if (mask->cont_stack_size > 1) { - mask->cont_mask = LLVMBuildAnd(mask->bld->builder, - mask->cont_mask, - exec_mask, ""); - } else - mask->cont_mask = exec_mask; + mask->cont_mask = LLVMBuildAnd(mask->bld->builder, + mask->cont_mask, + exec_mask, ""); lp_exec_mask_update(mask); } @@ -288,11 +316,24 @@ static void lp_exec_endloop(struct lp_exec_mask *mask) assert(mask->break_mask); + /* + * Restore the cont_mask, but don't pop + */ + assert(mask->loop_stack_size); + mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask; + lp_exec_mask_update(mask); + + /* + * Unlike the continue mask, the break_mask must be preserved across loop + * iterations + */ + LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var); + /* i1cond = (mask == 0) */ i1cond = LLVMBuildICmp( mask->bld->builder, LLVMIntNE, - LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""), + LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""), LLVMConstNull(reg_type), ""); endloop = lp_build_insert_new_block(mask->bld->builder, "endloop"); @@ -302,15 +343,12 @@ static void lp_exec_endloop(struct lp_exec_mask *mask) LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); - mask->loop_block = mask->loop_stack[--mask->loop_stack_size]; - /* pop the cont mask */ - if (mask->cont_stack_size) { - mask->cont_mask = mask->cont_stack[--mask->cont_stack_size]; - } - /* pop the break mask */ - if (mask->break_stack_size) { - mask->break_mask = mask->break_stack[--mask->break_stack_size]; - } + assert(mask->loop_stack_size); + --mask->loop_stack_size; + mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block; + mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask; + mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask; + mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var; lp_exec_mask_update(mask); } @@ -321,15 +359,25 @@ static void lp_exec_endloop(struct lp_exec_mask *mask) * (0 means don't store this bit, 1 means do store). */ static void lp_exec_mask_store(struct lp_exec_mask *mask, + LLVMValueRef pred, LLVMValueRef val, LLVMValueRef dst) { + /* Mix the predicate and execution mask */ if (mask->has_mask) { + if (pred) { + pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, ""); + } else { + pred = mask->exec_mask; + } + } + + if (pred) { LLVMValueRef real_val, dst_val; dst_val = LLVMBuildLoad(mask->bld->builder, dst, ""); real_val = lp_build_select(mask->bld, - mask->exec_mask, + pred, val, dst_val); LLVMBuildStore(mask->bld->builder, real_val, dst); @@ -337,24 +385,218 @@ static void lp_exec_mask_store(struct lp_exec_mask *mask, LLVMBuildStore(mask->bld->builder, val, dst); } +static void lp_exec_mask_call(struct lp_exec_mask *mask, + int func, + int *pc) +{ + assert(mask->call_stack_size < LP_MAX_TGSI_NESTING); + mask->call_stack[mask->call_stack_size].pc = *pc; + mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask; + mask->call_stack_size++; + *pc = func; +} + +static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) +{ + LLVMValueRef exec_mask; + + if (mask->call_stack_size == 0) { + /* returning from main() */ + *pc = -1; + return; + } + exec_mask = LLVMBuildNot(mask->bld->builder, + mask->exec_mask, + "ret"); + + mask->ret_mask = LLVMBuildAnd(mask->bld->builder, + mask->ret_mask, + exec_mask, "ret_full"); + lp_exec_mask_update(mask); +} + +static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask) +{ +} + +static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) +{ + assert(mask->call_stack_size); + mask->call_stack_size--; + *pc = mask->call_stack[mask->call_stack_size].pc; + mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask; + lp_exec_mask_update(mask); +} + + +/** + * Return pointer to a temporary register channel (src or dest). + * Note that indirect addressing cannot be handled here. + * \param index which temporary register + * \param chan which channel of the temp register. + */ static LLVMValueRef -emit_ddx(struct lp_build_tgsi_soa_context *bld, - LLVMValueRef src) +get_temp_ptr(struct lp_build_tgsi_soa_context *bld, + unsigned index, + unsigned chan) { - LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left); - LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right); - return lp_build_sub(&bld->base, src_right, src_left); + assert(chan < 4); + if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { + LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan); + return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, ""); + } + else { + return bld->temps[index][chan]; + } } +/** + * Return pointer to a output register channel (src or dest). + * Note that indirect addressing cannot be handled here. + * \param index which output register + * \param chan which channel of the output register. + */ +static LLVMValueRef +get_output_ptr(struct lp_build_tgsi_soa_context *bld, + unsigned index, + unsigned chan) +{ + assert(chan < 4); + if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { + LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan); + return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &lindex, 1, ""); + } + else { + return bld->outputs[index][chan]; + } +} +/** + * Gather vector. + * XXX the lp_build_gather() function should be capable of doing this + * with a little work. + */ static LLVMValueRef -emit_ddy(struct lp_build_tgsi_soa_context *bld, - LLVMValueRef src) +build_gather(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef base_ptr, + LLVMValueRef indexes) { - LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top); - LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom); - return lp_build_sub(&bld->base, src_top, src_bottom); + LLVMValueRef res = bld->base.undef; + unsigned i; + + /* + * Loop over elements of index_vec, load scalar value, insert it into 'res'. + */ + for (i = 0; i < bld->base.type.length; i++) { + LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder, + indexes, ii, ""); + LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr, + &index, 1, "gather_ptr"); + LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + + res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, ""); + } + + return res; +} + + +/** + * Scatter/store vector. + */ +static void +emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, + LLVMValueRef base_ptr, + LLVMValueRef indexes, + LLVMValueRef values, + struct lp_exec_mask *mask, + LLVMValueRef pred) +{ + LLVMBuilderRef builder = bld->base.builder; + unsigned i; + + /* Mix the predicate and execution mask */ + if (mask->has_mask) { + if (pred) { + pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, ""); + } + else { + pred = mask->exec_mask; + } + } + + /* + * Loop over elements of index_vec, store scalar value. + */ + for (i = 0; i < bld->base.type.length; i++) { + LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); + LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr"); + LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val"); + LLVMValueRef scalar_pred = pred ? + LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL; + + if (0) + lp_build_printf(builder, "scatter %d: val %f at %d %p\n", + ii, val, index, scalar_ptr); + + if (scalar_pred) { + LLVMValueRef real_val, dst_val; + dst_val = LLVMBuildLoad(builder, scalar_ptr, ""); + real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val); + LLVMBuildStore(builder, real_val, scalar_ptr); + } + else { + LLVMBuildStore(builder, val, scalar_ptr); + } + } +} + + +/** + * Read the current value of the ADDR register, convert the floats to + * ints, add the base index and return the vector of offsets. + * The offsets will be used to index into the constant buffer or + * temporary register file. + */ +static LLVMValueRef +get_indirect_index(struct lp_build_tgsi_soa_context *bld, + unsigned reg_file, unsigned reg_index, + const struct tgsi_src_register *indirect_reg) +{ + struct lp_build_context *uint_bld = &bld->uint_bld; + /* always use X component of address register */ + unsigned swizzle = indirect_reg->SwizzleX; + LLVMValueRef base; + LLVMValueRef rel; + LLVMValueRef max_index; + LLVMValueRef index; + + assert(bld->indirect_files & (1 << reg_file)); + + base = lp_build_const_int_vec(uint_bld->type, reg_index); + + assert(swizzle < 4); + rel = LLVMBuildLoad(bld->base.builder, + bld->addr[indirect_reg->Index][swizzle], + "load addr reg"); + + /* for indexing we want integers */ + rel = LLVMBuildFPToSI(bld->base.builder, + rel, + uint_bld->vec_type, ""); + + index = lp_build_add(uint_bld, base, rel); + + max_index = lp_build_const_int_vec(uint_bld->type, + bld->info->file_max[reg_file]); + + assert(!uint_bld->type.sign); + index = lp_build_min(uint_bld, index, max_index); + + return index; } @@ -365,52 +607,133 @@ static LLVMValueRef emit_fetch( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, - unsigned index, + unsigned src_op, const unsigned chan_index ) { - const struct tgsi_full_src_register *reg = &inst->Src[index]; - unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); + struct lp_build_context *uint_bld = &bld->uint_bld; + const struct tgsi_full_src_register *reg = &inst->Src[src_op]; + const unsigned swizzle = + tgsi_util_get_full_src_register_swizzle(reg, chan_index); LLVMValueRef res; + LLVMValueRef indirect_index = NULL; - switch (swizzle) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - - switch (reg->Register.File) { - case TGSI_FILE_CONSTANT: { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); - LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); - LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); - res = lp_build_broadcast_scalar(&bld->base, scalar); - break; + if (swizzle > 3) { + assert(0 && "invalid swizzle in emit_fetch()"); + return bld->base.undef; + } + + if (reg->Register.Indirect) { + indirect_index = get_indirect_index(bld, + reg->Register.File, + reg->Register.Index, + ®->Indirect); + } else { + assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); + } + + switch (reg->Register.File) { + case TGSI_FILE_CONSTANT: + if (reg->Register.Indirect) { + LLVMValueRef swizzle_vec = + lp_build_const_int_vec(uint_bld->type, swizzle); + LLVMValueRef index_vec; /* index into the const buffer */ + + /* index_vec = indirect_index * 4 + swizzle */ + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); + index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); + + /* Gather values from the constant buffer */ + res = build_gather(bld, bld->consts_ptr, index_vec); } + else { + LLVMValueRef index; /* index into the const buffer */ + LLVMValueRef scalar, scalar_ptr; - case TGSI_FILE_IMMEDIATE: - res = bld->immediates[reg->Register.Index][swizzle]; - assert(res); - break; + index = lp_build_const_int32(reg->Register.Index*4 + swizzle); - case TGSI_FILE_INPUT: - res = bld->inputs[reg->Register.Index][swizzle]; - assert(res); - break; + scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, + &index, 1, ""); + scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); - case TGSI_FILE_TEMPORARY: - res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], ""); - if(!res) - return bld->base.undef; - break; + res = lp_build_broadcast_scalar(&bld->base, scalar); + } + break; - default: - assert( 0 ); - return bld->base.undef; + case TGSI_FILE_IMMEDIATE: + res = bld->immediates[reg->Register.Index][swizzle]; + assert(res); + break; + + case TGSI_FILE_INPUT: + if (reg->Register.Indirect) { + LLVMValueRef swizzle_vec = + lp_build_const_int_vec(uint_bld->type, swizzle); + LLVMValueRef length_vec = + lp_build_const_int_vec(uint_bld->type, bld->base.type.length); + LLVMValueRef index_vec; /* index into the const buffer */ + LLVMValueRef inputs_array; + LLVMTypeRef float4_ptr_type; + + /* index_vec = (indirect_index * 4 + swizzle) * length */ + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); + index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); + index_vec = lp_build_mul(uint_bld, index_vec, length_vec); + + /* cast inputs_array pointer to float* */ + float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0); + inputs_array = LLVMBuildBitCast(uint_bld->builder, bld->inputs_array, + float4_ptr_type, ""); + + /* Gather values from the temporary register array */ + res = build_gather(bld, inputs_array, index_vec); + } else { + if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { + LLVMValueRef lindex = lp_build_const_int32(reg->Register.Index * 4 + swizzle); + LLVMValueRef input_ptr = LLVMBuildGEP(bld->base.builder, + bld->inputs_array, &lindex, 1, ""); + res = LLVMBuildLoad(bld->base.builder, input_ptr, ""); + } + else { + res = bld->inputs[reg->Register.Index][swizzle]; + } + } + assert(res); + break; + + case TGSI_FILE_TEMPORARY: + if (reg->Register.Indirect) { + LLVMValueRef swizzle_vec = + lp_build_const_int_vec(uint_bld->type, swizzle); + LLVMValueRef length_vec = + lp_build_const_int_vec(uint_bld->type, bld->base.type.length); + LLVMValueRef index_vec; /* index into the const buffer */ + LLVMValueRef temps_array; + LLVMTypeRef float4_ptr_type; + + /* index_vec = (indirect_index * 4 + swizzle) * length */ + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); + index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); + index_vec = lp_build_mul(uint_bld, index_vec, length_vec); + + /* cast temps_array pointer to float* */ + float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0); + temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array, + float4_ptr_type, ""); + + /* Gather values from the temporary register array */ + res = build_gather(bld, temps_array, index_vec); + } + else { + LLVMValueRef temp_ptr; + temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle); + res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); + if (!res) + return bld->base.undef; } break; default: - assert( 0 ); + assert(0 && "invalid src register in emit_fetch()"); return bld->base.undef; } @@ -420,13 +743,10 @@ emit_fetch( break; case TGSI_UTIL_SIGN_SET: - /* TODO: Use bitwese OR for floating point */ res = lp_build_abs( &bld->base, res ); - res = LLVMBuildNeg( bld->base.builder, res, "" ); - break; - + /* fall through */ case TGSI_UTIL_SIGN_TOGGLE: - res = LLVMBuildNeg( bld->base.builder, res, "" ); + res = lp_build_negate( &bld->base, res ); break; case TGSI_UTIL_SIGN_KEEP: @@ -460,10 +780,77 @@ emit_fetch_deriv( /* TODO: use interpolation coeffs for inputs */ if(ddx) - *ddx = emit_ddx(bld, src); + *ddx = lp_build_ddx(&bld->base, src); if(ddy) - *ddy = emit_ddy(bld, src); + *ddy = lp_build_ddy(&bld->base, src); +} + + +/** + * Predicate. + */ +static void +emit_fetch_predicate( + struct lp_build_tgsi_soa_context *bld, + const struct tgsi_full_instruction *inst, + LLVMValueRef *pred) +{ + unsigned index; + unsigned char swizzles[4]; + LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL}; + LLVMValueRef value; + unsigned chan; + + if (!inst->Instruction.Predicate) { + FOR_EACH_CHANNEL( chan ) { + pred[chan] = NULL; + } + return; + } + + swizzles[0] = inst->Predicate.SwizzleX; + swizzles[1] = inst->Predicate.SwizzleY; + swizzles[2] = inst->Predicate.SwizzleZ; + swizzles[3] = inst->Predicate.SwizzleW; + + index = inst->Predicate.Index; + assert(index < LP_MAX_TGSI_PREDS); + + FOR_EACH_CHANNEL( chan ) { + unsigned swizzle = swizzles[chan]; + + /* + * Only fetch the predicate register channels that are actually listed + * in the swizzles + */ + if (!unswizzled[swizzle]) { + value = LLVMBuildLoad(bld->base.builder, + bld->preds[index][swizzle], ""); + + /* + * Convert the value to an integer mask. + * + * TODO: Short-circuit this comparison -- a D3D setp_xx instructions + * is needlessly causing two comparisons due to storing the intermediate + * result as float vector instead of an integer mask vector. + */ + value = lp_build_compare(bld->base.builder, + bld->base.type, + PIPE_FUNC_NOTEQUAL, + value, + bld->base.zero); + if (inst->Predicate.Negate) { + value = LLVMBuildNot(bld->base.builder, value, ""); + } + + unswizzled[swizzle] = value; + } else { + value = unswizzled[swizzle]; + } + + pred[chan] = value; + } } @@ -476,9 +863,12 @@ emit_store( const struct tgsi_full_instruction *inst, unsigned index, unsigned chan_index, + LLVMValueRef pred, LLVMValueRef value) { const struct tgsi_full_dst_register *reg = &inst->Dst[index]; + struct lp_build_context *uint_bld = &bld->uint_bld; + LLVMValueRef indirect_index = NULL; switch( inst->Instruction.Saturate ) { case TGSI_SAT_NONE: @@ -498,25 +888,108 @@ emit_store( assert(0); } + if (reg->Register.Indirect) { + indirect_index = get_indirect_index(bld, + reg->Register.File, + reg->Register.Index, + ®->Indirect); + } else { + assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); + } + switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: - lp_exec_mask_store(&bld->exec_mask, value, - bld->outputs[reg->Register.Index][chan_index]); + if (reg->Register.Indirect) { + LLVMBuilderRef builder = bld->base.builder; + LLVMValueRef chan_vec = + lp_build_const_int_vec(uint_bld->type, chan_index); + LLVMValueRef length_vec = + lp_build_const_int_vec(uint_bld->type, bld->base.type.length); + LLVMValueRef index_vec; /* indexes into the temp registers */ + LLVMValueRef outputs_array; + LLVMValueRef pixel_offsets; + LLVMTypeRef float_ptr_type; + int i; + + /* build pixel offset vector: {0, 1, 2, 3, ...} */ + pixel_offsets = uint_bld->undef; + for (i = 0; i < bld->base.type.length; i++) { + LLVMValueRef ii = lp_build_const_int32(i); + pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, + ii, ii, ""); + } + + /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); + index_vec = lp_build_add(uint_bld, index_vec, chan_vec); + index_vec = lp_build_mul(uint_bld, index_vec, length_vec); + index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); + + float_ptr_type = LLVMPointerType(LLVMFloatType(), 0); + outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, + float_ptr_type, ""); + + /* Scatter store values into temp registers */ + emit_mask_scatter(bld, outputs_array, index_vec, value, + &bld->exec_mask, pred); + } + else { + LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index, + chan_index); + lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr); + } break; case TGSI_FILE_TEMPORARY: - lp_exec_mask_store(&bld->exec_mask, value, - bld->temps[reg->Register.Index][chan_index]); + if (reg->Register.Indirect) { + LLVMBuilderRef builder = bld->base.builder; + LLVMValueRef chan_vec = + lp_build_const_int_vec(uint_bld->type, chan_index); + LLVMValueRef length_vec = + lp_build_const_int_vec(uint_bld->type, bld->base.type.length); + LLVMValueRef index_vec; /* indexes into the temp registers */ + LLVMValueRef temps_array; + LLVMValueRef pixel_offsets; + LLVMTypeRef float_ptr_type; + int i; + + /* build pixel offset vector: {0, 1, 2, 3, ...} */ + pixel_offsets = uint_bld->undef; + for (i = 0; i < bld->base.type.length; i++) { + LLVMValueRef ii = lp_build_const_int32(i); + pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, + ii, ii, ""); + } + + /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); + index_vec = lp_build_add(uint_bld, index_vec, chan_vec); + index_vec = lp_build_mul(uint_bld, index_vec, length_vec); + index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); + + float_ptr_type = LLVMPointerType(LLVMFloatType(), 0); + temps_array = LLVMBuildBitCast(builder, bld->temps_array, + float_ptr_type, ""); + + /* Scatter store values into temp registers */ + emit_mask_scatter(bld, temps_array, index_vec, value, + &bld->exec_mask, pred); + } + else { + LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, + chan_index); + lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); + } break; case TGSI_FILE_ADDRESS: - /* FIXME */ - assert(0); + lp_exec_mask_store(&bld->exec_mask, pred, value, + bld->addr[reg->Indirect.Index][chan_index]); break; case TGSI_FILE_PREDICATE: - /* FIXME */ - assert(0); + lp_exec_mask_store(&bld->exec_mask, pred, value, + bld->preds[reg->Register.Index][chan_index]); break; default: @@ -529,21 +1002,29 @@ emit_store( * High-level instruction translators. */ - static void emit_tex( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, - boolean apply_lodbias, - boolean projected, + enum lp_build_tex_modifier modifier, LLVMValueRef *texel) { - const uint unit = inst->Src[1].Register.Index; - LLVMValueRef lodbias; + unsigned unit; + LLVMValueRef lod_bias, explicit_lod; LLVMValueRef oow = NULL; LLVMValueRef coords[3]; + LLVMValueRef ddx[3]; + LLVMValueRef ddy[3]; unsigned num_coords; unsigned i; + if (!bld->sampler) { + _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); + for (i = 0; i < 4; i++) { + texel[i] = bld->base.undef; + } + return; + } + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: num_coords = 1; @@ -564,32 +1045,101 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, return; } - if(apply_lodbias) - lodbias = emit_fetch( bld, inst, 0, 3 ); - else - lodbias = bld->base.zero; + if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { + lod_bias = emit_fetch( bld, inst, 0, 3 ); + explicit_lod = NULL; + } + else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { + lod_bias = NULL; + explicit_lod = emit_fetch( bld, inst, 0, 3 ); + } + else { + lod_bias = NULL; + explicit_lod = NULL; + } - if (projected) { + if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { oow = emit_fetch( bld, inst, 0, 3 ); oow = lp_build_rcp(&bld->base, oow); } for (i = 0; i < num_coords; i++) { coords[i] = emit_fetch( bld, inst, 0, i ); - if (projected) + if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) coords[i] = lp_build_mul(&bld->base, coords[i], oow); } for (i = num_coords; i < 3; i++) { coords[i] = bld->base.undef; } + if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { + LLVMTypeRef i32t = LLVMInt32Type(); + LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); + for (i = 0; i < num_coords; i++) { + LLVMValueRef src1 = emit_fetch( bld, inst, 1, i ); + LLVMValueRef src2 = emit_fetch( bld, inst, 2, i ); + ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, ""); + ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, ""); + } + unit = inst->Src[3].Register.Index; + } else { + for (i = 0; i < num_coords; i++) { + ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] ); + ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] ); + } + unit = inst->Src[1].Register.Index; + } + for (i = num_coords; i < 3; i++) { + ddx[i] = LLVMGetUndef(bld->base.elem_type); + ddy[i] = LLVMGetUndef(bld->base.elem_type); + } + bld->sampler->emit_fetch_texel(bld->sampler, bld->base.builder, bld->base.type, - unit, num_coords, coords, lodbias, + unit, num_coords, coords, + ddx, ddy, + lod_bias, explicit_lod, texel); } +static boolean +near_end_of_shader(struct lp_build_tgsi_soa_context *bld, + int pc) +{ + int i; + + for (i = 0; i < 5; i++) { + unsigned opcode; + + if (pc + i >= bld->info->num_instructions) + return TRUE; + + opcode = bld->instructions[pc + i].Instruction.Opcode; + + if (opcode == TGSI_OPCODE_END) + return TRUE; + + if (opcode == TGSI_OPCODE_TEX || + opcode == TGSI_OPCODE_TXP || + opcode == TGSI_OPCODE_TXD || + opcode == TGSI_OPCODE_TXB || + opcode == TGSI_OPCODE_TXL || + opcode == TGSI_OPCODE_TXF || + opcode == TGSI_OPCODE_TXQ || + opcode == TGSI_OPCODE_CAL || + opcode == TGSI_OPCODE_CALLNZ || + opcode == TGSI_OPCODE_IF || + opcode == TGSI_OPCODE_IFC || + opcode == TGSI_OPCODE_BGNLOOP || + opcode == TGSI_OPCODE_SWITCH) + return FALSE; + } + + return TRUE; +} + + /** * Kill fragment if any of the src register values are negative. @@ -597,7 +1147,8 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, static void emit_kil( struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst ) + const struct tgsi_full_instruction *inst, + int pc) { const struct tgsi_full_src_register *reg = &inst->Src[0]; LLVMValueRef terms[NUM_CHANNELS]; @@ -636,8 +1187,12 @@ emit_kil( } } - if(mask) + if(mask) { lp_build_mask_update(bld->mask, mask); + + if (!near_end_of_shader(bld, pc)) + lp_build_mask_check(bld->mask); + } } @@ -649,7 +1204,8 @@ emit_kil( */ static void emit_kilp(struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst) + const struct tgsi_full_instruction *inst, + int pc) { LLVMValueRef mask; @@ -660,68 +1216,104 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp"); } else { - mask = bld->base.zero; + LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type); + mask = zero; } lp_build_mask_update(bld->mask, mask); + + if (!near_end_of_shader(bld, pc)) + lp_build_mask_check(bld->mask); } /** - * Check if inst src/dest regs use indirect addressing into temporary - * register file. + * Emit code which will dump the value of all the temporary registers + * to stdout. */ -static boolean -indirect_temp_reference(const struct tgsi_full_instruction *inst) +static void +emit_dump_temps(struct lp_build_tgsi_soa_context *bld) { - uint i; - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *reg = &inst->Src[i]; - if (reg->Register.File == TGSI_FILE_TEMPORARY && - reg->Register.Indirect) - return TRUE; - } - for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *reg = &inst->Dst[i]; - if (reg->Register.File == TGSI_FILE_TEMPORARY && - reg->Register.Indirect) - return TRUE; + LLVMBuilderRef builder = bld->base.builder; + LLVMValueRef temp_ptr; + LLVMValueRef i0 = lp_build_const_int32(0); + LLVMValueRef i1 = lp_build_const_int32(1); + LLVMValueRef i2 = lp_build_const_int32(2); + LLVMValueRef i3 = lp_build_const_int32(3); + int index; + int n = bld->info->file_max[TGSI_FILE_TEMPORARY]; + + for (index = 0; index < n; index++) { + LLVMValueRef idx = lp_build_const_int32(index); + LLVMValueRef v[4][4], res; + int chan; + + lp_build_printf(builder, "TEMP[%d]:\n", idx); + + for (chan = 0; chan < 4; chan++) { + temp_ptr = get_temp_ptr(bld, index, chan); + res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); + v[chan][0] = LLVMBuildExtractElement(builder, res, i0, ""); + v[chan][1] = LLVMBuildExtractElement(builder, res, i1, ""); + v[chan][2] = LLVMBuildExtractElement(builder, res, i2, ""); + v[chan][3] = LLVMBuildExtractElement(builder, res, i3, ""); + } + + lp_build_printf(builder, " X: %f %f %f %f\n", + v[0][0], v[0][1], v[0][2], v[0][3]); + lp_build_printf(builder, " Y: %f %f %f %f\n", + v[1][0], v[1][1], v[1][2], v[1][3]); + lp_build_printf(builder, " Z: %f %f %f %f\n", + v[2][0], v[2][1], v[2][2], v[2][3]); + lp_build_printf(builder, " W: %f %f %f %f\n", + v[3][0], v[3][1], v[3][2], v[3][3]); } - return FALSE; } -static int + + +static void emit_declaration( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_declaration *decl) { - unsigned first = decl->Range.First; - unsigned last = decl->Range.Last; + LLVMTypeRef vec_type = bld->base.vec_type; + const unsigned first = decl->Range.First; + const unsigned last = decl->Range.Last; unsigned idx, i; - LLVMBasicBlockRef current_block = - LLVMGetInsertBlock(bld->base.builder); - LLVMBasicBlockRef first_block = - LLVMGetEntryBasicBlock( - LLVMGetBasicBlockParent(current_block)); - LLVMValueRef first_inst = - LLVMGetFirstInstruction(first_block); - - /* we want alloca's to be the first instruction - * in the function so we need to rewind the builder - * to the very beginning */ - LLVMPositionBuilderBefore(bld->base.builder, - first_inst); for (idx = first; idx <= last; ++idx) { + assert(last <= bld->info->file_max[decl->Declaration.File]); switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: - for (i = 0; i < NUM_CHANNELS; i++) - bld->temps[idx][i] = lp_build_alloca(&bld->base); + assert(idx < LP_MAX_TGSI_TEMPS); + if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { + for (i = 0; i < NUM_CHANNELS; i++) + bld->temps[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, "temp"); + } break; case TGSI_FILE_OUTPUT: + if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { + for (i = 0; i < NUM_CHANNELS; i++) + bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, "output"); + } + break; + + case TGSI_FILE_ADDRESS: + assert(idx < LP_MAX_TGSI_ADDRS); for (i = 0; i < NUM_CHANNELS; i++) - bld->outputs[idx][i] = lp_build_alloca(&bld->base); + bld->addr[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, "addr"); + break; + + case TGSI_FILE_PREDICATE: + assert(idx < LP_MAX_TGSI_PREDS); + for (i = 0; i < NUM_CHANNELS; i++) + bld->preds[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, "predicate"); break; default: @@ -729,10 +1321,6 @@ emit_declaration( break; } } - - LLVMPositionBuilderAtEnd(bld->base.builder, - current_block); - return TRUE; } @@ -744,7 +1332,8 @@ static boolean emit_instruction( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, - const struct tgsi_opcode_info *info) + const struct tgsi_opcode_info *info, + int *pc) { unsigned chan_index; LLVMValueRef src0, src1, src2; @@ -757,10 +1346,6 @@ emit_instruction( LLVMValueRef res; LLVMValueRef dst0[NUM_CHANNELS]; - /* we can't handle indirect addressing into temp register file yet */ - if (indirect_temp_reference(inst)) - return FALSE; - /* * Stores and write masks are handled in a general fashion after the long * instruction opcode switch statement. @@ -772,25 +1357,23 @@ emit_instruction( * redundant code. */ + (*pc)++; + assert(info->num_dst <= 1); - if(info->num_dst) { + if (info->num_dst) { FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { dst0[chan_index] = bld->base.undef; } } switch (inst->Instruction.Opcode) { -#if 0 case TGSI_OPCODE_ARL: - /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_flr(bld, 0, 0); - emit_f2it( bld, 0 ); + tmp0 = lp_build_floor(&bld->base, tmp0); dst0[chan_index] = tmp0; } break; -#endif case TGSI_OPCODE_MOV: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { @@ -1231,12 +1814,12 @@ emit_instruction( case TGSI_OPCODE_KILP: /* predicated kill */ - emit_kilp( bld, inst ); + emit_kilp( bld, inst, (*pc)-1 ); break; case TGSI_OPCODE_KIL: /* conditional kill */ - emit_kil( bld, inst ); + emit_kil( bld, inst, (*pc)-1 ); break; case TGSI_OPCODE_PK2H: @@ -1316,12 +1899,11 @@ emit_instruction( break; case TGSI_OPCODE_TEX: - emit_tex( bld, inst, FALSE, FALSE, dst0 ); + emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 ); break; case TGSI_OPCODE_TXD: - /* FIXME */ - return FALSE; + emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); break; case TGSI_OPCODE_UP2H: @@ -1360,17 +1942,13 @@ emit_instruction( return FALSE; break; -#if 0 case TGSI_OPCODE_ARR: - /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_rnd( bld, 0, 0 ); - emit_f2it( bld, 0 ); + tmp0 = lp_build_round(&bld->base, tmp0); dst0[chan_index] = tmp0; } break; -#endif case TGSI_OPCODE_BRA: /* deprecated */ @@ -1379,16 +1957,22 @@ emit_instruction( break; case TGSI_OPCODE_CAL: - /* FIXME */ - return FALSE; + lp_exec_mask_call(&bld->exec_mask, + inst->Label.Label, + pc); + break; case TGSI_OPCODE_RET: - /* FIXME */ - return FALSE; + lp_exec_mask_ret(&bld->exec_mask, pc); break; case TGSI_OPCODE_END: + if (0) { + /* for debugging */ + emit_dump_temps(bld); + } + *pc = -1; break; case TGSI_OPCODE_SSG: @@ -1427,7 +2011,7 @@ emit_instruction( break; case TGSI_OPCODE_TXB: - emit_tex( bld, inst, TRUE, FALSE, dst0 ); + emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 ); break; case TGSI_OPCODE_NRM: @@ -1532,11 +2116,11 @@ emit_instruction( break; case TGSI_OPCODE_TXL: - emit_tex( bld, inst, TRUE, FALSE, dst0 ); + emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 ); break; case TGSI_OPCODE_TXP: - emit_tex( bld, inst, FALSE, TRUE, dst0 ); + emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 ); break; case TGSI_OPCODE_BRK: @@ -1550,20 +2134,12 @@ emit_instruction( lp_exec_mask_cond_push(&bld->exec_mask, tmp0); break; - case TGSI_OPCODE_BGNFOR: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_BGNLOOP: lp_exec_bgnloop(&bld->exec_mask); break; - case TGSI_OPCODE_REP: - /* deprecated */ - assert(0); - return FALSE; + case TGSI_OPCODE_BGNSUB: + lp_exec_mask_bgnsub(&bld->exec_mask); break; case TGSI_OPCODE_ELSE: @@ -1574,20 +2150,12 @@ emit_instruction( lp_exec_mask_cond_pop(&bld->exec_mask); break; - case TGSI_OPCODE_ENDFOR: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_ENDLOOP: lp_exec_endloop(&bld->exec_mask); break; - case TGSI_OPCODE_ENDREP: - /* deprecated */ - assert(0); - return FALSE; + case TGSI_OPCODE_ENDSUB: + lp_exec_mask_endsub(&bld->exec_mask, pc); break; case TGSI_OPCODE_PUSHA: @@ -1702,8 +2270,12 @@ emit_instruction( } if(info->num_dst) { + LLVMValueRef pred[NUM_CHANNELS]; + + emit_fetch_predicate( bld, inst, pred ); + FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, dst0[chan_index]); + emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]); } } @@ -1720,25 +2292,89 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef (*outputs)[NUM_CHANNELS], - struct lp_build_sampler_soa *sampler) + struct lp_build_sampler_soa *sampler, + const struct tgsi_shader_info *info) { struct lp_build_tgsi_soa_context bld; struct tgsi_parse_context parse; uint num_immediates = 0; + uint num_instructions = 0; unsigned i; + int pc = 0; + + struct lp_type res_type; + + assert(type.length <= LP_MAX_VECTOR_LENGTH); + memset(&res_type, 0, sizeof res_type); + res_type.width = type.width; + res_type.length = type.length; + res_type.sign = 1; /* Setup build context */ memset(&bld, 0, sizeof bld); lp_build_context_init(&bld.base, builder, type); + lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type)); + lp_build_context_init(&bld.elem_bld, builder, lp_elem_type(type)); bld.mask = mask; bld.pos = pos; bld.inputs = inputs; bld.outputs = outputs; bld.consts_ptr = consts_ptr; bld.sampler = sampler; + bld.info = info; + bld.indirect_files = info->indirect_files; + bld.instructions = (struct tgsi_full_instruction *) + MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) ); + bld.max_instructions = LP_MAX_INSTRUCTIONS; + + if (!bld.instructions) { + return; + } lp_exec_mask_init(&bld.exec_mask, &bld.base); + if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { + LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), + info->file_max[TGSI_FILE_TEMPORARY]*4 + 4, 0); + bld.temps_array = lp_build_array_alloca(bld.base.builder, + bld.base.vec_type, array_size, + "temp_array"); + } + + if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { + LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), + info->file_max[TGSI_FILE_OUTPUT]*4 + 4, 0); + bld.outputs_array = lp_build_array_alloca(bld.base.builder, + bld.base.vec_type, array_size, + "output_array"); + } + + /* If we have indirect addressing in inputs we need to copy them into + * our alloca array to be able to iterate over them */ + if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) { + unsigned index, chan; + LLVMTypeRef vec_type = bld.base.vec_type; + LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), + info->file_max[TGSI_FILE_INPUT]*4 + 4, 0); + bld.inputs_array = lp_build_array_alloca(bld.base.builder, + vec_type, array_size, + "input_array"); + + assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1); + + for (index = 0; index < info->num_inputs; ++index) { + for (chan = 0; chan < NUM_CHANNELS; ++chan) { + LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan); + LLVMValueRef input_ptr = + LLVMBuildGEP(bld.base.builder, bld.inputs_array, + &lindex, 1, ""); + LLVMValueRef value = bld.inputs[index][chan]; + if (value) + LLVMBuildStore(bld.base.builder, value, input_ptr); + } + } + } + tgsi_parse_init( &parse, tokens ); while( !tgsi_parse_end_of_tokens( &parse ) ) { @@ -1747,19 +2383,31 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: /* Inputs already interpolated */ - { - if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration )) - _debug_printf("warning: failed to define LLVM variable\n"); - } + emit_declaration( &bld, &parse.FullToken.FullDeclaration ); break; case TGSI_TOKEN_TYPE_INSTRUCTION: { - unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode; - const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); - if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info )) - _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", - info ? info->mnemonic : ""); + /* save expanded instruction */ + if (num_instructions == bld.max_instructions) { + struct tgsi_full_instruction *instructions; + instructions = REALLOC(bld.instructions, + bld.max_instructions + * sizeof(struct tgsi_full_instruction), + (bld.max_instructions + LP_MAX_INSTRUCTIONS) + * sizeof(struct tgsi_full_instruction)); + if (!instructions) { + break; + } + bld.instructions = instructions; + bld.max_instructions += LP_MAX_INSTRUCTIONS; + } + + memcpy(bld.instructions + num_instructions, + &parse.FullToken.FullInstruction, + sizeof(bld.instructions[0])); + + num_instructions++; } break; @@ -1769,7 +2417,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, { const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; assert(size <= 4); - assert(num_immediates < LP_MAX_IMMEDIATES); + assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); for( i = 0; i < size; ++i ) bld.immediates[num_immediates][i] = lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float); @@ -1786,14 +2434,45 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, assert( 0 ); } } + + while (pc != -1) { + struct tgsi_full_instruction *instr = bld.instructions + pc; + const struct tgsi_opcode_info *opcode_info = + tgsi_get_opcode_info(instr->Instruction.Opcode); + if (!emit_instruction( &bld, instr, opcode_info, &pc )) + _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", + opcode_info->mnemonic); + } + + /* If we have indirect addressing in outputs we need to copy our alloca array + * to the outputs slots specified by the called */ + if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { + unsigned index, chan; + assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1); + for (index = 0; index < info->num_outputs; ++index) { + for (chan = 0; chan < NUM_CHANNELS; ++chan) { + bld.outputs[index][chan] = get_output_ptr(&bld, index, chan); + } + } + } + if (0) { LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); LLVMValueRef function = LLVMGetBasicBlockParent(block); debug_printf("11111111111111111111111111111 \n"); tgsi_dump(tokens, 0); - LLVMDumpValue(function); + lp_debug_dump_value(function); debug_printf("2222222222222222222222222222 \n"); } tgsi_parse_free( &parse ); + + if (0) { + LLVMModuleRef module = LLVMGetGlobalParent( + LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder))); + LLVMDumpModule(module); + + } + + FREE( bld.instructions ); }