X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fgallivm%2Flp_bld_tgsi_soa.c;h=d3c769e28b88ae2c9236aa57c58ed9feda79ce1a;hb=c7f5c9a3dc6350252e73b541bb85ab3ed9e64a9c;hp=8901e656aed290c4f527c1663d960300349a36b8;hpb=4fb2daf42c8171579cdc18605c5ceeb1963f8b31;p=mesa.git diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 8901e656aed..d3c769e28b8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -46,6 +46,7 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_scan.h" #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_arit.h" @@ -125,6 +126,12 @@ struct lp_build_tgsi_soa_context LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; + LLVMValueRef addr[LP_MAX_TEMPS][NUM_CHANNELS]; + + /* we allocate an array of temps if we have indirect + * addressing and then the temps above is unused */ + LLVMValueRef temps_array; + boolean has_indirect_addressing; struct lp_build_mask_context *mask; struct lp_exec_mask exec_mask; @@ -169,9 +176,9 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context static void lp_exec_mask_update(struct lp_exec_mask *mask) { if (mask->loop_stack_size) { - /*for loops we need to update the entire mask at - * runtime */ + /*for loops we need to update the entire mask at runtime */ LLVMValueRef tmp; + assert(mask->break_mask); tmp = LLVMBuildAnd(mask->bld->builder, mask->cont_mask, mask->break_mask, @@ -227,10 +234,13 @@ static void lp_exec_bgnloop(struct lp_exec_mask *mask) if (mask->cont_stack_size == 0) mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type); - if (mask->cont_stack_size == 0) + if (mask->break_stack_size == 0) mask->break_mask = LLVMConstAllOnes(mask->int_vec_type); if (mask->cond_stack_size == 0) mask->cond_mask = 
LLVMConstAllOnes(mask->int_vec_type); + + mask->break_stack[mask->break_stack_size++] = mask->break_mask; + mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; mask->loop_stack[mask->loop_stack_size++] = mask->loop_block; mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); LLVMBuildBr(mask->bld->builder, mask->loop_block); @@ -245,7 +255,10 @@ static void lp_exec_break(struct lp_exec_mask *mask) mask->exec_mask, "break"); - mask->break_stack[mask->break_stack_size++] = mask->break_mask; + /* mask->break_stack_size > 1 implies that we encountered a break + * statement already and if that's the case we want to make sure + * our mask is a combination of the previous break and the current + * execution mask */ if (mask->break_stack_size > 1) { mask->break_mask = LLVMBuildAnd(mask->bld->builder, mask->break_mask, @@ -262,7 +275,6 @@ static void lp_exec_continue(struct lp_exec_mask *mask) mask->exec_mask, ""); - mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; if (mask->cont_stack_size > 1) { mask->cont_mask = LLVMBuildAnd(mask->bld->builder, mask->cont_mask, @@ -279,8 +291,12 @@ static void lp_exec_endloop(struct lp_exec_mask *mask) LLVMBasicBlockRef endloop; LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width* mask->bld->type.length); + LLVMValueRef i1cond; + + assert(mask->break_mask); + /* i1cond = (mask == 0) */ - LLVMValueRef i1cond = LLVMBuildICmp( + i1cond = LLVMBuildICmp( mask->bld->builder, LLVMIntNE, LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""), @@ -294,17 +310,23 @@ static void lp_exec_endloop(struct lp_exec_mask *mask) LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); mask->loop_block = mask->loop_stack[--mask->loop_stack_size]; - /* pop the break mask */ + /* pop the cont mask */ if (mask->cont_stack_size) { mask->cont_mask = mask->cont_stack[--mask->cont_stack_size]; } + /* pop the break mask */ if (mask->break_stack_size) { - mask->break_mask = 
mask->cont_stack[--mask->break_stack_size]; + mask->break_mask = mask->break_stack[--mask->break_stack_size]; } lp_exec_mask_update(mask); } +/* stores val into an address pointed to by dst. + * mask->exec_mask is used to figure out which bits of val + * should be stored into the address + * (0 means don't store this bit, 1 means do store). + */ static void lp_exec_mask_store(struct lp_exec_mask *mask, LLVMValueRef val, LLVMValueRef dst) @@ -342,6 +364,23 @@ emit_ddy(struct lp_build_tgsi_soa_context *bld, return lp_build_sub(&bld->base, src_top, src_bottom); } +static LLVMValueRef +get_temp_ptr(struct lp_build_tgsi_soa_context *bld, + unsigned index, + unsigned swizzle, + boolean is_indirect, + LLVMValueRef addr) +{ + if (!bld->has_indirect_addressing) { + return bld->temps[index][swizzle]; + } else { + LLVMValueRef lindex = + LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0); + if (is_indirect) + lindex = lp_build_add(&bld->base, lindex, addr); + return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, ""); + } +} /** * Register fetch. 
@@ -356,6 +395,7 @@ emit_fetch( const struct tgsi_full_src_register *reg = &inst->Src[index]; unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); LLVMValueRef res; + LLVMValueRef addr; switch (swizzle) { case TGSI_SWIZZLE_X: @@ -363,11 +403,34 @@ emit_fetch( case TGSI_SWIZZLE_Z: case TGSI_SWIZZLE_W: + if (reg->Register.Indirect) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); + unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index ); + addr = LLVMBuildLoad(bld->base.builder, + bld->addr[reg->Indirect.Index][swizzle], + ""); + /* for indexing we want integers */ + addr = LLVMBuildFPToSI(bld->base.builder, addr, + int_vec_type, ""); + addr = LLVMBuildExtractElement(bld->base.builder, + addr, LLVMConstInt(LLVMInt32Type(), 0, 0), + ""); + addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); + } + switch (reg->Register.File) { case TGSI_FILE_CONSTANT: { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); - LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); - LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + LLVMValueRef scalar, scalar_ptr; + + if (reg->Register.Indirect) { + /*lp_build_printf(bld->base.builder, + "\taddr = %d\n", addr);*/ + index = lp_build_add(&bld->base, index, addr); + } + scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); + scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + res = lp_build_broadcast_scalar(&bld->base, scalar); break; } @@ -382,11 +445,16 @@ emit_fetch( assert(res); break; - case TGSI_FILE_TEMPORARY: - res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], ""); + case TGSI_FILE_TEMPORARY: { + LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, + swizzle, + reg->Register.Indirect, + addr); + res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); if(!res) return 
bld->base.undef; break; + } default: assert( 0 ); @@ -464,6 +532,7 @@ emit_store( LLVMValueRef value) { const struct tgsi_full_dst_register *reg = &inst->Dst[index]; + LLVMValueRef addr; switch( inst->Instruction.Saturate ) { case TGSI_SAT_NONE: @@ -483,20 +552,39 @@ emit_store( assert(0); } + if (reg->Register.Indirect) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); + unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index ); + addr = LLVMBuildLoad(bld->base.builder, + bld->addr[reg->Indirect.Index][swizzle], + ""); + /* for indexing we want integers */ + addr = LLVMBuildFPToSI(bld->base.builder, addr, + int_vec_type, ""); + addr = LLVMBuildExtractElement(bld->base.builder, + addr, LLVMConstInt(LLVMInt32Type(), 0, 0), + ""); + addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); + } + switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: lp_exec_mask_store(&bld->exec_mask, value, bld->outputs[reg->Register.Index][chan_index]); break; - case TGSI_FILE_TEMPORARY: - lp_exec_mask_store(&bld->exec_mask, value, - bld->temps[reg->Register.Index][chan_index]); + case TGSI_FILE_TEMPORARY: { + LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, + chan_index, + reg->Register.Indirect, + addr); + lp_exec_mask_store(&bld->exec_mask, value, temp_ptr); break; + } case TGSI_FILE_ADDRESS: - /* FIXME */ - assert(0); + lp_exec_mask_store(&bld->exec_mask, value, + bld->addr[reg->Indirect.Index][chan_index]); break; case TGSI_FILE_PREDICATE: @@ -576,6 +664,9 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, } +/** + * Kill fragment if any of the src register values are negative. + */ static void emit_kil( struct lp_build_tgsi_soa_context *bld, @@ -606,6 +697,9 @@ emit_kil( if(terms[chan_index]) { LLVMValueRef chan_mask; + /* + * If term < 0 then mask = 0 else mask = ~0. 
+ */ chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); if(mask) @@ -621,26 +715,28 @@ emit_kil( /** - * Check if inst src/dest regs use indirect addressing into temporary - * register file. + * Predicated fragment kill. + * XXX Actually, we do an unconditional kill (as in tgsi_exec.c). + * The only predication is the execution mask which will apply if + * we're inside a loop or conditional. */ -static boolean -indirect_temp_reference(const struct tgsi_full_instruction *inst) +static void +emit_kilp(struct lp_build_tgsi_soa_context *bld, + const struct tgsi_full_instruction *inst) { - uint i; - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *reg = &inst->Src[i]; - if (reg->Register.File == TGSI_FILE_TEMPORARY && - reg->Register.Indirect) - return TRUE; + LLVMValueRef mask; + + /* For those channels which are "alive", disable fragment shader + * execution. + */ + if (bld->exec_mask.has_mask) { + mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp"); } - for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *reg = &inst->Dst[i]; - if (reg->Register.File == TGSI_FILE_TEMPORARY && - reg->Register.Indirect) - return TRUE; + else { + mask = bld->base.zero; } - return FALSE; + + lp_build_mask_update(bld->mask, mask); } static int @@ -648,33 +744,37 @@ emit_declaration( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_declaration *decl) { + LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); + unsigned first = decl->Range.First; unsigned last = decl->Range.Last; unsigned idx, i; - LLVMBasicBlockRef current_block = - LLVMGetInsertBlock(bld->base.builder); - LLVMBasicBlockRef first_block = - LLVMGetEntryBasicBlock( - LLVMGetBasicBlockParent(current_block)); - LLVMValueRef first_inst = - LLVMGetFirstInstruction(first_block); - - /* we want alloca's to be the first instruction - * in the function so we need to rewind the 
builder - * to the very beginning */ - LLVMPositionBuilderBefore(bld->base.builder, - first_inst); for (idx = first; idx <= last; ++idx) { switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: - for (i = 0; i < NUM_CHANNELS; i++) - bld->temps[idx][i] = lp_build_alloca(&bld->base); + if (bld->has_indirect_addressing) { + LLVMValueRef val = LLVMConstInt(LLVMInt32Type(), + last*4 + 4, 0); + bld->temps_array = lp_build_array_alloca(bld->base.builder, + vec_type, val, ""); + } else { + for (i = 0; i < NUM_CHANNELS; i++) + bld->temps[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, ""); + } break; case TGSI_FILE_OUTPUT: for (i = 0; i < NUM_CHANNELS; i++) - bld->outputs[idx][i] = lp_build_alloca(&bld->base); + bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, ""); + break; + + case TGSI_FILE_ADDRESS: + for (i = 0; i < NUM_CHANNELS; i++) + bld->addr[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, ""); break; default: @@ -683,12 +783,15 @@ emit_declaration( } } - LLVMPositionBuilderAtEnd(bld->base.builder, - current_block); return TRUE; } -static int + +/** + * Emit LLVM for one TGSI instruction. + * \param return TRUE for success, FALSE otherwise + */ +static boolean emit_instruction( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, @@ -705,10 +808,6 @@ emit_instruction( LLVMValueRef res; LLVMValueRef dst0[NUM_CHANNELS]; - /* we can't handle indirect addressing into temp register file yet */ - if (indirect_temp_reference(inst)) - return FALSE; - /* * Stores and write masks are handled in a general fashion after the long * instruction opcode switch statement. 
@@ -728,17 +827,13 @@ emit_instruction( } switch (inst->Instruction.Opcode) { -#if 0 case TGSI_OPCODE_ARL: - /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_flr(bld, 0, 0); - emit_f2it( bld, 0 ); + tmp0 = lp_build_floor(&bld->base, tmp0); dst0[chan_index] = tmp0; } break; -#endif case TGSI_OPCODE_MOV: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { @@ -1136,7 +1231,7 @@ emit_instruction( case TGSI_OPCODE_RCC: /* deprecated? */ assert(0); - return 0; + return FALSE; case TGSI_OPCODE_DPH: tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); @@ -1179,8 +1274,7 @@ emit_instruction( case TGSI_OPCODE_KILP: /* predicated kill */ - /* FIXME */ - return 0; + emit_kilp( bld, inst ); break; case TGSI_OPCODE_KIL: @@ -1189,23 +1283,23 @@ emit_instruction( break; case TGSI_OPCODE_PK2H: - return 0; + return FALSE; break; case TGSI_OPCODE_PK2US: - return 0; + return FALSE; break; case TGSI_OPCODE_PK4B: - return 0; + return FALSE; break; case TGSI_OPCODE_PK4UB: - return 0; + return FALSE; break; case TGSI_OPCODE_RFL: - return 0; + return FALSE; break; case TGSI_OPCODE_SEQ: @@ -1270,71 +1364,67 @@ emit_instruction( case TGSI_OPCODE_TXD: /* FIXME */ - return 0; + return FALSE; break; case TGSI_OPCODE_UP2H: /* deprecated */ assert (0); - return 0; + return FALSE; break; case TGSI_OPCODE_UP2US: /* deprecated */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_UP4B: /* deprecated */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_UP4UB: /* deprecated */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_X2D: /* deprecated? 
*/ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_ARA: /* deprecated */ assert(0); - return 0; + return FALSE; break; -#if 0 case TGSI_OPCODE_ARR: - /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_rnd( bld, 0, 0 ); - emit_f2it( bld, 0 ); + tmp0 = lp_build_round(&bld->base, tmp0); dst0[chan_index] = tmp0; } break; -#endif case TGSI_OPCODE_BRA: /* deprecated */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_CAL: /* FIXME */ - return 0; + return FALSE; break; case TGSI_OPCODE_RET: /* FIXME */ - return 0; + return FALSE; break; case TGSI_OPCODE_END: @@ -1464,7 +1554,7 @@ emit_instruction( case TGSI_OPCODE_DIV: /* deprecated */ assert( 0 ); - return 0; + return FALSE; break; case TGSI_OPCODE_DP2: @@ -1499,22 +1589,10 @@ emit_instruction( lp_exec_mask_cond_push(&bld->exec_mask, tmp0); break; - case TGSI_OPCODE_BGNFOR: - /* deprecated */ - assert(0); - return 0; - break; - case TGSI_OPCODE_BGNLOOP: lp_exec_bgnloop(&bld->exec_mask); break; - case TGSI_OPCODE_REP: - /* deprecated */ - assert(0); - return 0; - break; - case TGSI_OPCODE_ELSE: lp_exec_mask_cond_invert(&bld->exec_mask); break; @@ -1523,32 +1601,20 @@ emit_instruction( lp_exec_mask_cond_pop(&bld->exec_mask); break; - case TGSI_OPCODE_ENDFOR: - /* deprecated */ - assert(0); - return 0; - break; - case TGSI_OPCODE_ENDLOOP: lp_exec_endloop(&bld->exec_mask); break; - case TGSI_OPCODE_ENDREP: - /* deprecated */ - assert(0); - return 0; - break; - case TGSI_OPCODE_PUSHA: /* deprecated? */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_POPA: /* deprecated? */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_CEIL: @@ -1561,13 +1627,13 @@ emit_instruction( case TGSI_OPCODE_I2F: /* deprecated? */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_NOT: /* deprecated? 
*/ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_TRUNC: @@ -1580,55 +1646,55 @@ emit_instruction( case TGSI_OPCODE_SHL: /* deprecated? */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_ISHR: /* deprecated? */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_AND: /* deprecated? */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_OR: /* deprecated? */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_MOD: /* deprecated? */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_XOR: /* deprecated? */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_SAD: /* deprecated? */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_TXF: /* deprecated? */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_TXQ: /* deprecated? */ assert(0); - return 0; + return FALSE; break; case TGSI_OPCODE_CONT: @@ -1636,18 +1702,18 @@ emit_instruction( break; case TGSI_OPCODE_EMIT: - return 0; + return FALSE; break; case TGSI_OPCODE_ENDPRIM: - return 0; + return FALSE; break; case TGSI_OPCODE_NOP: break; default: - return 0; + return FALSE; } if(info->num_dst) { @@ -1656,7 +1722,7 @@ emit_instruction( } } - return 1; + return TRUE; } @@ -1669,7 +1735,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef (*outputs)[NUM_CHANNELS], - struct lp_build_sampler_soa *sampler) + struct lp_build_sampler_soa *sampler, + struct tgsi_shader_info *info) { struct lp_build_tgsi_soa_context bld; struct tgsi_parse_context parse; @@ -1685,6 +1752,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.outputs = outputs; bld.consts_ptr = consts_ptr; bld.sampler = sampler; + bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 || + info->opcode_count[TGSI_OPCODE_ARL] > 0; lp_exec_mask_init(&bld.exec_mask, &bld.base); @@ -1705,10 +1774,10 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, case 
TGSI_TOKEN_TYPE_INSTRUCTION: { unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode; - const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); - if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info )) + const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode); + if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info )) _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", - info ? info->mnemonic : ""); + opcode_info->mnemonic); } break;