*
**************************************************************************/
+/**
+ * @file
+ * TGSI to LLVM IR translation -- SoA.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ *
+ * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
+ * Brian Paul, and others.
+ */
+
#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_exec.h"
#include "lp_bld_const.h"
#include "lp_bld_intr.h"
#include "lp_bld_arit.h"
+#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
+#include "lp_bld_flow.h"
#include "lp_bld_tgsi.h"
+#include "lp_bld_debug.h"
#define LP_MAX_TEMPS 256
for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
- ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+ ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
#define CHAN_Z 2
#define CHAN_W 3
+#define QUAD_TOP_LEFT 0 /* element indices consumed by the swizzle_left/right/top/bottom tables */
+#define QUAD_TOP_RIGHT 1
+#define QUAD_BOTTOM_LEFT 2
+#define QUAD_BOTTOM_RIGHT 3
+
struct lp_build_tgsi_soa_context
{
struct lp_build_context base;
- LLVMValueRef (*inputs)[4];
LLVMValueRef consts_ptr;
- LLVMValueRef (*outputs)[4];
- LLVMValueRef samplers_ptr;
+ const LLVMValueRef *pos; /* NOTE(review): presumably position values; not referenced in the visible hunks -- confirm */
+ const LLVMValueRef (*inputs)[NUM_CHANNELS]; /* read-only, NUM_CHANNELS values per entry */
+ LLVMValueRef (*outputs)[NUM_CHANNELS]; /* writable, NUM_CHANNELS values per entry */
+
+ struct lp_build_sampler_soa *sampler; /* emit_tex() calls sampler->emit_fetch_texel() */
+
+ LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; /* NUM_CHANNELS values per immediate slot */
+ LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; /* NUM_CHANNELS values per temporary slot */
- LLVMValueRef immediates[LP_MAX_IMMEDIATES][4];
- LLVMValueRef temps[LP_MAX_TEMPS][4];
+ struct lp_build_mask_context *mask; /* emit_kil() passes this to lp_build_mask_update() */
};
-/**
- * Function call helpers.
- */
+static const unsigned char
+swizzle_left[4] = { /* per element: index of the left-column element of the same row; used by emit_ddx() */
+ QUAD_TOP_LEFT, QUAD_TOP_LEFT,
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
+};
-/**
- * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be
- * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee
- * that the stack pointer is 16 byte aligned, as expected.
- */
-static void
-emit_func_call(
- struct lp_build_tgsi_soa_context *bld,
- const LLVMValueRef *args,
- unsigned nr_args,
- void (PIPE_CDECL *code)() )
+static const unsigned char
+swizzle_right[4] = { /* per element: index of the right-column element of the same row; used by emit_ddx() */
+ QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
+ QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
+};
+
+static const unsigned char
+swizzle_top[4] = { /* per element: index of the top-row element of the same column; used by emit_ddy() */
+ QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
+ QUAD_TOP_LEFT, QUAD_TOP_RIGHT
+};
+
+static const unsigned char
+swizzle_bottom[4] = { /* per element: index of the bottom-row element of the same column; used by emit_ddy() */
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
+};
+
+
+static LLVMValueRef
+emit_ddx(struct lp_build_tgsi_soa_context *bld,
+ LLVMValueRef src) /* value to differentiate; returns right-column minus left-column */
{
-#if 0
- LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
- void* Addr);
-#endif
+ LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left); /* left-column element replicated across each row */
+ LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right); /* right-column element replicated across each row */
+ return lp_build_sub(&bld->base, src_right, src_left); /* horizontal difference, identical for both elements of a row */
+}
+
+/*
+ * Vertical difference of src: the top-row element minus the bottom-row
+ * element of the same column, as selected by swizzle_top / swizzle_bottom.
+ * NOTE(review): the sign (top - bottom) is what this code emits; confirm it
+ * matches the convention expected for DDY by the callers.
+ */
+static LLVMValueRef
+emit_ddy(struct lp_build_tgsi_soa_context *bld,
+ LLVMValueRef src)
+{
+ struct lp_build_context *base = &bld->base;
+ LLVMValueRef upper_row = lp_build_swizzle1_aos(base, src, swizzle_top);
+ LLVMValueRef lower_row = lp_build_swizzle1_aos(base, src, swizzle_bottom);
+
+ return lp_build_sub(base, upper_row, lower_row);
}
/**
* Register fetch.
*/
-
static LLVMValueRef
emit_fetch(
struct lp_build_tgsi_soa_context *bld,
- const struct tgsi_full_src_register *reg,
+ const struct tgsi_full_instruction *inst, /* instruction whose source operand is read */
+ unsigned index, /* selects inst->FullSrcRegisters[index] */
const unsigned chan_index )
{
- unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+ const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
+ unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); /* source component feeding chan_index */
LLVMValueRef res;
switch (swizzle) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
+ case TGSI_SWIZZLE_X:
+ case TGSI_SWIZZLE_Y:
+ case TGSI_SWIZZLE_Z:
+ case TGSI_SWIZZLE_W:
switch (reg->SrcRegister.File) {
case TGSI_FILE_CONSTANT: {
default:
assert( 0 );
+ return bld->base.undef; /* only reached when assert() is compiled out: unknown register file */
}
break;
- case TGSI_EXTSWIZZLE_ZERO:
- res = bld->base.zero;
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- res = bld->base.one;
- break;
-
default:
assert( 0 );
+ return bld->base.undef; /* only reached when assert() is compiled out: unknown swizzle */
}
switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
break;
case TGSI_UTIL_SIGN_SET:
+ /* TODO: Use bitwise OR for floating point */
res = lp_build_abs( &bld->base, res );
res = LLVMBuildNeg( bld->base.builder, res, "" );
break;
return res;
}
-#define FETCH( FUNC, INST, INDEX, CHAN )\
- emit_fetch( FUNC, &(INST).FullSrcRegisters[INDEX], CHAN )
/**
- * Register store.
+ * Register fetch with derivatives.
+ *
+ * Fetches one channel of source operand `index` through emit_fetch() and,
+ * for each output pointer that is not NULL, stores the fetched value (res)
+ * and its emit_ddx() / emit_ddy() differences (ddx, ddy).
*/
+static void
+emit_fetch_deriv(
+ struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst,
+ unsigned index,
+ const unsigned chan_index,
+ LLVMValueRef *res,
+ LLVMValueRef *ddx,
+ LLVMValueRef *ddy)
+{
+ const LLVMValueRef value = emit_fetch(bld, inst, index, chan_index);
+
+ if (res != NULL) {
+ *res = value;
+ }
+
+ /* TODO: use interpolation coeffs for inputs */
+
+ if (ddx != NULL) {
+ *ddx = emit_ddx(bld, value);
+ }
+
+ if (ddy != NULL) {
+ *ddy = emit_ddy(bld, value);
+ }
+}
+
+/**
+ * Register store.
+ *
+ * Applies the instruction's saturate mode to `value`, then dispatches on
+ * the file of destination operand `index` (inst->FullDstRegisters[index]).
+ * The per-file store code is not visible in this hunk.
+ */
static void
emit_store(
struct lp_build_tgsi_soa_context *bld,
- const struct tgsi_full_dst_register *reg,
const struct tgsi_full_instruction *inst,
+ unsigned index, /* selects inst->FullDstRegisters[index] */
unsigned chan_index,
LLVMValueRef value)
{
+ const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
+
switch( inst->Instruction.Saturate ) {
case TGSI_SAT_NONE:
break;
case TGSI_SAT_ZERO_ONE:
- /* assert( 0 ); */
+ value = lp_build_max(&bld->base, value, bld->base.zero); /* clamp below at 0 ... */
+ value = lp_build_min(&bld->base, value, bld->base.one); /* ... and above at 1 */
break;
case TGSI_SAT_MINUS_PLUS_ONE:
- assert( 0 );
+ value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0)); /* clamp below at -1 ... */
+ value = lp_build_min(&bld->base, value, bld->base.one); /* ... and above at 1 */
break;
+
+ default:
+ assert(0); /* unknown saturate mode; value is stored unclamped when assert() is compiled out */
}
switch( reg->DstRegister.File ) {
}
}
-#define STORE( FUNC, INST, INDEX, CHAN, VAL )\
- emit_store( FUNC, &(INST).FullDstRegisters[INDEX], &(INST), CHAN, VAL )
-
-
-void PIPE_CDECL
-lp_build_tgsi_fetch_texel_soa( struct tgsi_sampler **samplers,
- uint32_t unit,
- float *store )
-{
- struct tgsi_sampler *sampler = samplers[unit];
-
-#if 0
- uint j;
-
- debug_printf("%s sampler: %p (%p) store: %p\n",
- __FUNCTION__,
- sampler, *sampler,
- store );
-
- debug_printf("lodbias %f\n", store[12]);
-
- for (j = 0; j < 4; j++)
- debug_printf("sample %d texcoord %f %f\n",
- j,
- store[0+j],
- store[4+j]);
-#endif
-
- {
- float rgba[NUM_CHANNELS][QUAD_SIZE];
- sampler->get_samples(sampler,
- &store[0],
- &store[4],
- &store[8],
- 0.0f, /*store[12], lodbias */
- rgba);
- memcpy(store, rgba, sizeof rgba);
- }
-
-#if 0
- for (j = 0; j < 4; j++)
- debug_printf("sample %d result %f %f %f %f\n",
- j,
- store[0+j],
- store[4+j],
- store[8+j],
- store[12+j]);
-#endif
-}
/**
* High-level instruction translators.
*/
+
+/*
+ * Emit a texture fetch for a TEX-style instruction.
+ *
+ * The sampler unit is the index of source register 1; the coordinates are
+ * channels 0..num_coords-1 of source register 0, where num_coords depends
+ * on the texture target.
+ *
+ * apply_lodbias: when set, the LOD bias is channel 3 of source register 0,
+ * otherwise zero.
+ * projected: when set, every coordinate is multiplied by the reciprocal
+ * of channel 3 of source register 0.
+ * texel: forwarded unchanged to sampler->emit_fetch_texel().
+ * NOTE(review): presumably receives the fetched channels --
+ * confirm against the sampler interface.
+ *
+ * For an unknown texture target nothing is emitted and texel is left
+ * untouched.
+ */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
boolean apply_lodbias,
- boolean projected)
+ boolean projected,
+ LLVMValueRef *texel)
{
- LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
LLVMValueRef lodbias;
- LLVMValueRef oow;
- LLVMValueRef store_ptr;
- LLVMValueRef args[3];
- unsigned count;
+ LLVMValueRef oow = NULL;
+ LLVMValueRef coords[3];
+ unsigned num_coords;
unsigned i;
switch (inst->InstructionExtTexture.Texture) {
case TGSI_TEXTURE_1D:
- case TGSI_TEXTURE_SHADOW1D:
- count = 1;
+ num_coords = 1;
break;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
+ num_coords = 2;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
case TGSI_TEXTURE_SHADOW2D:
case TGSI_TEXTURE_SHADOWRECT:
- count = 2;
- break;
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
- count = 3;
+ num_coords = 3;
break;
default:
assert(0);
+ /* With assert() compiled out, falling through would read an
+ * uninitialized num_coords and could overrun coords[]. */
+ return;
}
if(apply_lodbias)
- lodbias = FETCH( bld, *inst, 0, 3 );
+ lodbias = emit_fetch( bld, inst, 0, 3 );
else
lodbias = bld->base.zero;
- store_ptr = LLVMBuildArrayAlloca(bld->base.builder,
- vec_type,
- LLVMConstInt(LLVMInt32Type(), 4, 0),
- "store");
-
if (projected) {
- oow = FETCH( bld, *inst, 0, 3 );
+ oow = emit_fetch( bld, inst, 0, 3 );
oow = lp_build_rcp(&bld->base, oow);
}
- for (i = 0; i < count; i++) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef coord_ptr = LLVMBuildGEP(bld->base.builder, store_ptr, &index, 1, "");
- LLVMValueRef coord;
-
- coord = FETCH( bld, *inst, 0, i );
-
+ for (i = 0; i < num_coords; i++) {
+ coords[i] = emit_fetch( bld, inst, 0, i );
if (projected)
- coord = lp_build_mul(&bld->base, coord, oow);
-
- LLVMBuildStore(bld->base.builder, coord, coord_ptr);
+ coords[i] = lp_build_mul(&bld->base, coords[i], oow);
}
- args[0] = bld->samplers_ptr;
- args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
- args[2] = store_ptr;
-
- lp_build_intrinsic(bld->base.builder, "fetch_texel", LLVMVoidType(), args, 3);
-
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, i ) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef res_ptr = LLVMBuildGEP(bld->base.builder, store_ptr, &index, 1, "");
- LLVMValueRef res = LLVMBuildLoad(bld->base.builder, res_ptr, "");
- STORE( bld, *inst, 0, i, res );
- }
+ bld->sampler->emit_fetch_texel(bld->sampler,
+ bld->base.builder,
+ bld->base.type,
+ unit, num_coords, coords, lodbias,
+ texel);
}
static void
emit_kil(
struct lp_build_tgsi_soa_context *bld,
- const struct tgsi_full_src_register *reg )
+ const struct tgsi_full_instruction *inst ) /* the test reads source register 0 of inst */
{
-#if 0
- unsigned uniquemask;
- unsigned unique_count = 0;
+ const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
+ LLVMValueRef terms[NUM_CHANNELS]; /* fetched value per distinct source component; NULL = not fetched */
+ LLVMValueRef mask;
unsigned chan_index;
- unsigned i;
- /* This mask stores component bits that were already tested. Note that
- * we test if the value is less than zero, so 1.0 and 0.0 need not to be
- * tested. */
- uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+ memset(&terms, 0, sizeof terms); /* start with every component marked as not fetched */
FOR_EACH_CHANNEL( chan_index ) {
unsigned swizzle;
- /* unswizzle channel */
- swizzle = tgsi_util_get_full_src_register_extswizzle(
- reg,
- chan_index );
-
- /* check if the component has not been already tested */
- if( !(uniquemask & (1 << swizzle)) ) {
- uniquemask |= 1 << swizzle;
+ /* Unswizzle channel */
+ swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
- /* allocate register */
- emit_fetch(
- bld,
- unique_count++,
- reg,
- chan_index );
- }
+ /* Check if the component has not been already tested. */
+ assert(swizzle < NUM_CHANNELS);
+ if( !terms[swizzle] )
+ /* TODO: change the comparison operator instead of setting the sign */
+ terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
}
- x86_push(
- bld,
- x86_make_reg( file_REG32, reg_AX ) );
- x86_push(
- bld,
- x86_make_reg( file_REG32, reg_DX ) );
-
- for (i = 0 ; i < unique_count; i++ ) {
- LLVMValueRef dataXMM = make_xmm(i);
-
- sse_cmpps(
- bld,
- dataXMM,
- get_temp(
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C ),
- cc_LessThan );
-
- if( i == 0 ) {
- sse_movmskps(
- bld,
- x86_make_reg( file_REG32, reg_AX ),
- dataXMM );
- }
- else {
- sse_movmskps(
- bld,
- x86_make_reg( file_REG32, reg_DX ),
- dataXMM );
- x86_or(
- bld,
- x86_make_reg( file_REG32, reg_AX ),
- x86_make_reg( file_REG32, reg_DX ) );
- }
- }
+ mask = NULL; /* stays NULL only if no component was fetched */
+ FOR_EACH_CHANNEL( chan_index ) {
+ if(terms[chan_index]) {
+ LLVMValueRef chan_mask;
- x86_or(
- bld,
- get_temp(
- TGSI_EXEC_TEMP_KILMASK_I,
- TGSI_EXEC_TEMP_KILMASK_C ),
- x86_make_reg( file_REG32, reg_AX ) );
-
- x86_pop(
- bld,
- x86_make_reg( file_REG32, reg_DX ) );
- x86_pop(
- bld,
- x86_make_reg( file_REG32, reg_AX ) );
-#endif
-}
+ chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); /* component >= 0 */
+ if(mask)
+ mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, ""); /* AND together the per-component tests */
+ else
+ mask = chan_mask;
+ }
+ }
-static void
-emit_kilp(
- struct lp_build_tgsi_soa_context *bld )
-{
- /* XXX todo / fix me */
+ if(mask)
+ lp_build_mask_update(bld->mask, mask); /* NOTE(review): presumably disables lanes where any tested component is < 0 -- confirm in lp_bld_flow */
}
static int
emit_instruction(
struct lp_build_tgsi_soa_context *bld,
- struct tgsi_full_instruction *inst )
+ const struct tgsi_full_instruction *inst,
+ const struct tgsi_opcode_info *info)
{
unsigned chan_index;
- LLVMValueRef tmp;
+ LLVMValueRef src0, src1, src2;
+ LLVMValueRef tmp0, tmp1, tmp2;
+ LLVMValueRef tmp3 = NULL;
+ LLVMValueRef tmp4 = NULL;
+ LLVMValueRef tmp5 = NULL;
+ LLVMValueRef tmp6 = NULL;
+ LLVMValueRef tmp7 = NULL;
+ LLVMValueRef res;
+ LLVMValueRef dst0[NUM_CHANNELS];
/* we can't handle indirect addressing into temp register file yet */
if (indirect_temp_reference(inst))
return FALSE;
+ assert(info->num_dst <= 1);
+ if(info->num_dst) {
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.undef;
+ }
+ }
+
switch (inst->Instruction.Opcode) {
#if 0
case TGSI_OPCODE_ARL:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( bld, *inst, 0, 0, chan_index );
+ /* FIXME */
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
emit_flr(bld, 0, 0);
emit_f2it( bld, 0 );
- STORE( bld, *inst, 0, 0, chan_index );
+ dst0[chan_index] = tmp0;
}
break;
#endif
case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( bld, *inst, 0, chan_index, FETCH( bld, *inst, 0, chan_index ) );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
}
break;
-#if 0
case TGSI_OPCODE_LIT:
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
- emit_tempf(
- bld,
- 0,
- TEMP_ONE_I,
- TEMP_ONE_C);
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
- STORE( bld, *inst, 0, 0, CHAN_X );
- }
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
- STORE( bld, *inst, 0, 0, CHAN_W );
- }
+ if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
+ dst0[CHAN_X] = bld->base.one;
}
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- tmp = FETCH( bld, *inst, 0, 0, CHAN_X );
- sse_maxps(
- bld,
- make_xmm( 0 ),
- get_temp(
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C ) );
- STORE( bld, *inst, 0, 0, CHAN_Y );
- }
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- /* XMM[1] = SrcReg[0].yyyy */
- FETCH( bld, *inst, 1, 0, CHAN_Y );
- /* XMM[1] = max(XMM[1], 0) */
- sse_maxps(
- bld,
- make_xmm( 1 ),
- get_temp(
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C ) );
- /* XMM[2] = SrcReg[0].wwww */
- FETCH( bld, *inst, 2, 0, CHAN_W );
- /* XMM[2] = min(XMM[2], 128.0) */
- sse_minps(
- bld,
- make_xmm( 2 ),
- get_temp(
- TGSI_EXEC_TEMP_128_I,
- TGSI_EXEC_TEMP_128_C ) );
- /* XMM[2] = max(XMM[2], -128.0) */
- sse_maxps(
- bld,
- make_xmm( 2 ),
- get_temp(
- TGSI_EXEC_TEMP_MINUS_128_I,
- TGSI_EXEC_TEMP_MINUS_128_C ) );
- emit_pow( bld, 3, 1, 1, 2 );
- FETCH( bld, *inst, 0, 0, CHAN_X );
- sse_xorps(
- bld,
- make_xmm( 2 ),
- make_xmm( 2 ) );
- sse_cmpps(
- bld,
- make_xmm( 2 ),
- make_xmm( 0 ),
- cc_LessThan );
- sse_andps(
- bld,
- make_xmm( 2 ),
- make_xmm( 1 ) );
- STORE( bld, *inst, 2, 0, CHAN_Z );
- }
+ if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
+ src0 = emit_fetch( bld, inst, 0, CHAN_X );
+ dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
+ }
+ if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
+ /* XMM[1] = SrcReg[0].yyyy */
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
+ /* XMM[1] = max(XMM[1], 0) */
+ tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
+ /* XMM[2] = SrcReg[0].wwww */
+ tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
+ tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
+ dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
+ }
+ if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
+ dst0[CHAN_W] = bld->base.one;
}
break;
-#endif
case TGSI_OPCODE_RCP:
/* TGSI_OPCODE_RECIP */
- tmp = FETCH( bld, *inst, 0, CHAN_X );
- tmp = lp_build_rcp(&bld->base, tmp);
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( bld, *inst, 0, chan_index, tmp );
+ src0 = emit_fetch( bld, inst, 0, CHAN_X );
+ res = lp_build_rcp(&bld->base, src0);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = res;
}
break;
case TGSI_OPCODE_RSQ:
/* TGSI_OPCODE_RECIPSQRT */
- tmp = FETCH( bld, *inst, 0, CHAN_X );
- tmp = lp_build_abs(&bld->base, tmp);
- tmp = lp_build_rsqrt(&bld->base, tmp);
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( bld, *inst, 0, chan_index, tmp );
+ src0 = emit_fetch( bld, inst, 0, CHAN_X );
+ src0 = lp_build_abs(&bld->base, src0);
+ res = lp_build_rsqrt(&bld->base, src0);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = res;
}
break;
-#if 0
case TGSI_OPCODE_EXP:
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( bld, *inst, 0, 0, CHAN_X );
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- emit_MOV( bld, 1, 0 );
- emit_flr( bld, 2, 1 );
- /* dst.x = ex2(floor(src.x)) */
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
- emit_MOV( bld, 2, 1 );
- emit_ex2( bld, 3, 2 );
- STORE( bld, *inst, 2, 0, CHAN_X );
- }
- /* dst.y = src.x - floor(src.x) */
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- emit_MOV( bld, 2, 0 );
- emit_sub( bld, 2, 1 );
- STORE( bld, *inst, 2, 0, CHAN_Y );
- }
- }
- /* dst.z = ex2(src.x) */
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- emit_ex2( bld, 3, 0 );
- STORE( bld, *inst, 0, 0, CHAN_Z );
- }
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
+ IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
+ LLVMValueRef *p_exp2_int_part = NULL;
+ LLVMValueRef *p_frac_part = NULL;
+ LLVMValueRef *p_exp2 = NULL;
+
+ src0 = emit_fetch( bld, inst, 0, CHAN_X );
+
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
+ p_exp2_int_part = &tmp0;
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
+ p_frac_part = &tmp1;
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
+ p_exp2 = &tmp2;
+
+ lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
+
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
+ dst0[CHAN_X] = tmp0;
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
+ dst0[CHAN_Y] = tmp1;
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
+ dst0[CHAN_Z] = tmp2;
}
/* dst.w = 1.0 */
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
- emit_tempf( bld, 0, TEMP_ONE_I, TEMP_ONE_C );
- STORE( bld, *inst, 0, 0, CHAN_W );
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
+ dst0[CHAN_W] = bld->base.one;
}
break;
-#endif
-#if 0
case TGSI_OPCODE_LOG:
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( bld, *inst, 0, 0, CHAN_X );
- emit_abs( bld, 0 );
- emit_MOV( bld, 1, 0 );
- emit_lg2( bld, 2, 1 );
- /* dst.z = lg2(abs(src.x)) */
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- STORE( bld, *inst, 1, 0, CHAN_Z );
- }
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- emit_flr( bld, 2, 1 );
- /* dst.x = floor(lg2(abs(src.x))) */
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( bld, *inst, 1, 0, CHAN_X );
- }
- /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- emit_ex2( bld, 2, 1 );
- emit_rcp( bld, 1, 1 );
- emit_mul( bld, 0, 1 );
- STORE( bld, *inst, 0, 0, CHAN_Y );
- }
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
+ IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
+ LLVMValueRef *p_floor_log2 = NULL;
+ LLVMValueRef *p_exp = NULL;
+ LLVMValueRef *p_log2 = NULL;
+
+ src0 = emit_fetch( bld, inst, 0, CHAN_X );
+ src0 = lp_build_abs( &bld->base, src0 );
+
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
+ p_floor_log2 = &tmp0;
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
+ p_exp = &tmp1;
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
+ p_log2 = &tmp2;
+
+ lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
+
+ /* dst.x = floor(lg2(abs(src.x))) */
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
+ dst0[CHAN_X] = tmp0;
+ /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
+ dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
}
+ /* dst.z = lg2(abs(src.x)) */
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
+ dst0[CHAN_Z] = tmp2;
}
/* dst.w = 1.0 */
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
- emit_tempf( bld, 0, TEMP_ONE_I, TEMP_ONE_C );
- STORE( bld, *inst, 0, 0, CHAN_W );
+ if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
+ dst0[CHAN_W] = bld->base.one;
}
break;
-#endif
case TGSI_OPCODE_MUL:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- LLVMValueRef a = FETCH( bld, *inst, 0, chan_index );
- LLVMValueRef b = FETCH( bld, *inst, 1, chan_index );
- tmp = lp_build_mul(&bld->base, a, b);
- STORE( bld, *inst, 0, chan_index, tmp );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
}
break;
case TGSI_OPCODE_ADD:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- LLVMValueRef a = FETCH( bld, *inst, 0, chan_index );
- LLVMValueRef b = FETCH( bld, *inst, 1, chan_index );
- tmp = lp_build_add(&bld->base, a, b);
- STORE( bld, *inst, 0, chan_index, tmp );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
}
break;
-#if 0
case TGSI_OPCODE_DP3:
/* TGSI_OPCODE_DOT3 */
- FETCH( bld, *inst, 0, 0, CHAN_X );
- FETCH( bld, *inst, 1, 1, CHAN_X );
- emit_mul( bld, 0, 1 );
- FETCH( bld, *inst, 1, 0, CHAN_Y );
- FETCH( bld, *inst, 2, 1, CHAN_Y );
- emit_mul( bld, 1, 2 );
- emit_add( bld, 0, 1 );
- FETCH( bld, *inst, 1, 0, CHAN_Z );
- FETCH( bld, *inst, 2, 1, CHAN_Z );
- emit_mul( bld, 1, 2 );
- emit_add( bld, 0, 1 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( bld, *inst, 0, 0, chan_index );
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_DP4:
/* TGSI_OPCODE_DOT4 */
- FETCH( bld, *inst, 0, 0, CHAN_X );
- FETCH( bld, *inst, 1, 1, CHAN_X );
- emit_mul( bld, 0, 1 );
- FETCH( bld, *inst, 1, 0, CHAN_Y );
- FETCH( bld, *inst, 2, 1, CHAN_Y );
- emit_mul( bld, 1, 2 );
- emit_add( bld, 0, 1 );
- FETCH( bld, *inst, 1, 0, CHAN_Z );
- FETCH( bld, *inst, 2, 1, CHAN_Z );
- emit_mul(bld, 1, 2 );
- emit_add(bld, 0, 1 );
- FETCH( bld, *inst, 1, 0, CHAN_W );
- FETCH( bld, *inst, 2, 1, CHAN_W );
- emit_mul( bld, 1, 2 );
- emit_add( bld, 0, 1 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( bld, *inst, 0, 0, chan_index );
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_DST:
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
- emit_tempf(
- bld,
- 0,
- TEMP_ONE_I,
- TEMP_ONE_C );
- STORE( bld, *inst, 0, 0, CHAN_X );
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
+ dst0[CHAN_X] = bld->base.one;
}
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
- FETCH( bld, *inst, 0, 0, CHAN_Y );
- FETCH( bld, *inst, 1, 1, CHAN_Y );
- emit_mul( bld, 0, 1 );
- STORE( bld, *inst, 0, 0, CHAN_Y );
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
+ dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
}
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
- FETCH( bld, *inst, 0, 0, CHAN_Z );
- STORE( bld, *inst, 0, 0, CHAN_Z );
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
+ dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
}
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
- FETCH( bld, *inst, 0, 1, CHAN_W );
- STORE( bld, *inst, 0, 0, CHAN_W );
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
+ dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
}
break;
case TGSI_OPCODE_MIN:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( bld, *inst, 0, 0, chan_index );
- FETCH( bld, *inst, 1, 1, chan_index );
- sse_minps(
- bld,
- make_xmm( 0 ),
- make_xmm( 1 ) );
- STORE( bld, *inst, 0, 0, chan_index );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
}
break;
case TGSI_OPCODE_MAX:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( bld, *inst, 0, 0, chan_index );
- FETCH( bld, *inst, 1, 1, chan_index );
- sse_maxps(
- bld,
- make_xmm( 0 ),
- make_xmm( 1 ) );
- STORE( bld, *inst, 0, 0, chan_index );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
}
break;
case TGSI_OPCODE_SLT:
/* TGSI_OPCODE_SETLT */
- emit_setcc( bld, inst, cc_LessThan );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
+ }
break;
case TGSI_OPCODE_SGE:
/* TGSI_OPCODE_SETGE */
- emit_setcc( bld, inst, cc_NotLessThan );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
+ }
break;
case TGSI_OPCODE_MAD:
/* TGSI_OPCODE_MADD */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( bld, *inst, 0, 0, chan_index );
- FETCH( bld, *inst, 1, 1, chan_index );
- FETCH( bld, *inst, 2, 2, chan_index );
- emit_mul( bld, 0, 1 );
- emit_add( bld, 0, 2 );
- STORE( bld, *inst, 0, 0, chan_index );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ tmp1 = emit_fetch( bld, inst, 1, chan_index );
+ tmp2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_SUB:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( bld, *inst, 0, 0, chan_index );
- FETCH( bld, *inst, 1, 1, chan_index );
- emit_sub( bld, 0, 1 );
- STORE( bld, *inst, 0, 0, chan_index );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ tmp1 = emit_fetch( bld, inst, 1, chan_index );
+ dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
}
break;
case TGSI_OPCODE_LRP:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( bld, *inst, 0, 0, chan_index );
- FETCH( bld, *inst, 1, 1, chan_index );
- FETCH( bld, *inst, 2, 2, chan_index );
- emit_sub( bld, 1, 2 );
- emit_mul( bld, 0, 1 );
- emit_add( bld, 0, 2 );
- STORE( bld, *inst, 0, 0, chan_index );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ src2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp0 = lp_build_sub( &bld->base, src1, src2 );
+ tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
+ dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
}
break;
case TGSI_OPCODE_CND:
- return 0;
- break;
-
- case TGSI_OPCODE_CND0:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ src2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
+ }
break;
case TGSI_OPCODE_DP2A:
- FETCH( bld, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */
- FETCH( bld, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */
- emit_mul( bld, 0, 1 ); /* xmm0 = xmm0 * xmm1 */
- FETCH( bld, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */
- FETCH( bld, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */
- emit_mul( bld, 1, 2 ); /* xmm1 = xmm1 * xmm2 */
- emit_add( bld, 0, 1 ); /* xmm0 = xmm0 + xmm1 */
- FETCH( bld, *inst, 1, 2, CHAN_X ); /* xmm1 = src[2].x */
- emit_add( bld, 0, 1 ); /* xmm0 = xmm0 + xmm1 */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( bld, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
+ tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
}
break;
case TGSI_OPCODE_FRC:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( bld, *inst, 0, 0, chan_index );
- emit_frc( bld, 0, 0 );
- STORE( bld, *inst, 0, 0, chan_index );
+ /* dst = src - floor(src). The operand order matters: the reverse
+ * (floor(src) - src) yields the negated fraction. */
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ tmp0 = lp_build_floor(&bld->base, src0);
+ tmp0 = lp_build_sub(&bld->base, src0, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_CLAMP:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ src2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp0 = lp_build_max(&bld->base, tmp0, src1);
+ tmp0 = lp_build_min(&bld->base, tmp0, src2);
+ dst0[chan_index] = tmp0;
+ }
break;
case TGSI_OPCODE_FLR:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( bld, *inst, 0, 0, chan_index );
- emit_flr( bld, 0, 0 );
- STORE( bld, *inst, 0, 0, chan_index );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
}
break;
case TGSI_OPCODE_ROUND:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( bld, *inst, 0, 0, chan_index );
- emit_rnd( bld, 0, 0 );
- STORE( bld, *inst, 0, 0, chan_index );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_round(&bld->base, tmp0);
}
break;
- case TGSI_OPCODE_EX2:
- FETCH( bld, *inst, 0, 0, CHAN_X );
- emit_ex2( bld, 0, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( bld, *inst, 0, 0, chan_index );
+ case TGSI_OPCODE_EX2: {
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp0 = lp_build_exp2( &bld->base, tmp0);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0;
}
break;
+ }
case TGSI_OPCODE_LG2:
- FETCH( bld, *inst, 0, 0, CHAN_X );
- emit_lg2( bld, 0, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( bld, *inst, 0, 0, chan_index );
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp0 = lp_build_log2( &bld->base, tmp0);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_POW:
- FETCH( bld, *inst, 0, 0, CHAN_X );
- FETCH( bld, *inst, 1, 1, CHAN_X );
- emit_pow( bld, 0, 0, 0, 1 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( bld, *inst, 0, 0, chan_index );
+ src0 = emit_fetch( bld, inst, 0, CHAN_X );
+ src1 = emit_fetch( bld, inst, 1, CHAN_X );
+ res = lp_build_pow( &bld->base, src0, src1 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = res;
}
break;
case TGSI_OPCODE_XPD:
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- FETCH( bld, *inst, 1, 1, CHAN_Z );
- FETCH( bld, *inst, 3, 0, CHAN_Z );
- }
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- FETCH( bld, *inst, 0, 0, CHAN_Y );
- FETCH( bld, *inst, 4, 1, CHAN_Y );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
- emit_MOV( bld, 2, 0 );
- emit_mul( bld, 2, 1 );
- emit_MOV( bld, 5, 3 );
- emit_mul( bld, 5, 4 );
- emit_sub( bld, 2, 5 );
- STORE( bld, *inst, 2, 0, CHAN_X );
- }
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- FETCH( bld, *inst, 2, 1, CHAN_X );
- FETCH( bld, *inst, 5, 0, CHAN_X );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
- emit_mul( bld, 3, 2 );
- emit_mul( bld, 1, 5 );
- emit_sub( bld, 3, 1 );
- STORE( bld, *inst, 3, 0, CHAN_Y );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
- emit_mul( bld, 5, 4 );
- emit_mul( bld, 0, 2 );
- emit_sub( bld, 5, 0 );
- STORE( bld, *inst, 5, 0, CHAN_Z );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
- emit_tempf(
- bld,
- 0,
- TEMP_ONE_I,
- TEMP_ONE_C );
- STORE( bld, *inst, 0, 0, CHAN_W );
+ if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
+ tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
+ }
+ if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
+ tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
+ tmp2 = tmp0;
+ tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
+ tmp5 = tmp3;
+ tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
+ tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
+ dst0[CHAN_X] = tmp2;
+ }
+ if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
+ IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
+ tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
+ tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
+ tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
+ dst0[CHAN_Y] = tmp3;
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
+ tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
+ tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
+ dst0[CHAN_Z] = tmp5;
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
+ dst0[CHAN_W] = bld->base.one;
}
break;
case TGSI_OPCODE_ABS:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( bld, *inst, 0, 0, chan_index );
- emit_abs( bld, 0) ;
-
- STORE( bld, *inst, 0, 0, chan_index );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
}
break;
case TGSI_OPCODE_RCC:
+ /* deprecated? */
+ assert(0);
return 0;
- break;
case TGSI_OPCODE_DPH:
- FETCH( bld, *inst, 0, 0, CHAN_X );
- FETCH( bld, *inst, 1, 1, CHAN_X );
- emit_mul( bld, 0, 1 );
- FETCH( bld, *inst, 1, 0, CHAN_Y );
- FETCH( bld, *inst, 2, 1, CHAN_Y );
- emit_mul( bld, 1, 2 );
- emit_add( bld, 0, 1 );
- FETCH( bld, *inst, 1, 0, CHAN_Z );
- FETCH( bld, *inst, 2, 1, CHAN_Z );
- emit_mul( bld, 1, 2 );
- emit_add( bld, 0, 1 );
- FETCH( bld, *inst, 1, 1, CHAN_W );
- emit_add( bld, 0, 1 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( bld, *inst, 0, 0, chan_index );
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_COS:
- FETCH( bld, *inst, 0, 0, CHAN_X );
- emit_cos( bld, 0, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( bld, *inst, 0, 0, chan_index );
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp0 = lp_build_cos( &bld->base, tmp0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_DDX:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
+ }
break;
case TGSI_OPCODE_DDY:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
+ }
break;
case TGSI_OPCODE_KILP:
/* predicated kill */
- emit_kilp( bld );
- return 0; /* XXX fix me */
+ /* FIXME */
+ return 0;
break;
case TGSI_OPCODE_KIL:
/* conditional kill */
- emit_kil( bld, &inst->FullSrcRegisters[0] );
+ emit_kil( bld, inst );
break;
case TGSI_OPCODE_PK2H:
break;
case TGSI_OPCODE_SEQ:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
+ }
break;
case TGSI_OPCODE_SFL:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.zero;
+ }
break;
case TGSI_OPCODE_SGT:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
+ }
break;
case TGSI_OPCODE_SIN:
- FETCH( bld, *inst, 0, 0, CHAN_X );
- emit_sin( bld, 0, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( bld, *inst, 0, 0, chan_index );
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ tmp0 = lp_build_sin( &bld->base, tmp0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_SLE:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
+ }
break;
case TGSI_OPCODE_SNE:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
+ }
break;
case TGSI_OPCODE_STR:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.one;
+ }
break;
-#endif
case TGSI_OPCODE_TEX:
- emit_tex( bld, inst, FALSE, FALSE );
+ emit_tex( bld, inst, FALSE, FALSE, dst0 );
break;
-#if 0
case TGSI_OPCODE_TXD:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_UP2H:
+ /* deprecated */
+ assert (0);
return 0;
break;
case TGSI_OPCODE_UP2US:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_UP4B:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_UP4UB:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_X2D:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_ARA:
+ /* deprecated */
+ assert(0);
return 0;
break;
+#if 0
case TGSI_OPCODE_ARR:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( bld, *inst, 0, 0, chan_index );
+ /* FIXME */
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
emit_rnd( bld, 0, 0 );
emit_f2it( bld, 0 );
- STORE( bld, *inst, 0, 0, chan_index );
+ dst0[chan_index] = tmp0;
}
break;
+#endif
case TGSI_OPCODE_BRA:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_CAL:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_RET:
- emit_ret( bld );
+ /* FIXME */
+ return 0;
break;
-#endif
case TGSI_OPCODE_END:
break;
-#if 0
case TGSI_OPCODE_SSG:
/* TGSI_OPCODE_SGN */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( bld, *inst, 0, 0, chan_index );
- emit_sgn( bld, 0, 0 );
- STORE( bld, *inst, 0, 0, chan_index );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
}
break;
case TGSI_OPCODE_CMP:
- emit_cmp (bld, inst);
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ src2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
+ }
break;
case TGSI_OPCODE_SCS:
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
- FETCH( bld, *inst, 0, 0, CHAN_X );
- emit_cos( bld, 0, 0 );
- STORE( bld, *inst, 0, 0, CHAN_X );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
- FETCH( bld, *inst, 0, 0, CHAN_X );
- emit_sin( bld, 0, 0 );
- STORE( bld, *inst, 0, 0, CHAN_Y );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
- emit_tempf(
- bld,
- 0,
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C );
- STORE( bld, *inst, 0, 0, CHAN_Z );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
- emit_tempf(
- bld,
- 0,
- TEMP_ONE_I,
- TEMP_ONE_C );
- STORE( bld, *inst, 0, 0, CHAN_W );
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+ dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
+ dst0[CHAN_Z] = bld->base.zero;
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
+ dst0[CHAN_W] = bld->base.one;
}
break;
-#endif
case TGSI_OPCODE_TXB:
- emit_tex( bld, inst, TRUE, FALSE );
+ emit_tex( bld, inst, TRUE, FALSE, dst0 );
break;
-#if 0
case TGSI_OPCODE_NRM:
/* fall-through */
case TGSI_OPCODE_NRM4:
{
uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
- if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
- IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
- IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) ||
- (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 4)) {
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
+ IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
+ IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
+ (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
/* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
/* xmm4 = src.x */
/* xmm0 = src.x * src.x */
- FETCH(bld, *inst, 0, 0, CHAN_X);
- if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
- emit_MOV(bld, 4, 0);
+ tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
+ tmp4 = tmp0;
}
- emit_mul(bld, 0, 0);
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
/* xmm5 = src.y */
/* xmm0 = xmm0 + src.y * src.y */
- FETCH(bld, *inst, 1, 0, CHAN_Y);
- if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
- emit_MOV(bld, 5, 1);
+ tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
+ tmp5 = tmp1;
}
- emit_mul(bld, 1, 1);
- emit_add(bld, 0, 1);
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
/* xmm6 = src.z */
/* xmm0 = xmm0 + src.z * src.z */
- FETCH(bld, *inst, 1, 0, CHAN_Z);
- if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- emit_MOV(bld, 6, 1);
+ tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
+ tmp6 = tmp1;
}
- emit_mul(bld, 1, 1);
- emit_add(bld, 0, 1);
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
if (dims == 4) {
/* xmm7 = src.w */
/* xmm0 = xmm0 + src.w * src.w */
- FETCH(bld, *inst, 1, 0, CHAN_W);
- if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
- emit_MOV(bld, 7, 1);
+ tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
+ tmp7 = tmp1;
}
- emit_mul(bld, 1, 1);
- emit_add(bld, 0, 1);
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
}
/* xmm1 = 1 / sqrt(xmm0) */
- emit_rsqrt(bld, 1, 0);
+ tmp1 = lp_build_rsqrt( &bld->base, tmp0);
/* dst.x = xmm1 * src.x */
- if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
- emit_mul(bld, 4, 1);
- STORE(bld, *inst, 4, 0, CHAN_X);
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
+ dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
}
/* dst.y = xmm1 * src.y */
- if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
- emit_mul(bld, 5, 1);
- STORE(bld, *inst, 5, 0, CHAN_Y);
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
+ dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
}
/* dst.z = xmm1 * src.z */
- if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- emit_mul(bld, 6, 1);
- STORE(bld, *inst, 6, 0, CHAN_Z);
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
+ dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
}
/* dst.w = xmm1 * src.w */
- if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && dims == 4) {
- emit_mul(bld, 7, 1);
- STORE(bld, *inst, 7, 0, CHAN_W);
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4) {
+ dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); /* tmp7 holds src.w only when W is enabled */
}
}
- /* dst0.w = 1.0 */
- if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 3) {
- emit_tempf(bld, 0, TEMP_ONE_I, TEMP_ONE_C);
- STORE(bld, *inst, 0, 0, CHAN_W);
+ /* dst.w = 1.0 */
+ if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
+ dst0[CHAN_W] = bld->base.one;
}
}
break;
case TGSI_OPCODE_DIV:
+ /* deprecated */
+ assert( 0 );
return 0;
break;
case TGSI_OPCODE_DP2:
- FETCH( bld, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */
- FETCH( bld, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */
- emit_mul( bld, 0, 1 ); /* xmm0 = xmm0 * xmm1 */
- FETCH( bld, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */
- FETCH( bld, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */
- emit_mul( bld, 1, 2 ); /* xmm1 = xmm1 * xmm2 */
- emit_add( bld, 0, 1 ); /* xmm0 = xmm0 + xmm1 */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( bld, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */
+ tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */
+ tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */
+ tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */
+ tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */
+ tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */
+ tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
+ tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
}
break;
-#endif
case TGSI_OPCODE_TXL:
- emit_tex( bld, inst, TRUE, FALSE );
+ emit_tex( bld, inst, TRUE, FALSE, dst0 );
break;
case TGSI_OPCODE_TXP:
- emit_tex( bld, inst, FALSE, TRUE );
+ emit_tex( bld, inst, FALSE, TRUE, dst0 );
break;
-#if 0
case TGSI_OPCODE_BRK:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_IF:
+ /* FIXME */
return 0;
break;
- case TGSI_OPCODE_LOOP:
+ case TGSI_OPCODE_BGNFOR:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_REP:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_ELSE:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_ENDIF:
+ /* FIXME */
return 0;
break;
- case TGSI_OPCODE_ENDLOOP:
+ case TGSI_OPCODE_ENDFOR:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_ENDREP:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_PUSHA:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_POPA:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_CEIL:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
+ }
break;
case TGSI_OPCODE_I2F:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_NOT:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_TRUNC:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( bld, *inst, 0, 0, chan_index );
- emit_f2it( bld, 0 );
- emit_i2f( bld, 0 );
- STORE( bld, *inst, 0, 0, chan_index );
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
}
break;
case TGSI_OPCODE_SHL:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_SHR:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_AND:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_OR:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_MOD:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_XOR:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_SAD:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_TXF:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_TXQ:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_CONT:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_ENDPRIM:
return 0;
break;
-#endif
+
+ case TGSI_OPCODE_NOP:
+ break;
default:
return 0;
}
- return 1;
-}
-
-static void
-emit_declaration(
- struct lp_build_tgsi_soa_context *bld,
- struct tgsi_full_declaration *decl )
-{
-#if 0
- if( decl->Declaration.File == TGSI_FILE_INPUT ) {
- unsigned first, last, mask;
- unsigned i, j;
- LLVMValueRef tmp;
-
- first = decl->DeclarationRange.First;
- last = decl->DeclarationRange.Last;
- mask = decl->Declaration.UsageMask;
-
- for( i = first; i <= last; i++ ) {
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- if( mask & (1 << j) ) {
- switch( decl->Declaration.Interpolate ) {
- case TGSI_INTERPOLATE_CONSTANT:
- bld->inputs[i][j] = bld->interp_coefs[i].a0[j];
- break;
-
- case TGSI_INTERPOLATE_LINEAR:
- tmp = bld->interp_coefs[i].a0[j];
- tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[0], bld->interp_coefs[i].dadx[j]));
- tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[1], bld->interp_coefs[i].dady[j]));
- bld->inputs[i][j] = tmp;
- break;
-
- case TGSI_INTERPOLATE_PERSPECTIVE:
- tmp = bld->interp_coefs[i].a0[j];
- tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[0], bld->interp_coefs[i].dadx[j]));
- tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[1], bld->interp_coefs[i].dady[j]));
- tmp = lp_build_div(&bld->base, tmp, bld->pos[3]);
- bld->inputs[i][j] = tmp;
- break;
-
- default:
- assert( 0 );
- break;
- }
- }
- }
+ if(info->num_dst) {
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
}
}
-#endif
+
+ return 1;
}
-/**
- * Translate a TGSI vertex/fragment shader to SSE2 code.
- * Slightly different things are done for vertex vs. fragment shaders.
- *
- * \param tokens the TGSI input shader
- * \param bld the output SSE code/function
- * \param immediates buffer to place immediates, later passed to SSE bld
- * \param return 1 for success, 0 if translation failed
- */
+
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_token *tokens,
- union lp_type type,
- LLVMValueRef (*inputs)[4],
+ struct lp_type type,
+ struct lp_build_mask_context *mask,
LLVMValueRef consts_ptr,
- LLVMValueRef (*outputs)[4],
- LLVMValueRef samplers_ptr)
+ const LLVMValueRef *pos,
+ const LLVMValueRef (*inputs)[NUM_CHANNELS],
+ LLVMValueRef (*outputs)[NUM_CHANNELS],
+ struct lp_build_sampler_soa *sampler)
{
struct lp_build_tgsi_soa_context bld;
struct tgsi_parse_context parse;
/* Setup build context */
memset(&bld, 0, sizeof bld);
lp_build_context_init(&bld.base, builder, type);
+ bld.mask = mask;
+ bld.pos = pos;
bld.inputs = inputs;
bld.outputs = outputs;
bld.consts_ptr = consts_ptr;
- bld.samplers_ptr = samplers_ptr;
+ bld.sampler = sampler;
tgsi_parse_init( &parse, tokens );
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
- if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
- emit_declaration( &bld, &parse.FullToken.FullDeclaration );
- }
+ /* Inputs already interpolated */
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
- debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n",
- parse.FullToken.FullInstruction.Instruction.Opcode,
- parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
- "vertex shader" : "fragment shader");
- }
+ {
+ unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
+ const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
+ if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
+ _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
+ info ? info->mnemonic : "<invalid>");
+ }
+
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
assert(num_immediates < LP_MAX_IMMEDIATES);
for( i = 0; i < size; ++i )
bld.immediates[num_immediates][i] =
- lp_build_const_uni(type, parse.FullToken.FullImmediate.u[i].Float);
+ lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
for( i = size; i < 4; ++i )
bld.immediates[num_immediates][i] = bld.base.undef;
num_immediates++;