#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_exec.h"
#ifdef PIPE_ARCH_X86
#define DISASSEM 0
+#define FAST_MATH 1
static const char *files[] =
{
return TRUE;
}
+
+
/* A wrapper for powf().
* Makes sure it is cdecl and operates on floats.
*/
static float PIPE_CDECL _powerf( float x, float y )
{
+#if FAST_MATH
+ return util_fast_pow(x, y);
+#else
return powf( x, y );
+#endif
}
+#if FAST_MATH
+static float PIPE_CDECL _exp2(float x)
+{
+ return util_fast_exp2(x);
+}
+#endif
+
+
/* Really not sufficient -- need to check for conditions that could
* generate inf/nan values, which will slow things down hugely.
*/
}
+#if FAST_MATH
+static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ uint i;
+
+ /* For absolute correctness, need to spill/invalidate all XMM regs
+ * too.
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+ aos_release_xmm_reg(cp, i);
+ }
+
+ /* Push caller-save (ie scratch) regs.
+ */
+ x86_cdecl_caller_push_regs( cp->func );
+
+ x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -4) );
+
+ x87_fld_src( cp, &op->FullSrcRegisters[0], 0 );
+ x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) );
+
+ /* tmp_EAX has been pushed & will be restored below */
+ x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _exp2 );
+ x86_call( cp->func, cp->tmp_EAX );
+
+ x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 4) );
+
+ x86_cdecl_caller_pop_regs( cp->func );
+
+ /* Note retval on x87 stack:
+ */
+ cp->func->x87_stack++;
+
+ x87_fstp_dest4( cp, &op->FullDstRegisters[0] );
+
+ return TRUE;
+}
+#endif
+
+
static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
return emit_RND(cp, inst);
case TGSI_OPCODE_EXPBASE2:
-#if 0
+#if FAST_MATH
+ return emit_EXPBASE2(cp, inst);
+#elif 0
/* this seems to fail for "larger" exponents.
* See glean tvertProg1's EX2 test.
*/
struct aos_compilation cp;
unsigned fixup, label;
+ util_init_math();
+
tgsi_parse_init( &parse, varient->base.vs->state.tokens );
memset(&cp, 0, sizeof(cp));
-#endif
+#endif /* PIPE_ARCH_X86 */
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
+#include "util/u_math.h"
+
+#define FAST_MATH 1
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
tgsi_dump(tokens, 0);
#endif
+ util_init_math();
+
mach->Tokens = tokens;
mach->Samplers = samplers;
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
{
+#if FAST_MATH
+ dst->f[0] = util_fast_exp2( src->f[0] );
+ dst->f[1] = util_fast_exp2( src->f[1] );
+ dst->f[2] = util_fast_exp2( src->f[2] );
+ dst->f[3] = util_fast_exp2( src->f[3] );
+#else
dst->f[0] = powf( 2.0f, src->f[0] );
dst->f[1] = powf( 2.0f, src->f[1] );
dst->f[2] = powf( 2.0f, src->f[2] );
dst->f[3] = powf( 2.0f, src->f[3] );
+#endif
}
static void
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
+#if FAST_MATH
+ dst->f[0] = util_fast_log2( src->f[0] );
+ dst->f[1] = util_fast_log2( src->f[1] );
+ dst->f[2] = util_fast_log2( src->f[2] );
+ dst->f[3] = util_fast_log2( src->f[3] );
+#else
dst->f[0] = logf( src->f[0] ) * 1.442695f;
dst->f[1] = logf( src->f[1] ) * 1.442695f;
dst->f[2] = logf( src->f[2] ) * 1.442695f;
dst->f[3] = logf( src->f[3] ) * 1.442695f;
+#endif
}
static void
const union tgsi_exec_channel *src0,
const union tgsi_exec_channel *src1 )
{
+#if FAST_MATH
+ dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
+ dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
+ dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
+ dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
+#else
dst->f[0] = powf( src0->f[0], src1->f[0] );
dst->f[1] = powf( src0->f[1], src1->f[1] );
dst->f[2] = powf( src0->f[2], src1->f[2] );
dst->f[3] = powf( src0->f[3], src1->f[3] );
+#endif
}
static void
/* TGSI_OPCODE_EX2 */
FETCH(&r[0], 0, CHAN_X);
+#if FAST_MATH
+ micro_exp2( &r[0], &r[0] );
+#else
micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
+#endif
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( &r[0], 0, chan_index );
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
*/
#define HIGH_PRECISION 1
+#define FAST_MATH 1
+
#define FOR_EACH_CHANNEL( CHAN )\
for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
{
const unsigned X = 0;
+#if FAST_MATH
+ store[X + 0] = util_fast_exp2( store[X + 0] );
+ store[X + 1] = util_fast_exp2( store[X + 1] );
+ store[X + 2] = util_fast_exp2( store[X + 2] );
+ store[X + 3] = util_fast_exp2( store[X + 3] );
+#else
store[X + 0] = powf( 2.0f, store[X + 0] );
store[X + 1] = powf( 2.0f, store[X + 1] );
store[X + 2] = powf( 2.0f, store[X + 2] );
store[X + 3] = powf( 2.0f, store[X + 3] );
+#endif
}
static void
{
const unsigned X = 0;
+#if FAST_MATH
+ store[X + 0] = util_fast_pow( store[X + 0], store[X + 4] );
+ store[X + 1] = util_fast_pow( store[X + 1], store[X + 5] );
+ store[X + 2] = util_fast_pow( store[X + 2], store[X + 6] );
+ store[X + 3] = util_fast_pow( store[X + 3], store[X + 7] );
+#else
store[X + 0] = powf( store[X + 0], store[X + 4] );
store[X + 1] = powf( store[X + 1], store[X + 5] );
store[X + 2] = powf( store[X + 2], store[X + 6] );
store[X + 3] = powf( store[X + 3], store[X + 7] );
+#endif
}
static void
unsigned ok = 1;
uint num_immediates = 0;
+ util_init_math();
+
func->csr = func->store;
tgsi_parse_init( &parse, tokens );