#include "util/u_memory.h"
#include "util/u_math.h"
#include "pipe/p_shader_tokens.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_exec.h"
return x86_make_disp(aos_get_x86(cp, 1, X86_CONSTANTS), idx * 4 * sizeof(float));
default:
- ERROR(cp, "unknown reg file");
+ AOS_ERROR(cp, "unknown reg file");
return x86_make_reg(0,0);
}
}
(cp->xmm[idx].file != TGSI_FILE_INPUT && /* inputs are fetched into xmm & set dirty */
cp->xmm[idx].file != TGSI_FILE_OUTPUT &&
cp->xmm[idx].file != TGSI_FILE_TEMPORARY)) {
- ERROR(cp, "invalid spill");
+ AOS_ERROR(cp, "invalid spill");
return;
}
else {
}
+static void aos_soft_release_xmm( struct aos_compilation *cp, /* "Soft" release: age an XMM register by one instruction without freeing it; cp supplies xmm[] and insn_counter. */
+ struct x86_reg reg ) /* Register to age; anything whose file is not file_XMM is ignored. */
+{
+ if (reg.file == file_XMM) { /* Non-XMM operands have no xmm[] bookkeeping entry to adjust. */
+ assert(cp->xmm[reg.idx].last_used == cp->insn_counter); /* Precondition: reg was stamped as used by the current instruction. */
+ cp->xmm[reg.idx].last_used = cp->insn_counter - 1; /* Back-date the stamp by one instruction; presumably this makes reg a preferred reuse candidate - TODO confirm against the allocator. */
+ }
+}
+
/* Mark an xmm reg as holding the current copy of a shader reg.
unsigned abs = 0;
for (i = 0; i < 4; i++) {
- unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, i );
+ unsigned swizzle = tgsi_util_get_full_src_register_swizzle( src, i );
unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, i );
- switch (swizzle) {
- case TGSI_EXTSWIZZLE_ZERO:
- case TGSI_EXTSWIZZLE_ONE:
- ERROR(cp, "not supporting full swizzles yet in tgsi_aos_sse2");
- break;
-
- default:
- swz |= (swizzle & 0x3) << (i * 2);
- break;
- }
+ swz |= (swizzle & 0x3) << (i * 2);
switch (neg) {
case TGSI_UTIL_SIGN_TOGGLE:
break;
default:
- ERROR(cp, "unsupported sign-mode");
+ AOS_ERROR(cp, "unsupported sign-mode");
break;
}
}
sse_mulps(cp->func, dst, tmp);
aos_release_xmm_reg(cp, tmp.idx);
+ aos_soft_release_xmm(cp, imm_swz);
}
else if (negs) {
struct x86_reg imm_negs = aos_get_internal_xmm(cp, IMM_NEGS);
sse_mulps(cp->func, dst, imm_negs);
+ aos_soft_release_xmm(cp, imm_negs);
}
if (abs && abs != 0xf) {
- ERROR(cp, "unsupported partial abs");
+ AOS_ERROR(cp, "unsupported partial abs");
}
else if (abs) {
struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
sse_maxps(cp->func, dst, tmp);
aos_release_xmm_reg(cp, tmp.idx);
+ aos_soft_release_xmm(cp, neg);
}
+ aos_soft_release_xmm(cp, arg0);
return dst;
}
src->SrcRegister.File,
src->SrcRegister.Index);
- unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, channel );
+ unsigned swizzle = tgsi_util_get_full_src_register_swizzle( src, channel );
unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, channel );
- switch (swizzle) {
- case TGSI_EXTSWIZZLE_ZERO:
- x87_fldz( cp->func );
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- x87_fld1( cp->func );
- break;
-
- default:
- x87_fld( cp->func, x86_make_disp(arg0, (swizzle & 3) * sizeof(float)) );
- break;
- }
-
+ x87_fld( cp->func, x86_make_disp(arg0, (swizzle & 3) * sizeof(float)) );
switch (neg) {
case TGSI_UTIL_SIGN_TOGGLE:
break;
default:
- ERROR(cp, "unsupported sign-mode");
+ AOS_ERROR(cp, "unsupported sign-mode");
break;
}
}
struct x86_reg st1 = x86_make_reg(file_x87, 1);
int stack = cp->func->x87_stack;
-// set_fpu_round_neg_inf( cp );
+ /* set_fpu_round_neg_inf( cp ); */
x87_fld(cp->func, st0); /* a a */
x87_fprndint( cp->func ); /* int(a) a*/
*/
static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
-
if (0) {
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg r = aos_get_xmm_reg(cp);
return TRUE;
}
else {
- struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
- struct x86_reg r = aos_get_xmm_reg(cp);
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg r = aos_get_xmm_reg(cp);
struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ );
struct x86_reg one_point_five = x86_make_disp( neg_half, 4 );
struct x86_reg src = get_xmm_writable( cp, arg0 );
-
- sse_rsqrtss( cp->func, r, src ); /* rsqrtss(a) */
- sse_mulss( cp->func, src, neg_half ); /* -.5 * a */
- sse_mulss( cp->func, src, r ); /* -.5 * a * r */
- sse_mulss( cp->func, src, r ); /* -.5 * a * r * r */
- sse_addss( cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */
- sse_mulss( cp->func, r, src ); /* r * (1.5 - .5 * a * r * r) */
+ struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ sse_movaps(cp->func, tmp, src);
+ sse_mulps(cp->func, tmp, neg);
+ sse_maxps(cp->func, tmp, src);
+
+ sse_rsqrtss( cp->func, r, tmp ); /* rsqrtss(a) */
+ sse_mulss( cp->func, tmp, neg_half ); /* -.5 * a */
+ sse_mulss( cp->func, tmp, r ); /* -.5 * a * r */
+ sse_mulss( cp->func, tmp, r ); /* -.5 * a * r * r */
+ sse_addss( cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */
+ sse_mulss( cp->func, r, tmp ); /* r * (1.5 - .5 * a * r * r) */
store_scalar_dest(cp, &op->FullDstRegisters[0], r);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+
return TRUE;
}
}
case TGSI_OPCODE_SUB:
return emit_SUB(cp, inst);
- case TGSI_OPCODE_LERP:
-// return emit_LERP(cp, inst);
+ case TGSI_OPCODE_LRP:
+ /*return emit_LERP(cp, inst);*/
return FALSE;
- case TGSI_OPCODE_FRAC:
+ case TGSI_OPCODE_FRC:
return emit_FRC(cp, inst);
case TGSI_OPCODE_CLAMP:
-// return emit_CLAMP(cp, inst);
+ /*return emit_CLAMP(cp, inst);*/
return FALSE;
- case TGSI_OPCODE_FLOOR:
+ case TGSI_OPCODE_FLR:
return emit_FLR(cp, inst);
case TGSI_OPCODE_ROUND:
return emit_RND(cp, inst);
- case TGSI_OPCODE_EXPBASE2:
+ case TGSI_OPCODE_EX2:
#if FAST_MATH
return emit_EXPBASE2(cp, inst);
#elif 0
return FALSE;
#endif
- case TGSI_OPCODE_LOGBASE2:
+ case TGSI_OPCODE_LG2:
return emit_LG2(cp, inst);
- case TGSI_OPCODE_POWER:
+ case TGSI_OPCODE_POW:
return emit_POW(cp, inst);
- case TGSI_OPCODE_CROSSPRODUCT:
+ case TGSI_OPCODE_XPD:
return emit_XPD(cp, inst);
case TGSI_OPCODE_ABS:
unsigned pos = cp->num_immediates++;
unsigned j;
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
- cp->vaos->machine->immediate[pos][j] = imm->u.ImmediateFloat32[j].Float;
+ cp->vaos->machine->immediate[pos][j] = imm->u[j].Float;
}
return TRUE;