#if defined(PIPE_ARCH_PPC)
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_sse.h"
+#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_dump.h"
* How many TGSI temps should be implemented with real PPC vector registers
* rather than memory.
*/
-#define MAX_PPC_TEMPS 4
-
-
-struct reg_chan_vec
-{
- struct tgsi_full_src_register src;
- uint chan;
- uint vec;
-};
+#define MAX_PPC_TEMPS 3
/**
}
+/**
+ * Is the given TGSI register stored as a real PPC vector register?
+ */
+static boolean
+is_ppc_vec_temporary(const struct tgsi_full_src_register *reg)
+{
+ return (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
+ reg->SrcRegister.Index < MAX_PPC_TEMPS);
+}
+
+
+/**
+ * Is the given TGSI register stored as a real PPC vector register?
+ */
+static boolean
+is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg)
+{
+ return (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
+ reg->DstRegister.Index < MAX_PPC_TEMPS);
+}
+
+
+
/**
* All PPC vector load/store instructions form an effective address
* by adding the contents of two registers. For example:
const struct tgsi_full_src_register *reg,
const unsigned chan_index)
{
- uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index);
+ uint swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index);
int dst_vec = -1;
switch (swizzle) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
+ case TGSI_SWIZZLE_X:
+ case TGSI_SWIZZLE_Y:
+ case TGSI_SWIZZLE_Z:
+ case TGSI_SWIZZLE_W:
switch (reg->SrcRegister.File) {
case TGSI_FILE_INPUT:
{
}
break;
case TGSI_FILE_TEMPORARY:
- if (reg->SrcRegister.Index < MAX_PPC_TEMPS) {
+ if (is_ppc_vec_temporary(reg)) {
/* use PPC vec register */
dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle];
}
assert( 0 );
}
break;
- case TGSI_EXTSWIZZLE_ZERO:
- ppc_vzero(gen->f, dst_vec);
- break;
- case TGSI_EXTSWIZZLE_ONE:
- {
- int one_vec = gen_one_vec(gen);
- dst_vec = ppc_allocate_vec_register(gen->f);
- ppc_vmove(gen->f, dst_vec, one_vec);
- }
- break;
default:
assert( 0 );
}
uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index);
if (sign_op != TGSI_UTIL_SIGN_KEEP) {
int bit31_vec = gen_get_bit31_vec(gen);
+ int dst_vec2;
+
+ if (is_ppc_vec_temporary(reg)) {
+ /* need to use a new temp */
+ dst_vec2 = ppc_allocate_vec_register(gen->f);
+ }
+ else {
+ dst_vec2 = dst_vec;
+ }
switch (sign_op) {
case TGSI_UTIL_SIGN_CLEAR:
/* vec = vec & ~bit31 */
- ppc_vandc(gen->f, dst_vec, dst_vec, bit31_vec);
+ ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec);
break;
case TGSI_UTIL_SIGN_SET:
/* vec = vec | bit31 */
- ppc_vor(gen->f, dst_vec, dst_vec, bit31_vec);
+ ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec);
break;
case TGSI_UTIL_SIGN_TOGGLE:
/* vec = vec ^ bit31 */
- ppc_vxor(gen->f, dst_vec, dst_vec, bit31_vec);
+ ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec);
break;
default:
assert(0);
}
+ return dst_vec2;
}
}
return FALSE;
if (a->SrcRegister.Index != b->SrcRegister.Index)
return FALSE;
- swz_a = tgsi_util_get_full_src_register_extswizzle(a, chan_a);
- swz_b = tgsi_util_get_full_src_register_extswizzle(b, chan_b);
+ swz_a = tgsi_util_get_full_src_register_swizzle(a, chan_a);
+ swz_b = tgsi_util_get_full_src_register_swizzle(b, chan_b);
if (swz_a != swz_b)
return FALSE;
sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a);
uint i;
for (i = 0; i < gen->num_regs; i++) {
const const struct tgsi_full_src_register src = gen->regs[i].src;
- if (!(src.SrcRegister.File == TGSI_FILE_TEMPORARY &&
- src.SrcRegister.Index < MAX_PPC_TEMPS)) {
+ if (!is_ppc_vec_temporary(&src)) {
ppc_release_vec_register(gen->f, gen->regs[i].vec);
}
}
{
const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0];
- if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
- reg->DstRegister.Index < MAX_PPC_TEMPS) {
+ if (is_ppc_vec_temporary_dst(reg)) {
int vec = gen->temps_map[reg->DstRegister.Index][chan_index];
return vec;
}
}
break;
case TGSI_FILE_TEMPORARY:
- if (reg->DstRegister.Index < MAX_PPC_TEMPS) {
+ if (is_ppc_vec_temporary_dst(reg)) {
if (!free_vec) {
int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index];
if (dst_vec != src_vec)
emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
{
uint chan_index;
+
FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */
int v1 = get_dst_vec(gen, inst, chan_index);
ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */
}
break;
- case TGSI_OPCODE_FLOOR:
+ case TGSI_OPCODE_FLR:
ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */
break;
- case TGSI_OPCODE_FRAC:
+ case TGSI_OPCODE_FRC:
ppc_vrfim(gen->f, v1, v0); /* tmp = floor(v0) */
ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */
break;
- case TGSI_OPCODE_EXPBASE2:
+ case TGSI_OPCODE_EX2:
ppc_vexptefp(gen->f, v1, v0); /* v1 = 2^v0 */
break;
- case TGSI_OPCODE_LOGBASE2:
+ case TGSI_OPCODE_LG2:
/* XXX this may be broken! */
ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */
break;
case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
if (v0 != v1)
ppc_vmove(gen->f, v1, v0);
break;
v2 = ppc_allocate_vec_register(gen->f);
- ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */
+ ppc_vzero(gen->f, v2); /* v2 = {0, 0, 0, 0} */
v0 = get_src_vec(gen, inst, 0, CHAN_X); /* v0 = src0.XXXX */
v1 = get_src_vec(gen, inst, 1, CHAN_X); /* v1 = src1.XXXX */
ppc_vzero(f, zero_vec);
ppc_vlogefp(f, t_vec, va); /* t = log2(va) */
- ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb */
+ ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb + zero */
ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */
ppc_release_vec_register(f, t_vec);
emit_instruction(struct gen_context *gen,
struct tgsi_full_instruction *inst)
{
+
+ /* we don't handle saturation/clamping yet */
+ if (inst->Instruction.Saturate != TGSI_SAT_NONE)
+ return 0;
+
+ /* need to use extra temps to fix SOA dependencies : */
+ if (tgsi_check_soa_dependencies(inst))
+ return FALSE;
+
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
case TGSI_OPCODE_ABS:
- case TGSI_OPCODE_FLOOR:
- case TGSI_OPCODE_FRAC:
- case TGSI_OPCODE_EXPBASE2:
- case TGSI_OPCODE_LOGBASE2:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_FRC:
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
emit_unaryop(gen, inst);
break;
case TGSI_OPCODE_RSQ:
static void
emit_epilogue(struct ppc_function *func)
{
+ ppc_comment(func, -4, "Epilogue:");
ppc_return(func);
/* XXX restore prev stack frame */
+#if 0
debug_printf("PPC: Emitted %u instructions\n", func->num_inst);
+#endif
}
unsigned ok = 1;
uint num_immediates = 0;
struct gen_context gen;
+ uint ic = 0;
if (use_ppc_asm < 0) {
/* If GALLIUM_NOPPC is set, don't use PPC codegen */
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (func->print) {
+ _debug_printf("# ");
+ ic++;
+ tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic);
+ }
+
ok = emit_instruction(&gen, &parse.FullToken.FullInstruction);
if (!ok) {
- debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n",
- parse.FullToken.FullInstruction.Instruction.Opcode,
+ uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
+ debug_printf("failed to translate tgsi opcode %d (%s) to PPC (%s)\n",
+ opcode,
+ tgsi_get_opcode_name(opcode),
parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
"vertex shader" : "fragment shader");
}
case TGSI_TOKEN_TYPE_IMMEDIATE:
/* splat each immediate component into a float[4] vector for SoA */
{
- const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+ const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
uint i;
assert(size <= 4);
assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
for (i = 0; i < size; i++) {
immediates[num_immediates][i] =
- parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+ parse.FullToken.FullImmediate.u[i].Float;
}
num_immediates++;
}