#include "radeon_program_alu.h"
#include "radeon_swizzle.h"
#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
+#include "radeon_remove_constants.h"
+
+struct loop {
+ int BgnLoop;
+
+};
/*
* Take an already-setup and valid source then swizzle it appropriately to
t_swizzle(GET_SWZ(src->Swizzle, 2)),
t_swizzle(GET_SWZ(src->Swizzle, 3)),
t_src_class(src->File),
- src->Negate) | (src->RelAddr << 4);
+ src->Negate) |
+ (src->RelAddr << 4) | (src->Abs << 3);
}
static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_src_class(src->File),
src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
- (src->RelAddr << 4);
+ (src->RelAddr << 4) | (src->Abs << 3);
}
static int valid_dst(struct r300_vertex_program_code *vp,
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
+static void mark_write(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ unsigned int * writemasks = userdata;
+
+ if (file != RC_FILE_TEMPORARY)
+ return;
+
+ if (index >= R300_VS_MAX_TEMPS)
+ return;
+
+ writemasks[index] |= mask;
+}
+
+static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
+{
+ return PVS_SRC_OPERAND(compiler->PredicateIndex,
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_W),
+ t_src_class(RC_FILE_TEMPORARY),
+ 0);
+}
+
+static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
+ unsigned int hw_opcode, int is_math)
+{
+ return PVS_OP_DST_OPERAND(hw_opcode,
+ is_math,
+ 0,
+ compiler->PredicateIndex,
+ RC_MASK_W,
+ t_dst_class(RC_FILE_TEMPORARY));
+
+}
+
+static void ei_if(struct r300_vertex_program_compiler * compiler,
+ struct rc_instruction *rci,
+ unsigned int * inst,
+ unsigned int branch_depth)
+{
+ unsigned int predicate_opcode;
+ int is_math = 0;
+
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode IF not supported\n");
+ return;
+ }
+
+ /* Reserve a temporary to use as our predicate stack counter, if we
+ * don't already have one. */
+ if (!compiler->PredicateMask) {
+ unsigned int writemasks[RC_REGISTER_MAX_INDEX];
+ struct rc_instruction * inst;
+ unsigned int i;
+ memset(writemasks, 0, sizeof(writemasks));
+ for(inst = compiler->Base.Program.Instructions.Next;
+ inst != &compiler->Base.Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_writes_mask(inst, mark_write, writemasks);
+ }
+ for(i = 0; i < compiler->Base.max_temp_regs; i++) {
+ unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
+ /* Only the W component can be used fo the predicate
+ * stack counter. */
+ if (mask & RC_MASK_W) {
+ compiler->PredicateMask = RC_MASK_W;
+ compiler->PredicateIndex = i;
+ break;
+ }
+ }
+ if (i == compiler->Base.max_temp_regs) {
+ rc_error(&compiler->Base, "No free temporary to use for"
+ " predicate stack counter.\n");
+ return;
+ }
+ }
+ predicate_opcode =
+ branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ;
+
+ rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
+ if (branch_depth == 0) {
+ is_math = 1;
+ predicate_opcode = ME_PRED_SET_NEQ;
+ inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+ inst[2] = 0;
+ } else {
+ predicate_opcode = VE_PRED_SET_NEQ_PUSH;
+ inst[1] = t_pred_src(compiler);
+ inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+ }
+
+ inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
+ inst[3] = 0;
+
+}
+
+static void ei_else(struct r300_vertex_program_compiler * compiler,
+ unsigned int * inst)
+{
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode ELSE not supported\n");
+ return;
+ }
+ inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
+ inst[1] = t_pred_src(compiler);
+ inst[2] = 0;
+ inst[3] = 0;
+}
+
+static void ei_endif(struct r300_vertex_program_compiler *compiler,
+ unsigned int * inst)
+{
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
+ return;
+ }
+ inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
+ inst[1] = t_pred_src(compiler);
+ inst[2] = 0;
+ inst[3] = 0;
+}
-static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
+static void translate_vertex_program(struct radeon_compiler *c, void *user)
{
+ struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
struct rc_instruction *rci;
+ struct loop * loops = NULL;
+ int current_loop_depth = 0;
+ int loops_reserved = 0;
+
+ unsigned int branch_depth = 0;
+
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
+ compiler->code->num_temporaries = 0;
compiler->SetHwInputOutput(compiler);
for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
struct rc_sub_instruction *vpi = &rci->U.I;
unsigned int *inst = compiler->code->body.d + compiler->code->length;
+ const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode);
/* Skip instructions writing to non-existing destination */
if (!valid_dst(compiler->code, &vpi->DstReg))
continue;
- if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
+ if (info->HasDstReg) {
+ /* Relative addressing of destination operands is not supported yet. */
+ if (vpi->DstReg.RelAddr) {
+ rc_error(&compiler->Base, "Vertex program does not support relative "
+ "addressing of destination operands (yet).\n");
+ return;
+ }
+
+ /* Neither is Saturate. */
+ if (vpi->SaturateMode != RC_SATURATE_NONE) {
+ rc_error(&compiler->Base, "Vertex program does not support the Saturate "
+ "modifier (yet).\n");
+ }
+ }
+
+ if (compiler->code->length >= c->max_alu_insts * 4) {
rc_error(&compiler->Base, "Vertex program has too many instructions\n");
return;
}
+ assert(compiler->Base.is_r500 ||
+ (vpi->Opcode != RC_OPCODE_SEQ &&
+ vpi->Opcode != RC_OPCODE_SNE));
+
switch (vpi->Opcode) {
case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+ case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+ case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
+ case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+ case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
+ case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+ case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
+ case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
+ case RC_OPCODE_BGNLOOP:
+ {
+ struct loop * l;
+
+ if ((!compiler->Base.is_r500
+ && loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
+ || loops_reserved >= R500_VS_MAX_FC_DEPTH) {
+ rc_error(&compiler->Base,
+ "Loops are nested too deep.");
+ return;
+ }
+ memory_pool_array_reserve(&compiler->Base.Pool,
+ struct loop, loops, current_loop_depth,
+ loops_reserved, 1);
+ l = &loops[current_loop_depth++];
+ memset(l , 0, sizeof(struct loop));
+ l->BgnLoop = (compiler->code->length / 4);
+ continue;
+ }
+ case RC_OPCODE_ENDLOOP:
+ {
+ struct loop * l;
+ unsigned int act_addr;
+ unsigned int last_addr;
+ unsigned int ret_addr;
+
+ assert(loops);
+ l = &loops[current_loop_depth - 1];
+ act_addr = l->BgnLoop - 1;
+ last_addr = (compiler->code->length / 4) - 1;
+ ret_addr = l->BgnLoop;
+
+ if (loops_reserved >= R300_VS_MAX_FC_OPS) {
+ rc_error(&compiler->Base,
+ "Too many flow control instructions.");
+ return;
+ }
+ if (compiler->Base.is_r500) {
+ compiler->code->fc_op_addrs.r500
+ [compiler->code->num_fc_ops].lw =
+ R500_PVS_FC_ACT_ADRS(act_addr)
+ | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
+ ;
+ compiler->code->fc_op_addrs.r500
+ [compiler->code->num_fc_ops].uw =
+ R500_PVS_FC_LAST_INST(last_addr)
+ | R500_PVS_FC_RTN_INST(ret_addr)
+ ;
+ } else {
+ compiler->code->fc_op_addrs.r300
+ [compiler->code->num_fc_ops] =
+ R300_PVS_FC_ACT_ADRS(act_addr)
+ | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
+ | R300_PVS_FC_LAST_INST(last_addr)
+ | R300_PVS_FC_RTN_INST(ret_addr)
+ ;
+ }
+ compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
+ R300_PVS_FC_LOOP_INIT_VAL(0x0)
+ | R300_PVS_FC_LOOP_STEP_VAL(0x1)
+ ;
+ compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
+ compiler->code->num_fc_ops);
+ compiler->code->num_fc_ops++;
+ current_loop_depth--;
+ continue;
+ }
+
default:
- rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
+ rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name);
+ return;
+ }
+
+ /* Non-flow control instructions that are inside an if statement
+ * need to pay attention to the predicate bit. */
+ if (branch_depth
+ && vpi->Opcode != RC_OPCODE_IF
+ && vpi->Opcode != RC_OPCODE_ELSE
+ && vpi->Opcode != RC_OPCODE_ENDIF) {
+
+ inst[0] |= (PVS_DST_PRED_ENABLE_MASK
+ << PVS_DST_PRED_ENABLE_SHIFT);
+ inst[0] |= (PVS_DST_PRED_SENSE_MASK
+ << PVS_DST_PRED_SENSE_SHIFT);
+ }
+
+ /* Update the number of temporaries. */
+ if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY &&
+ vpi->DstReg.Index >= compiler->code->num_temporaries)
+ compiler->code->num_temporaries = vpi->DstReg.Index + 1;
+
+ for (unsigned i = 0; i < info->NumSrcRegs; i++)
+ if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
+ compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;
+
+ if (compiler->PredicateMask)
+ if (compiler->PredicateIndex >= compiler->code->num_temporaries)
+ compiler->code->num_temporaries = compiler->PredicateIndex + 1;
+
+ if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
+ rc_error(&compiler->Base, "Too many temporaries.\n");
return;
}
struct rc_instruction * LastRead;
};
-static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
+static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
{
+ struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
struct rc_instruction *inst;
+ struct rc_instruction *end_loop = NULL;
unsigned int num_orig_temps = 0;
- char hwtemps[VSF_MAX_FRAGMENT_TEMPS];
+ char hwtemps[RC_REGISTER_MAX_INDEX];
struct temporary_allocation * ta;
unsigned int i, j;
+ struct rc_instruction *last_inst_src_reladdr = NULL;
- compiler->code->num_temporaries = 0;
memset(hwtemps, 0, sizeof(hwtemps));
- /* Pass 1: Count original temporaries and allocate structures */
+ rc_recompute_ips(c);
+
+ /* Pass 1: Count original temporaries. */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
}
}
+ /* Pass 2: If there is relative addressing of dst temporaries, we cannot change register indices. Give up.
+ * For src temporaries, save the last instruction which uses relative addressing. */
+ for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->HasDstReg)
+ if (inst->U.I.DstReg.RelAddr)
+ return;
+
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[i].RelAddr) {
+ last_inst_src_reladdr = inst;
+ }
+ }
+ }
+
ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
sizeof(struct temporary_allocation) * num_orig_temps);
memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
- /* Pass 2: Determine original temporary lifetimes */
+ /* Pass 3: Determine original temporary lifetimes */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ /* Instructions inside of loops need to use the ENDLOOP
+ * instruction as their LastRead. */
+ if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ int endloops = 1;
+ struct rc_instruction * ptr;
+ for(ptr = inst->Next;
+ ptr != &compiler->Base.Program.Instructions;
+ ptr = ptr->Next){
+ if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ endloops++;
+ } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+ endloops--;
+ if (endloops <= 0) {
+ end_loop = ptr;
+ break;
+ }
+ }
+ }
+ }
+
+ if (inst == end_loop) {
+ end_loop = NULL;
+ continue;
+ }
for (i = 0; i < opcode->NumSrcRegs; ++i) {
- if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
- ta[inst->U.I.SrcReg[i].Index].LastRead = inst;
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ struct rc_instruction *last_read;
+
+ /* From "last_inst_src_reladdr", "end_loop", and "inst",
+ * select the instruction with the highest instruction index (IP).
+ * Note that "end_loop", if available, has always a higher index than "inst". */
+ if (last_inst_src_reladdr) {
+ if (end_loop) {
+ last_read = last_inst_src_reladdr->IP > end_loop->IP ?
+ last_inst_src_reladdr : end_loop;
+ } else {
+ last_read = last_inst_src_reladdr->IP > inst->IP ?
+ last_inst_src_reladdr : inst;
+ }
+ } else {
+ last_read = end_loop ? end_loop : inst;
+ }
+
+ ta[inst->U.I.SrcReg[i].Index].LastRead = last_read;
+ }
}
}
- /* Pass 3: Register allocation */
+ /* Pass 4: Register allocation */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for (i = 0; i < opcode->NumSrcRegs; ++i) {
- if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
- unsigned int orig = inst->U.I.SrcReg[i].Index;
- inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
+ if (!last_inst_src_reladdr || last_inst_src_reladdr->IP < inst->IP) {
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.SrcReg[i].Index;
+ inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
- if (ta[orig].Allocated && inst == ta[orig].LastRead)
- hwtemps[ta[orig].HwTemp] = 0;
+ if (ta[orig].Allocated && inst == ta[orig].LastRead)
+ hwtemps[ta[orig].HwTemp] = 0;
+ }
}
}
unsigned int orig = inst->U.I.DstReg.Index;
if (!ta[orig].Allocated) {
- for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
+ for(j = 0; j < c->max_temp_regs; ++j) {
if (!hwtemps[j])
break;
}
- if (j >= VSF_MAX_FRAGMENT_TEMPS) {
- fprintf(stderr, "Out of hw temporaries\n");
+ if (j >= c->max_temp_regs) {
+ rc_error(c, "Too many temporaries\n");
+ return;
} else {
ta[orig].Allocated = 1;
- ta[orig].HwTemp = j;
- hwtemps[j] = 1;
-
- if (j >= compiler->code->num_temporaries)
- compiler->code->num_temporaries = j + 1;
+ if (last_inst_src_reladdr &&
+ last_inst_src_reladdr->IP > inst->IP) {
+ ta[orig].HwTemp = orig;
+ } else {
+ ta[orig].HwTemp = j;
+ }
+ hwtemps[ta[orig].HwTemp] = 1;
}
}
}
}
+/**
+ * R3xx-R4xx vertex engine does not support the Absolute source operand modifier
+ * and the Saturate opcode modifier. Only Absolute is currently transformed.
+ */
+static int transform_nonnative_modifiers(
+ struct radeon_compiler *c,
+ struct rc_instruction *inst,
+ void* unused)
+{
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned i;
+
+ /* Transform ABS(a) to MAX(a, -a). */
+ for (i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].Abs) {
+ struct rc_instruction *new_inst;
+ unsigned temp;
+
+ inst->U.I.SrcReg[i].Abs = 0;
+
+ temp = rc_find_free_temporary(c);
+
+ new_inst = rc_insert_new_instruction(c, inst->Prev);
+ new_inst->U.I.Opcode = RC_OPCODE_MAX;
+ new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ new_inst->U.I.DstReg.Index = temp;
+ new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i];
+ new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i];
+ new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+
+ memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i]));
+ inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[i].Index = temp;
+ inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW;
+ }
+ }
+ return 1;
+}
/**
* Vertex engine cannot read two inputs or two constants at the same time.
return 1;
}
-static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
+static void rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user)
{
+ struct r300_vertex_program_compiler * compiler = (struct r300_vertex_program_compiler*)c;
int i;
for(i = 0; i < 32; ++i) {
return 1;
}
-static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where)
+static void transform_negative_addressing(struct r300_vertex_program_compiler *c,
+ struct rc_instruction *arl,
+ struct rc_instruction *end,
+ int min_offset)
{
- if (c->Base.Debug) {
- fprintf(stderr, "Vertex Program: %s\n", where);
- rc_print_program(&c->Base.Program);
+ struct rc_instruction *inst, *add;
+ unsigned const_swizzle;
+
+ /* Transform ARL */
+ add = rc_insert_new_instruction(&c->Base, arl->Prev);
+ add->U.I.Opcode = RC_OPCODE_ADD;
+ add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base);
+ add->U.I.DstReg.WriteMask = RC_MASK_X;
+ add->U.I.SrcReg[0] = arl->U.I.SrcReg[0];
+ add->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants,
+ min_offset, &const_swizzle);
+ add->U.I.SrcReg[1].Swizzle = const_swizzle;
+
+ arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index;
+ arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX;
+
+ /* Rewrite offsets up to and excluding inst. */
+ for (inst = arl->Next; inst != end; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for (unsigned i = 0; i < opcode->NumSrcRegs; i++)
+ if (inst->U.I.SrcReg[i].RelAddr)
+ inst->U.I.SrcReg[i].Index -= min_offset;
}
}
-
-static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
- .IsNative = &swizzle_is_native,
- .Split = 0 /* should never be called */
-};
-
-
-void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
+static void rc_emulate_negative_addressing(struct radeon_compiler *compiler, void *user)
{
- compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
-
- addArtificialOutputs(compiler);
+ struct r300_vertex_program_compiler * c = (struct r300_vertex_program_compiler*)compiler;
+ struct rc_instruction *inst, *lastARL = NULL;
+ int min_offset = 0;
- debug_program_log(compiler, "before compilation");
+ for (inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- /* XXX Ideally this should be done only for r3xx, but since
- * we don't have branching support for r5xx, we use the emulation
- * on all chipsets. */
- rc_emulate_branches(&compiler->Base);
+ if (inst->U.I.Opcode == RC_OPCODE_ARL) {
+ if (lastARL != NULL && min_offset < 0)
+ transform_negative_addressing(c, lastARL, inst, min_offset);
- debug_program_log(compiler, "after emulate branches");
+ lastARL = inst;
+ min_offset = 0;
+ continue;
+ }
- {
- struct radeon_program_transformation transformations[] = {
- { &r300_transform_vertex_alu, 0 },
- };
- radeonLocalTransform(&compiler->Base, 1, transformations);
- }
+ for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].RelAddr &&
+ inst->U.I.SrcReg[i].Index < 0) {
+ /* ARL must precede any indirect addressing. */
+ if (lastARL == NULL) {
+ rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL.");
+ return;
+ }
- debug_program_log(compiler, "after native rewrite");
-
- {
- /* Note: This pass has to be done seperately from ALU rewrite,
- * otherwise non-native ALU instructions with source conflits
- * will not be treated properly.
- */
- struct radeon_program_transformation transformations[] = {
- { &transform_source_conflicts, 0 },
- };
- radeonLocalTransform(&compiler->Base, 1, transformations);
+ if (inst->U.I.SrcReg[i].Index < min_offset)
+ min_offset = inst->U.I.SrcReg[i].Index;
+ }
+ }
}
- debug_program_log(compiler, "after source conflict resolve");
-
- rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler);
-
- debug_program_log(compiler, "after deadcode");
-
- rc_dataflow_swizzles(&compiler->Base);
-
- allocate_temporary_registers(compiler);
-
- debug_program_log(compiler, "after dataflow");
-
- translate_vertex_program(compiler);
-
- rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
+ if (lastARL != NULL && min_offset < 0)
+ transform_negative_addressing(c, lastARL, inst, min_offset);
+}
- compiler->code->InputsRead = compiler->Base.Program.InputsRead;
- compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
+static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
+ .IsNative = &swizzle_is_native,
+ .Split = 0 /* should never be called */
+};
- if (compiler->Base.Debug) {
- fprintf(stderr, "Final vertex program code:\n");
- r300_vertex_program_dump(compiler->code);
- }
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
+{
+ int is_r500 = c->Base.is_r500;
+ int kill_consts = c->Base.remove_unused_constants;
+ int opt = !c->Base.disable_optimizations;
+
+ /* Lists of instruction transformations. */
+ struct radeon_program_transformation alu_rewrite_r500[] = {
+ { &r300_transform_vertex_alu, 0 },
+ { &r300_transform_trig_scale_vertex, 0 },
+ { 0, 0 }
+ };
+
+ struct radeon_program_transformation alu_rewrite_r300[] = {
+ { &r300_transform_vertex_alu, 0 },
+ { &r300_transform_trig_simple, 0 },
+ { 0, 0 }
+ };
+
+ /* Note: These passes have to be done seperately from ALU rewrite,
+ * otherwise non-native ALU instructions with source conflits
+ * or non-native modifiers will not be treated properly.
+ */
+ struct radeon_program_transformation emulate_modifiers[] = {
+ { &transform_nonnative_modifiers, 0 },
+ { 0, 0 }
+ };
+
+ struct radeon_program_transformation resolve_src_conflicts[] = {
+ { &transform_source_conflicts, 0 },
+ { 0, 0 }
+ };
+
+ /* List of compiler passes. */
+ struct radeon_compiler_pass vs_list[] = {
+ /* NAME DUMP PREDICATE FUNCTION PARAM */
+ {"add artificial outputs", 0, 1, rc_vs_add_artificial_outputs, NULL},
+ {"transform loops", 1, 1, rc_transform_loops, NULL},
+ {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL},
+ {"emulate negative addressing", 1, 1, rc_emulate_negative_addressing, NULL},
+ {"native rewrite", 1, is_r500, rc_local_transform, alu_rewrite_r500},
+ {"native rewrite", 1, !is_r500, rc_local_transform, alu_rewrite_r300},
+ {"emulate modifiers", 1, !is_r500, rc_local_transform, emulate_modifiers},
+ {"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_used},
+ {"dataflow optimize", 1, opt, rc_optimize, NULL},
+ /* This pass must be done after optimizations. */
+ {"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts},
+ {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
+ {"register allocation", 1, opt, allocate_temporary_registers, NULL},
+ {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table},
+ {"final code validation", 0, 1, rc_validate_final_shader, NULL},
+ {"machine code generation", 0, 1, translate_vertex_program, NULL},
+ {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL},
+ {NULL, 0, 0, NULL, NULL}
+ };
+
+ c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
+
+ rc_run_compiler(&c->Base, vs_list, "Vertex Program");
+
+ c->code->InputsRead = c->Base.Program.InputsRead;
+ c->code->OutputsWritten = c->Base.Program.OutputsWritten;
+ rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
}