r300/compiler: cleanup rc_run_compiler
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r3xx_vertprog.c
index 8c3177f1f2673eaebbd2a1b084bec25d9884f704..54f4cf3316dbad87429b0b8572a41f9400ce9e5c 100644 (file)
@@ -26,6 +26,7 @@
 
 #include "../r300_reg.h"
 
+#include "radeon_compiler_util.h"
 #include "radeon_dataflow.h"
 #include "radeon_program_alu.h"
 #include "radeon_swizzle.h"
@@ -391,16 +392,16 @@ static void ei_if(struct r300_vertex_program_compiler * compiler,
        /* Reserve a temporary to use as our predicate stack counter, if we
         * don't already have one. */
        if (!compiler->PredicateMask) {
-               unsigned int writemasks[R300_VS_MAX_TEMPS];
-               memset(writemasks, 0, sizeof(writemasks));
+               unsigned int writemasks[RC_REGISTER_MAX_INDEX];
                struct rc_instruction * inst;
                unsigned int i;
+               memset(writemasks, 0, sizeof(writemasks));
                for(inst = compiler->Base.Program.Instructions.Next;
                                inst != &compiler->Base.Program.Instructions;
                                                        inst = inst->Next) {
                        rc_for_all_writes_mask(inst, mark_write, writemasks);
                }
-               for(i = 0; i < R300_VS_MAX_TEMPS; i++) {
+               for(i = 0; i < compiler->Base.max_temp_regs; i++) {
                        unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
                        /* Only the W component can be used fo the predicate
                         * stack counter. */
@@ -410,7 +411,7 @@ static void ei_if(struct r300_vertex_program_compiler * compiler,
                                break;
                        }
                }
-               if (i == R300_VS_MAX_TEMPS) {
+               if (i == compiler->Base.max_temp_regs) {
                        rc_error(&compiler->Base, "No free temporary to use for"
                                        " predicate stack counter.\n");
                        return;
@@ -475,18 +476,20 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
 
        compiler->code->pos_end = 0;    /* Not supported yet */
        compiler->code->length = 0;
+       compiler->code->num_temporaries = 0;
 
        compiler->SetHwInputOutput(compiler);
 
        for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
                struct rc_sub_instruction *vpi = &rci->U.I;
                unsigned int *inst = compiler->code->body.d + compiler->code->length;
+               const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode);
 
                /* Skip instructions writing to non-existing destination */
                if (!valid_dst(compiler->code, &vpi->DstReg))
                        continue;
 
-               if (rc_get_opcode_info(vpi->Opcode)->HasDstReg) {
+               if (info->HasDstReg) {
                        /* Relative addressing of destination operands is not supported yet. */
                        if (vpi->DstReg.RelAddr) {
                                rc_error(&compiler->Base, "Vertex program does not support relative "
@@ -501,8 +504,7 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
                        }
                }
 
-               if (compiler->code->length >= R500_VS_MAX_ALU_DWORDS ||
-                   (compiler->code->length >= R300_VS_MAX_ALU_DWORDS && !compiler->Base.is_r500)) {
+               if (compiler->code->length >= c->max_alu_insts * 4) {
                        rc_error(&compiler->Base, "Vertex program has too many instructions\n");
                        return;
                }
@@ -608,7 +610,7 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
                }
 
                default:
-                       rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
+                       rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name);
                        return;
                }
 
@@ -625,6 +627,25 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
                                                << PVS_DST_PRED_SENSE_SHIFT);
                }
 
+               /* Update the number of temporaries. */
+               if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY &&
+                   vpi->DstReg.Index >= compiler->code->num_temporaries)
+                       compiler->code->num_temporaries = vpi->DstReg.Index + 1;
+
+               for (unsigned i = 0; i < info->NumSrcRegs; i++)
+                       if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY &&
+                           vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
+                               compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;
+
+               if (compiler->PredicateMask)
+                       if (compiler->PredicateIndex >= compiler->code->num_temporaries)
+                               compiler->code->num_temporaries = compiler->PredicateIndex + 1;
+
+               if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
+                       rc_error(&compiler->Base, "Too many temporaries.\n");
+                       return;
+               }
+
                compiler->code->length += 4;
 
                if (compiler->Base.Error)
@@ -644,12 +665,15 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
        struct rc_instruction *inst;
        struct rc_instruction *end_loop = NULL;
        unsigned int num_orig_temps = 0;
-       char hwtemps[R300_VS_MAX_TEMPS];
+       char hwtemps[RC_REGISTER_MAX_INDEX];
        struct temporary_allocation * ta;
        unsigned int i, j;
+       struct rc_instruction *last_inst_src_reladdr = NULL;
 
        memset(hwtemps, 0, sizeof(hwtemps));
 
+       rc_recompute_ips(c);
+
        /* Pass 1: Count original temporaries. */
        for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
                const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
@@ -668,9 +692,9 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
                        }
                }
        }
-       compiler->code->num_temporaries = num_orig_temps;
 
-       /* Pass 2: If there is relative addressing of temporaries, we cannot change register indices. Give up. */
+       /* Pass 2: If there is relative addressing of dst temporaries, we cannot change register indices. Give up.
+        * For src temporaries, save the last instruction which uses relative addressing. */
        for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
                const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
@@ -681,12 +705,11 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
                for (i = 0; i < opcode->NumSrcRegs; ++i) {
                        if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
                            inst->U.I.SrcReg[i].RelAddr) {
-                               return;
+                               last_inst_src_reladdr = inst;
                        }
                }
        }
 
-       compiler->code->num_temporaries = 0;
        ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
                        sizeof(struct temporary_allocation) * num_orig_temps);
        memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
@@ -720,9 +743,26 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
                }
 
                for (i = 0; i < opcode->NumSrcRegs; ++i) {
-                       if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
-                               ta[inst->U.I.SrcReg[i].Index].LastRead =
-                                               end_loop ? end_loop : inst;
+                       if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+                               struct rc_instruction *last_read;
+
+                               /* From "last_inst_src_reladdr", "end_loop", and "inst",
+                                * select the instruction with the highest instruction index (IP).
+                                * Note that "end_loop", if available, always has a higher index than "inst". */
+                               if (last_inst_src_reladdr) {
+                                       if (end_loop) {
+                                               last_read = last_inst_src_reladdr->IP > end_loop->IP ?
+                                                           last_inst_src_reladdr : end_loop;
+                                       } else {
+                                               last_read = last_inst_src_reladdr->IP > inst->IP ?
+                                                           last_inst_src_reladdr : inst;
+                                       }
+                               } else {
+                                       last_read = end_loop ? end_loop : inst;
+                               }
+
+                               ta[inst->U.I.SrcReg[i].Index].LastRead = last_read;
+                       }
                }
        }
 
@@ -730,13 +770,15 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
        for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
                const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
-               for (i = 0; i < opcode->NumSrcRegs; ++i) {
-                       if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
-                               unsigned int orig = inst->U.I.SrcReg[i].Index;
-                               inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
+               if (!last_inst_src_reladdr || last_inst_src_reladdr->IP < inst->IP) {
+                       for (i = 0; i < opcode->NumSrcRegs; ++i) {
+                               if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+                                       unsigned int orig = inst->U.I.SrcReg[i].Index;
+                                       inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
 
-                               if (ta[orig].Allocated && inst == ta[orig].LastRead)
-                                       hwtemps[ta[orig].HwTemp] = 0;
+                                       if (ta[orig].Allocated && inst == ta[orig].LastRead)
+                                               hwtemps[ta[orig].HwTemp] = 0;
+                               }
                        }
                }
 
@@ -745,20 +787,18 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
                                unsigned int orig = inst->U.I.DstReg.Index;
 
                                if (!ta[orig].Allocated) {
-                                       for(j = 0; j < R300_VS_MAX_TEMPS; ++j) {
+                                       for(j = 0; j < c->max_temp_regs; ++j) {
                                                if (!hwtemps[j])
                                                        break;
                                        }
-                                       if (j >= R300_VS_MAX_TEMPS) {
-                                               fprintf(stderr, "Out of hw temporaries\n");
+                                       ta[orig].Allocated = 1;
+                                       if (last_inst_src_reladdr &&
+                                           last_inst_src_reladdr->IP > inst->IP) {
+                                               ta[orig].HwTemp = orig;
                                        } else {
-                                               ta[orig].Allocated = 1;
                                                ta[orig].HwTemp = j;
-                                               hwtemps[j] = 1;
-
-                                               if (j >= compiler->code->num_temporaries)
-                                                       compiler->code->num_temporaries = j + 1;
                                        }
+                                       hwtemps[ta[orig].HwTemp] = 1;
                                }
 
                                inst->U.I.DstReg.Index = ta[orig].HwTemp;
@@ -975,6 +1015,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
 {
        int is_r500 = c->Base.is_r500;
        int kill_consts = c->Base.remove_unused_constants;
+       int opt = !c->Base.disable_optimizations;
 
        /* Lists of instruction transformations. */
        struct radeon_program_transformation alu_rewrite_r500[] = {
@@ -1013,22 +1054,22 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
                {"native rewrite",              1, is_r500,     rc_local_transform,             alu_rewrite_r500},
                {"native rewrite",              1, !is_r500,    rc_local_transform,             alu_rewrite_r300},
                {"emulate modifiers",           1, !is_r500,    rc_local_transform,             emulate_modifiers},
-               {"deadcode",                    1, 1,           rc_dataflow_deadcode,           dataflow_outputs_mark_used},
-               {"dataflow optimize",           1, 1,           rc_optimize,                    NULL},
+               {"deadcode",                    1, opt,         rc_dataflow_deadcode,           dataflow_outputs_mark_used},
+               {"dataflow optimize",           1, opt,         rc_optimize,                    NULL},
                /* This pass must be done after optimizations. */
                {"source conflict resolve",     1, 1,           rc_local_transform,             resolve_src_conflicts},
-               {"dataflow swizzles",           1, 1,           rc_dataflow_swizzles,           NULL},
-               {"register allocation",         1, 1,           allocate_temporary_registers,   NULL},
+               {"register allocation",         1, opt,         allocate_temporary_registers,   NULL},
                {"dead constants",              1, kill_consts, rc_remove_unused_constants,     &c->code->constants_remap_table},
                {"final code validation",       0, 1,           rc_validate_final_shader,       NULL},
                {"machine code generation",     0, 1,           translate_vertex_program,       NULL},
-               {"dump machine code",           0,c->Base.Debug,r300_vertex_program_dump,       NULL},
+               {"dump machine code",           0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump,        NULL},
                {NULL, 0, 0, NULL, NULL}
        };
 
+       c->Base.type = RC_VERTEX_PROGRAM;
        c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
 
-       rc_run_compiler(&c->Base, vs_list, "Vertex Program");
+       rc_run_compiler(&c->Base, vs_list);
 
        c->code->InputsRead = c->Base.Program.InputsRead;
        c->code->OutputsWritten = c->Base.Program.OutputsWritten;