r300/compiler: cleanup rc_run_compiler
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r3xx_vertprog.c
index b5186809bc39a622976cbc1012df937df5b42f8e..54f4cf3316dbad87429b0b8572a41f9400ce9e5c 100644 (file)
@@ -26,6 +26,7 @@
 
 #include "../r300_reg.h"
 
+#include "radeon_compiler_util.h"
 #include "radeon_dataflow.h"
 #include "radeon_program_alu.h"
 #include "radeon_swizzle.h"
@@ -391,16 +392,16 @@ static void ei_if(struct r300_vertex_program_compiler * compiler,
        /* Reserve a temporary to use as our predicate stack counter, if we
         * don't already have one. */
        if (!compiler->PredicateMask) {
-               unsigned int writemasks[R300_VS_MAX_TEMPS];
-               memset(writemasks, 0, sizeof(writemasks));
+               unsigned int writemasks[RC_REGISTER_MAX_INDEX];
                struct rc_instruction * inst;
                unsigned int i;
+               memset(writemasks, 0, sizeof(writemasks));
                for(inst = compiler->Base.Program.Instructions.Next;
                                inst != &compiler->Base.Program.Instructions;
                                                        inst = inst->Next) {
                        rc_for_all_writes_mask(inst, mark_write, writemasks);
                }
-               for(i = 0; i < R300_VS_MAX_TEMPS; i++) {
+               for(i = 0; i < compiler->Base.max_temp_regs; i++) {
                        unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
                        /* Only the W component can be used fo the predicate
                         * stack counter. */
@@ -410,7 +411,7 @@ static void ei_if(struct r300_vertex_program_compiler * compiler,
                                break;
                        }
                }
-               if (i == R300_VS_MAX_TEMPS) {
+               if (i == compiler->Base.max_temp_regs) {
                        rc_error(&compiler->Base, "No free temporary to use for"
                                        " predicate stack counter.\n");
                        return;
@@ -462,8 +463,9 @@ static void ei_endif(struct r300_vertex_program_compiler *compiler,
        inst[3] = 0;
 }
 
-static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
+static void translate_vertex_program(struct radeon_compiler *c, void *user)
 {
+       struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
        struct rc_instruction *rci;
 
        struct loop * loops = NULL;
@@ -474,18 +476,20 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
 
        compiler->code->pos_end = 0;    /* Not supported yet */
        compiler->code->length = 0;
+       compiler->code->num_temporaries = 0;
 
        compiler->SetHwInputOutput(compiler);
 
        for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
                struct rc_sub_instruction *vpi = &rci->U.I;
                unsigned int *inst = compiler->code->body.d + compiler->code->length;
+               const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode);
 
                /* Skip instructions writing to non-existing destination */
                if (!valid_dst(compiler->code, &vpi->DstReg))
                        continue;
 
-               if (rc_get_opcode_info(vpi->Opcode)->HasDstReg) {
+               if (info->HasDstReg) {
                        /* Relative addressing of destination operands is not supported yet. */
                        if (vpi->DstReg.RelAddr) {
                                rc_error(&compiler->Base, "Vertex program does not support relative "
@@ -500,8 +504,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
                        }
                }
 
-               if (compiler->code->length >= R500_VS_MAX_ALU_DWORDS ||
-                   (compiler->code->length >= R300_VS_MAX_ALU_DWORDS && !compiler->Base.is_r500)) {
+               if (compiler->code->length >= c->max_alu_insts * 4) {
                        rc_error(&compiler->Base, "Vertex program has too many instructions\n");
                        return;
                }
@@ -607,7 +610,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
                }
 
                default:
-                       rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
+                       rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name);
                        return;
                }
 
@@ -624,6 +627,25 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
                                                << PVS_DST_PRED_SENSE_SHIFT);
                }
 
+               /* Update the number of temporaries. */
+               if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY &&
+                   vpi->DstReg.Index >= compiler->code->num_temporaries)
+                       compiler->code->num_temporaries = vpi->DstReg.Index + 1;
+
+               for (unsigned i = 0; i < info->NumSrcRegs; i++)
+                       if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY &&
+                           vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
+                               compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;
+
+               if (compiler->PredicateMask)
+                       if (compiler->PredicateIndex >= compiler->code->num_temporaries)
+                               compiler->code->num_temporaries = compiler->PredicateIndex + 1;
+
+               if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
+                       rc_error(&compiler->Base, "Too many temporaries.\n");
+                       return;
+               }
+
                compiler->code->length += 4;
 
                if (compiler->Base.Error)
@@ -637,17 +659,21 @@ struct temporary_allocation {
        struct rc_instruction * LastRead;
 };
 
-static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
+static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
 {
+       struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
        struct rc_instruction *inst;
        struct rc_instruction *end_loop = NULL;
        unsigned int num_orig_temps = 0;
-       char hwtemps[R300_VS_MAX_TEMPS];
+       char hwtemps[RC_REGISTER_MAX_INDEX];
        struct temporary_allocation * ta;
        unsigned int i, j;
+       struct rc_instruction *last_inst_src_reladdr = NULL;
 
        memset(hwtemps, 0, sizeof(hwtemps));
 
+       rc_recompute_ips(c);
+
        /* Pass 1: Count original temporaries. */
        for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
                const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
@@ -666,9 +692,9 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
                        }
                }
        }
-       compiler->code->num_temporaries = num_orig_temps;
 
-       /* Pass 2: If there is relative addressing of temporaries, we cannot change register indices. Give up. */
+       /* Pass 2: If there is relative addressing of dst temporaries, we cannot change register indices. Give up.
+        * For src temporaries, save the last instruction which uses relative addressing. */
        for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
                const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
@@ -679,12 +705,11 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
                for (i = 0; i < opcode->NumSrcRegs; ++i) {
                        if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
                            inst->U.I.SrcReg[i].RelAddr) {
-                               return;
+                               last_inst_src_reladdr = inst;
                        }
                }
        }
 
-       compiler->code->num_temporaries = 0;
        ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
                        sizeof(struct temporary_allocation) * num_orig_temps);
        memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
@@ -718,9 +743,26 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
                }
 
                for (i = 0; i < opcode->NumSrcRegs; ++i) {
-                       if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
-                               ta[inst->U.I.SrcReg[i].Index].LastRead =
-                                               end_loop ? end_loop : inst;
+                       if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+                               struct rc_instruction *last_read;
+
+                               /* From "last_inst_src_reladdr", "end_loop", and "inst",
+                                * select the instruction with the highest instruction index (IP).
+                                * Note that "end_loop", if available, has always a higher index than "inst". */
+                               if (last_inst_src_reladdr) {
+                                       if (end_loop) {
+                                               last_read = last_inst_src_reladdr->IP > end_loop->IP ?
+                                                           last_inst_src_reladdr : end_loop;
+                                       } else {
+                                               last_read = last_inst_src_reladdr->IP > inst->IP ?
+                                                           last_inst_src_reladdr : inst;
+                                       }
+                               } else {
+                                       last_read = end_loop ? end_loop : inst;
+                               }
+
+                               ta[inst->U.I.SrcReg[i].Index].LastRead = last_read;
+                       }
                }
        }
 
@@ -728,13 +770,15 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
        for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
                const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
-               for (i = 0; i < opcode->NumSrcRegs; ++i) {
-                       if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
-                               unsigned int orig = inst->U.I.SrcReg[i].Index;
-                               inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
+               if (!last_inst_src_reladdr || last_inst_src_reladdr->IP < inst->IP) {
+                       for (i = 0; i < opcode->NumSrcRegs; ++i) {
+                               if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+                                       unsigned int orig = inst->U.I.SrcReg[i].Index;
+                                       inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
 
-                               if (ta[orig].Allocated && inst == ta[orig].LastRead)
-                                       hwtemps[ta[orig].HwTemp] = 0;
+                                       if (ta[orig].Allocated && inst == ta[orig].LastRead)
+                                               hwtemps[ta[orig].HwTemp] = 0;
+                               }
                        }
                }
 
@@ -743,20 +787,18 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
                                unsigned int orig = inst->U.I.DstReg.Index;
 
                                if (!ta[orig].Allocated) {
-                                       for(j = 0; j < R300_VS_MAX_TEMPS; ++j) {
+                                       for(j = 0; j < c->max_temp_regs; ++j) {
                                                if (!hwtemps[j])
                                                        break;
                                        }
-                                       if (j >= R300_VS_MAX_TEMPS) {
-                                               fprintf(stderr, "Out of hw temporaries\n");
+                                       ta[orig].Allocated = 1;
+                                       if (last_inst_src_reladdr &&
+                                           last_inst_src_reladdr->IP > inst->IP) {
+                                               ta[orig].HwTemp = orig;
                                        } else {
-                                               ta[orig].Allocated = 1;
                                                ta[orig].HwTemp = j;
-                                               hwtemps[j] = 1;
-
-                                               if (j >= compiler->code->num_temporaries)
-                                                       compiler->code->num_temporaries = j + 1;
                                        }
+                                       hwtemps[ta[orig].HwTemp] = 1;
                                }
 
                                inst->U.I.DstReg.Index = ta[orig].HwTemp;
@@ -849,8 +891,9 @@ static int transform_source_conflicts(
        return 1;
 }
 
-static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
+static void rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user)
 {
+       struct r300_vertex_program_compiler * compiler = (struct r300_vertex_program_compiler*)c;
        int i;
 
        for(i = 0; i < 32; ++i) {
@@ -926,8 +969,9 @@ static void transform_negative_addressing(struct r300_vertex_program_compiler *c
        }
 }
 
-static void rc_emulate_negative_addressing(struct r300_vertex_program_compiler *c)
+static void rc_emulate_negative_addressing(struct radeon_compiler *compiler, void *user)
 {
+       struct r300_vertex_program_compiler * c = (struct r300_vertex_program_compiler*)compiler;
        struct rc_instruction *inst, *lastARL = NULL;
        int min_offset = 0;
 
@@ -962,143 +1006,72 @@ static void rc_emulate_negative_addressing(struct r300_vertex_program_compiler *
                transform_negative_addressing(c, lastARL, inst, min_offset);
 }
 
-static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where)
-{
-       if (c->Base.Debug) {
-               fprintf(stderr, "Vertex Program: %s\n", where);
-               rc_print_program(&c->Base.Program);
-       }
-}
-
-
 static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
        .IsNative = &swizzle_is_native,
        .Split = 0 /* should never be called */
 };
 
-
 void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
 {
+       int is_r500 = c->Base.is_r500;
+       int kill_consts = c->Base.remove_unused_constants;
+       int opt = !c->Base.disable_optimizations;
+
+       /* Lists of instruction transformations. */
+       struct radeon_program_transformation alu_rewrite_r500[] = {
+               { &r300_transform_vertex_alu, 0 },
+               { &r300_transform_trig_scale_vertex, 0 },
+               { 0, 0 }
+       };
+
+       struct radeon_program_transformation alu_rewrite_r300[] = {
+               { &r300_transform_vertex_alu, 0 },
+               { &r300_transform_trig_simple, 0 },
+               { 0, 0 }
+       };
+
+       /* Note: These passes have to be done separately from ALU rewrite,
+        * otherwise non-native ALU instructions with source conflicts
+        * or non-native modifiers will not be treated properly.
+        */
+       struct radeon_program_transformation emulate_modifiers[] = {
+               { &transform_nonnative_modifiers, 0 },
+               { 0, 0 }
+       };
+
+       struct radeon_program_transformation resolve_src_conflicts[] = {
+               { &transform_source_conflicts, 0 },
+               { 0, 0 }
+       };
+
+       /* List of compiler passes. */
+       struct radeon_compiler_pass vs_list[] = {
+               /* NAME                         DUMP PREDICATE  FUNCTION                        PARAM */
+               {"add artificial outputs",      0, 1,           rc_vs_add_artificial_outputs,   NULL},
+               {"transform loops",             1, 1,           rc_transform_loops,             NULL},
+               {"emulate branches",            1, !is_r500,    rc_emulate_branches,            NULL},
+               {"emulate negative addressing", 1, 1,           rc_emulate_negative_addressing, NULL},
+               {"native rewrite",              1, is_r500,     rc_local_transform,             alu_rewrite_r500},
+               {"native rewrite",              1, !is_r500,    rc_local_transform,             alu_rewrite_r300},
+               {"emulate modifiers",           1, !is_r500,    rc_local_transform,             emulate_modifiers},
+               {"deadcode",                    1, opt,         rc_dataflow_deadcode,           dataflow_outputs_mark_used},
+               {"dataflow optimize",           1, opt,         rc_optimize,                    NULL},
+               /* This pass must be done after optimizations. */
+               {"source conflict resolve",     1, 1,           rc_local_transform,             resolve_src_conflicts},
+               {"register allocation",         1, opt,         allocate_temporary_registers,   NULL},
+               {"dead constants",              1, kill_consts, rc_remove_unused_constants,     &c->code->constants_remap_table},
+               {"final code validation",       0, 1,           rc_validate_final_shader,       NULL},
+               {"machine code generation",     0, 1,           translate_vertex_program,       NULL},
+               {"dump machine code",           0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump,        NULL},
+               {NULL, 0, 0, NULL, NULL}
+       };
+
+       c->Base.type = RC_VERTEX_PROGRAM;
        c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
 
-       addArtificialOutputs(c);
-
-       debug_program_log(c, "before compilation");
-
-       if (c->Base.is_r500)
-               rc_transform_loops(&c->Base, R500_VS_MAX_ALU);
-       else
-               rc_transform_loops(&c->Base, R300_VS_MAX_ALU);
-       if (c->Base.Error)
-               return;
-
-       debug_program_log(c, "after emulate loops");
-
-       if (!c->Base.is_r500) {
-               rc_emulate_branches(&c->Base);
-               if (c->Base.Error)
-                       return;
-               debug_program_log(c, "after emulate branches");
-       }
-
-       rc_emulate_negative_addressing(c);
-
-       debug_program_log(c, "after negative addressing emulation");
-
-       if (c->Base.is_r500) {
-               struct radeon_program_transformation transformations[] = {
-                       { &r300_transform_vertex_alu, 0 },
-                       { &r300_transform_trig_scale_vertex, 0 }
-               };
-               radeonLocalTransform(&c->Base, 2, transformations);
-               if (c->Base.Error)
-                       return;
-
-               debug_program_log(c, "after native rewrite");
-       } else {
-               struct radeon_program_transformation transformations[] = {
-                       { &r300_transform_vertex_alu, 0 },
-                       { &radeonTransformTrigSimple, 0 }
-               };
-               radeonLocalTransform(&c->Base, 2, transformations);
-               if (c->Base.Error)
-                       return;
-
-               debug_program_log(c, "after native rewrite");
-
-               /* Note: This pass has to be done seperately from ALU rewrite,
-                * because it needs to check every instruction.
-                */
-               struct radeon_program_transformation transformations2[] = {
-                       { &transform_nonnative_modifiers, 0 },
-               };
-               radeonLocalTransform(&c->Base, 1, transformations2);
-               if (c->Base.Error)
-                       return;
-
-               debug_program_log(c, "after emulate modifiers");
-       }
-
-       {
-               /* Note: This pass has to be done seperately from ALU rewrite,
-                * otherwise non-native ALU instructions with source conflits
-                * will not be treated properly.
-                */
-               struct radeon_program_transformation transformations[] = {
-                       { &transform_source_conflicts, 0 },
-               };
-               radeonLocalTransform(&c->Base, 1, transformations);
-               if (c->Base.Error)
-                       return;
-       }
-
-       debug_program_log(c, "after source conflict resolve");
-
-       rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_used, c);
-       if (c->Base.Error)
-               return;
-
-       debug_program_log(c, "after deadcode");
-
-       rc_dataflow_swizzles(&c->Base);
-       if (c->Base.Error)
-               return;
-
-       debug_program_log(c, "after dataflow");
-
-       allocate_temporary_registers(c);
-       if (c->Base.Error)
-               return;
-
-       debug_program_log(c, "after register allocation");
-
-       if (c->Base.remove_unused_constants) {
-               rc_remove_unused_constants(&c->Base,
-                                          &c->code->constants_remap_table);
-               if (c->Base.Error)
-                       return;
-
-               debug_program_log(c, "after constants cleanup");
-       }
-
-       translate_vertex_program(c);
-       if (c->Base.Error)
-               return;
-
-       rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+       rc_run_compiler(&c->Base, vs_list);
 
        c->code->InputsRead = c->Base.Program.InputsRead;
        c->code->OutputsWritten = c->Base.Program.OutputsWritten;
-
-       if (c->Base.Debug) {
-               fprintf(stderr, "Final vertex program code:\n");
-               r300_vertex_program_dump(c);
-       }
-
-       /* Check the number of constants. */
-       if (!c->Base.Error &&
-           c->Base.Program.Constants.Count > 256) {
-               rc_error(&c->Base, "Too many constants. Max: 256, Got: %i\n",
-                        c->Base.Program.Constants.Count);
-       }
+       rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
 }