r300/compiler: Handle loops in the register allocator
author Tom Stellard <tstellar@gmail.com>
Mon, 18 Apr 2011 05:33:04 +0000 (22:33 -0700)
committer Tom Stellard <tstellar@gmail.com>
Sat, 30 Apr 2011 18:00:16 +0000 (11:00 -0700)
src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
src/mesa/drivers/dri/r300/compiler/radeon_variable.c

index ce7b008e7f10edc17b11d0b5f640a4eac58049ef..3a6b0a7af15efa229942121583de8c5b93a21f0d 100644 (file)
@@ -583,3 +583,25 @@ struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop)
        }
        return NULL;
 }
+
+/** Scan forward from bgnloop for the ENDLOOP that closes it, skipping over loops nested inside it.
+ * @return The ENDLOOP instruction that ends the loop started by bgnloop, or NULL if the scan arrives back at bgnloop without finding one.
+ */
+struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop)
+{
+       unsigned int bgnloop_count = 0; /* nested BGNLOOPs seen so far that still await their ENDLOOP */
+       struct rc_instruction * inst;
+       for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) { /* stops only on a match or on reaching bgnloop again */
+               rc_opcode op = rc_get_flow_control_inst(inst);
+               if (op == RC_OPCODE_BGNLOOP) {
+                       bgnloop_count++; /* entered an inner loop */
+               } else if (op == RC_OPCODE_ENDLOOP) {
+                       if (bgnloop_count == 0) {
+                               return inst; /* no inner loop is open, so this ENDLOOP belongs to bgnloop */
+                       } else {
+                               bgnloop_count--; /* this ENDLOOP closes an inner loop */
+                       }
+               }
+       }
+       return NULL; /* no matching ENDLOOP was found */
+}
index 0ba25aa96278d4628ffa5df3d5e4e199069dfaac..1a0b96242e06d604c560fc90e6eb930397478526 100644 (file)
@@ -76,5 +76,6 @@ unsigned int rc_pair_remove_src(
 rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst);
 
 struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop);
+struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop);
 
 #endif /* RADEON_PROGRAM_UTIL_H */
index 52c0216b64b0965e68eef47d408c1a95df65fd64..fd03c73b3242465a539a92daddc292d6f5a62b1c 100644 (file)
@@ -66,7 +66,7 @@ struct regalloc_state {
        unsigned int NumTemporaries;
 
        unsigned int Simple;
-       unsigned int HasLoop;
+       int LoopEnd;
 };
 
 enum rc_reg_class {
@@ -176,7 +176,8 @@ static void scan_read_callback(void * data, struct rc_instruction * inst,
                }
                reg->Live[i].Used = 1;
                reg->Live[i].Start = 0;
-               reg->Live[i].End = inst->IP;
+               reg->Live[i].End =
+                       s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
        }
 }
 
@@ -509,6 +510,14 @@ static void do_advanced_regalloc(struct regalloc_state * s)
        for (inst = s->C->Program.Instructions.Next;
                                        inst != &s->C->Program.Instructions;
                                        inst = inst->Next) {
+               rc_opcode op = rc_get_flow_control_inst(inst);
+               if (op == RC_OPCODE_BGNLOOP) {
+                       struct rc_instruction * endloop =
+                                                       rc_match_bgnloop(inst);
+                       if (endloop->IP > s->LoopEnd) {
+                               s->LoopEnd = endloop->IP;
+                       }
+               }
                rc_for_all_reads_mask(inst, scan_read_callback, s);
        }
 
@@ -622,7 +631,6 @@ void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
                                (struct r300_fragment_program_compiler*)cc;
        struct regalloc_state s;
        int do_full_regalloc = (int)user;
-       struct rc_instruction * inst;
 
        memset(&s, 0, sizeof(s));
        s.C = cc;
@@ -636,20 +644,10 @@ void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
                        s.NumTemporaries * sizeof(struct register_info));
        memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
 
-       for(inst = cc->Program.Instructions.Next;
-           inst != &cc->Program.Instructions;
-           inst = inst->Next) {
-
-               if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
-                       s.HasLoop = 1;
-                       break;
-               }
-       }
-
        rc_recompute_ips(s.C);
 
        c->AllocateHwInputs(c, &alloc_input_simple, &s);
-       if (!s.HasLoop && do_full_regalloc) {
+       if (do_full_regalloc) {
                do_advanced_regalloc(&s);
        } else {
                s.Simple = 1;
index 91a4d45dc0d71a4629463de7701c75975ab98c2f..082717ed5eb757f6cfb0b892a9aac4da6e59468a 100644 (file)
@@ -164,12 +164,75 @@ void rc_variable_compute_live_intervals(struct rc_variable * var)
 
                for (i = 0; i < var->ReaderCount; i++) {
                        unsigned int chan;
+                       unsigned int chan_start = start;
+                       unsigned int chan_end = var->Readers[i].Inst->IP;
                        unsigned int mask = var->Readers[i].WriteMask;
+                       struct rc_instruction * inst;
+
+                       /* Extend the live interval of T0 to the start of the
+                        * loop for sequences like:
+                        * BGNLOOP
+                        * read T0
+                        * ...
+                        * write T0
+                        * ENDLOOP
+                        */
+                       if (var->Readers[i].Inst->IP < start) {
+                               struct rc_instruction * bgnloop =
+                                       rc_match_endloop(var->Readers[i].Inst);
+                               chan_start = bgnloop->IP;
+                       }
+
+                       /* Extend the live interval of T0 to the start of the
+                        * loop in case there is a BRK instruction in the loop
+                        * (we don't actually check for a BRK instruction; we
+                        * assume there is one somewhere in the loop, which
+                        * there usually is) for sequences like:
+                        * BGNLOOP
+                        * ...
+                        * conditional BRK
+                        * ...
+                        * write T0
+                        * ENDLOOP
+                        * read T0
+                        ***************************************************
+                        * Extend the live interval of T0 to the end of the
+                        * loop for sequences like:
+                        * write T0
+                        * BGNLOOP
+                        * ...
+                        * read T0
+                        * ENDLOOP
+                        */
+                       for (inst = var->Inst; inst != var->Readers[i].Inst;
+                                                       inst = inst->Next) {
+                               rc_opcode op = rc_get_flow_control_inst(inst);
+                               if (op == RC_OPCODE_ENDLOOP) {
+                                       struct rc_instruction * bgnloop =
+                                               rc_match_endloop(inst);
+                                       if (bgnloop->IP < chan_start) {
+                                               chan_start = bgnloop->IP;
+                                       }
+                               } else if (op == RC_OPCODE_BGNLOOP) {
+                                       struct rc_instruction * endloop =
+                                               rc_match_bgnloop(inst);
+                                       if (endloop->IP > chan_end) {
+                                               chan_end = endloop->IP;
+                                       }
+                               }
+                       }
+
                        for (chan = 0; chan < 4; chan++) {
                                if ((mask >> chan) & 0x1) {
-                                       var->Live[chan].Start = start;
-                                       var->Live[chan].End =
-                                               var->Readers[i].Inst->IP;
+                                       if (!var->Live[chan].Used
+                                       || chan_start < var->Live[chan].Start) {
+                                               var->Live[chan].Start =
+                                                               chan_start;
+                                       }
+                                       if (!var->Live[chan].Used
+                                       || chan_end > var->Live[chan].End) {
+                                               var->Live[chan].End = chan_end;
+                                       }
                                        var->Live[chan].Used = 1;
                                }
                        }
@@ -197,10 +260,9 @@ static unsigned int readers_intersect(
 
                                return 1;
                        }
-
                        if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR
                                && reader_b.Inst->Type == RC_INSTRUCTION_PAIR
-                               && reader_a.U.P.Arg == reader_b.U.P.Arg) {
+                               && reader_a.U.P.Src == reader_b.U.P.Src) {
 
                                return 1;
                        }
@@ -213,6 +275,7 @@ void rc_variable_add_friend(
        struct rc_variable * var,
        struct rc_variable * friend)
 {
+       assert(var->Dst.Index == friend->Dst.Index);
        while(var->Friend) {
                var = var->Friend;
        }