r300/compiler: Implement simple loop emulation
authorTom Stellard <tstellar@gmail.com>
Fri, 28 May 2010 00:14:51 +0000 (17:14 -0700)
committerMarek Olšák <maraeo@gmail.com>
Fri, 11 Jun 2010 20:06:58 +0000 (22:06 +0200)
The loop emulation unrolls loops as many times as possible while still
keeping the shader program below the maximum instruction limit.  At this
point, there are no checks for constant conditionals.  This is only enabled
for fragment shaders.

src/gallium/drivers/r300/r300_tgsi_to_rc.c
src/mesa/drivers/dri/r300/compiler/Makefile
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c [new file with mode: 0644]
src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h [new file with mode: 0644]
src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h

index 89f39af9761aa7823f6aceec7582842229cc0617..5394e04f727c4d8610bf0d5583992ccc63bd5c72 100644 (file)
@@ -105,12 +105,12 @@ static unsigned translate_opcode(unsigned opcode)
      /* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */
      /* case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; */
         case TGSI_OPCODE_TXL: return RC_OPCODE_TXL;
-     /* case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; */
+        case TGSI_OPCODE_BRK: return RC_OPCODE_BRK;
         case TGSI_OPCODE_IF: return RC_OPCODE_IF;
-     /* case TGSI_OPCODE_LOOP: return RC_OPCODE_LOOP; */
+        case TGSI_OPCODE_BGNLOOP: return RC_OPCODE_BGNLOOP;
         case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE;
         case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF;
-     /* case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; */
+        case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP;
      /* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */
      /* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */
         case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL;
index 34d22b45591117f00751ce53910e2d77f2104f59..ff3801dc676e7e107539028dcfdc5f5d91c4875d 100644 (file)
@@ -9,6 +9,7 @@ C_SOURCES = \
                radeon_code.c \
                radeon_compiler.c \
                radeon_emulate_branches.c \
+               radeon_emulate_loops.c \
                radeon_program.c \
                radeon_program_print.c \
                radeon_opcodes.c \
index 7f3b88ed7598dfbe15a9269e0250105c35af20d5..38312658d65ae6595be964b926346eb68d2f051e 100644 (file)
@@ -26,6 +26,7 @@
 
 #include "radeon_dataflow.h"
 #include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
 #include "radeon_program_alu.h"
 #include "radeon_program_tex.h"
 #include "r300_fragprog.h"
@@ -103,6 +104,15 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
        /* XXX Ideally this should be done only for r3xx, but since
         * we don't have branching support for r5xx, we use the emulation
         * on all chipsets. */
+       
+       if(c->Base.is_r500){
+               rc_emulate_loops(&c->Base, R500_PFS_MAX_INST);
+       }
+       else{
+               rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST);
+       }
+       debug_program_log(c, "after emulate loops");
+       
        rc_emulate_branches(&c->Base);
 
        debug_program_log(c, "after emulate branches");
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
new file mode 100644 (file)
index 0000000..b05ba08
--- /dev/null
@@ -0,0 +1,183 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_emulate_loops.h"
+
+#include "radeon_compiler.h"
+
+/**
+ * Working state shared by the functions of the loop emulation pass.
+ */
+struct emulate_loop_state {
+       /* Compiler whose program is rewritten; its Pool is what the Loops
+        * array is reserved from. */
+       struct radeon_compiler * C;
+       /* One record per loop, appended by transform_loop() in the order the
+        * BGNLOOP instructions are encountered. */
+       struct loop_info * Loops;
+       /* Number of records in Loops that are in use. */
+       unsigned int LoopCount;
+       /* Bookkeeping counter handed to memory_pool_array_reserve() together
+        * with Loops -- presumably the allocated capacity; confirm against
+        * the macro definition. */
+       unsigned int LoopReserved;
+};
+
+/** The pair of instructions that delimit one loop in the program. */
+struct loop_info {
+       /* The loop's BGNLOOP instruction. */
+       struct rc_instruction * BeginLoop;
+       /* The loop's ENDLOOP instruction; NULL until transform_loop() has
+        * reached it. */
+       struct rc_instruction * EndLoop;
+};
+
+/**
+ * Count the instructions that lie strictly between a loop's BGNLOOP and
+ * its ENDLOOP.
+ *
+ * @param loop Loop whose BeginLoop and EndLoop are linked into the same
+ * instruction list, with EndLoop reachable from BeginLoop via Next.
+ * @return Number of instructions in the loop body.
+ */
+static unsigned int loop_count_instructions(struct loop_info * loop)
+{
+       unsigned int body_len = 0;
+       struct rc_instruction * cur;
+
+       for(cur = loop->BeginLoop->Next; cur != loop->EndLoop; cur = cur->Next){
+               body_len++;
+       }
+       return body_len;
+}
+
+/**
+ * Work out how many copies of a loop body fit into the instruction budget.
+ *
+ * The budget is divided by the number of loops times the size of this
+ * loop's body, i.e. max_instructions / (loop_count * body size).
+ *
+ * @param loop Loop to size.
+ * @param loop_count Total number of loops sharing the budget.
+ * @param max_instructions Instruction limit for the whole program.
+ * @return Number of iterations to unroll.  This is 0 when a single copy
+ * already exceeds the loop's share, and 1 when the body is empty or
+ * loop_count is 0 (nothing to size, keep the one existing copy).
+ */
+static unsigned int loop_calc_iterations(struct loop_info * loop,
+               unsigned int loop_count, unsigned int max_instructions)
+{
+       unsigned int icount = loop_count_instructions(loop);
+       unsigned int cost = loop_count * icount;
+
+       /* An empty loop body (or a zero loop count) would make the divisor
+        * zero; division by zero is undefined behaviour, so bail out. */
+       if(cost == 0){
+               return 1;
+       }
+       return max_instructions / cost;
+}
+
+/**
+ * Unroll one loop in place.
+ *
+ * The BGNLOOP and ENDLOOP instructions are removed from the program, and
+ * the instructions that were between them are duplicated (iterations - 1)
+ * times directly behind the original body.  An iterations value of 0 or 1
+ * therefore leaves exactly one copy of the body.
+ *
+ * @param s Pass state; s->C is used to allocate the new instructions.
+ * @param loop Loop to unroll.
+ * @param iterations Total number of body copies wanted.
+ */
+static void loop_unroll(struct emulate_loop_state * s,
+                       struct loop_info *loop, unsigned int iterations)
+{
+       /* Capture the body boundaries before the loop markers are unlinked. */
+       struct rc_instruction * body_first = loop->BeginLoop->Next;
+       struct rc_instruction * body_last = loop->EndLoop->Prev;
+       struct rc_instruction * tail = body_last;
+       unsigned int pass;
+
+       rc_remove_instruction(loop->BeginLoop);
+       rc_remove_instruction(loop->EndLoop);
+
+       for(pass = 1; pass < iterations; pass++){
+               struct rc_instruction * src = body_first;
+               /* body_last->Next is re-read on every step on purpose: once
+                * the first copy has been inserted it points at that copy,
+                * which is what ends each walk over the original body. */
+               while(src != body_last->Next){
+                       struct rc_instruction * copy = rc_alloc_instruction(s->C);
+                       memcpy(copy, src, sizeof(struct rc_instruction));
+                       rc_insert_instruction(tail, copy);
+                       tail = copy;
+                       src = src->Next;
+               }
+       }
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement.  Here is an outline of the conversion process:
+ * BGNLOOP;                         -> BGNLOOP;
+ * SGE temp[0], temp[1], temp[2];   -> SLT temp[0], temp[1], temp[2];
+ * IF temp[0];                      -> IF temp[0];
+ * BRK;                             ->
+ * ENDIF;                           -> <Loop Body>
+ * <Loop Body>                      -> ENDIF;
+ * ENDLOOP;                         -> ENDLOOP
+ *
+ * A record for the loop is appended to s->Loops.  A BGNLOOP found inside
+ * the body is transformed recursively, so nested loops are recorded after
+ * the loop that contains them.
+ *
+ * The instruction following BGNLOOP is assumed to be the SGE shown above;
+ * its opcode is overwritten with SLT without being checked.  Only one
+ * BRK/ENDIF pair per loop is supported: if several are present, all of
+ * them are removed but only the last ENDIF is re-inserted.
+ *
+ * @param s Pass state that receives the new loop record.
+ * @param inst Pointer to a BGNLOOP instruction.
+ * @return The ENDLOOP instruction that closes this loop, so that a caller
+ * which advances with ->Next resumes at the first instruction after it.
+ */
+static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+                                               struct rc_instruction * inst)
+{
+       unsigned int idx;
+       struct rc_instruction * ptr;
+       /* Declared at function scope so the value stored when BRK is seen is
+        * still valid in the later iteration that reaches ENDLOOP. */
+       struct rc_instruction * endif = NULL;
+
+       memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+                       s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+       /* Address the record by index rather than by pointer: the reserve
+        * call made by a nested transform_loop() is handed s->Loops itself
+        * and may therefore replace the array (NOTE(review): confirm against
+        * the macro), which would leave a cached pointer stale. */
+       idx = s->LoopCount++;
+       memset(&s->Loops[idx], 0, sizeof(struct loop_info));
+       s->Loops[idx].BeginLoop = inst;
+
+       /* Reverse the SGE instruction */
+       ptr = inst->Next;
+       ptr->U.I.Opcode = RC_OPCODE_SLT;
+       while(!s->Loops[idx].EndLoop){
+               struct rc_instruction * resume;
+               /* U.I is only inspected for normal instructions. */
+               if(ptr->Type == RC_INSTRUCTION_NORMAL){
+                       switch(ptr->U.I.Opcode){
+                       case RC_OPCODE_BGNLOOP:
+                               /* Nested loop: continue from its ENDLOOP. */
+                               ptr = transform_loop(s, ptr);
+                               break;
+                       case RC_OPCODE_BRK:
+                               /* The BRK instruction should always be
+                                * followed by an ENDIF.  This ENDIF is
+                                * re-inserted in front of ENDLOOP later. */
+                               endif = ptr->Next;
+                               /* Continue from the instruction in front of
+                                * BRK so the walk never follows the links of
+                                * an instruction that has been removed. */
+                               resume = ptr->Prev;
+                               rc_remove_instruction(ptr);
+                               rc_remove_instruction(endif);
+                               ptr = resume;
+                               break;
+                       case RC_OPCODE_ENDLOOP:
+                               /* Insert the ENDIF before ENDLOOP.  Without
+                                * a preceding BRK there is nothing to
+                                * insert. */
+                               if(endif){
+                                       rc_insert_instruction(ptr->Prev, endif);
+                               }
+                               s->Loops[idx].EndLoop = ptr;
+                               break;
+                       default:
+                               break;
+                       }
+               }
+               ptr = ptr->Next;
+       }
+       /* Return ENDLOOP itself, not its successor: every caller advances
+        * once more after the call, and returning the successor would make
+        * it skip the first instruction behind the loop. */
+       return s->Loops[idx].EndLoop;
+}
+
+/**
+ * Walk the whole program and hand every BGNLOOP instruction to
+ * transform_loop().
+ *
+ * After a loop has been transformed, the walk resumes at the instruction
+ * following the one transform_loop() returned.
+ *
+ * @param s Pass state; s->C->Program.Instructions is the list head that
+ * both starts and terminates the walk.
+ */
+static void rc_transform_loops(struct emulate_loop_state * s)
+{
+       struct rc_instruction * cur;
+
+       for(cur = s->C->Program.Instructions.Next;
+                       cur != &s->C->Program.Instructions; cur = cur->Next){
+               if(cur->Type != RC_INSTRUCTION_NORMAL){
+                       continue;
+               }
+               if(cur->U.I.Opcode != RC_OPCODE_BGNLOOP){
+                       continue;
+               }
+               cur = transform_loop(s, cur);
+       }
+}
+
+/**
+ * Unroll every loop recorded in s->Loops.
+ *
+ * @param s Pass state holding the loop records.
+ * @param max_instructions Instruction limit used to size each unroll.
+ */
+static void rc_unroll_loops(struct emulate_loop_state *s,
+                                               unsigned int max_instructions)
+{
+       /* Walk the list of loops from the back.  Nested loops are recorded
+        * after the loop that contains them, so this unrolls inner loops
+        * before their enclosing loops.
+        */
+       unsigned int remaining = s->LoopCount;
+
+       while(remaining > 0){
+               struct loop_info * loop = &s->Loops[--remaining];
+               unsigned int iterations = loop_calc_iterations(loop,
+                                               s->LoopCount, max_instructions);
+               loop_unroll(s, loop, iterations);
+       }
+}
+
+/**
+ * Entry point of the loop emulation pass: rewrite every loop into its
+ * if-guarded form, then unroll the loops.
+ *
+ * @param c Compiler whose program is rewritten in place.
+ * @param max_instructions Instruction limit passed on to the unroller.
+ */
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions)
+{
+       struct emulate_loop_state state;
+
+       memset(&state, 0, sizeof(state));
+       state.C = c;
+
+       /* The two steps below may have to move to r3xx_(vert|frag)prog.c so
+        * that the optimization passes can run between them, which would
+        * increase the number of unrolls possible for each loop.
+        */
+       rc_transform_loops(&state);
+
+       rc_unroll_loops(&state, max_instructions);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
new file mode 100644 (file)
index 0000000..ddcf1c0
--- /dev/null
@@ -0,0 +1,12 @@
+
+
+#ifndef RADEON_EMULATE_LOOPS_H
+#define RADEON_EMULATE_LOOPS_H
+
+/* NOTE(review): not referenced by radeon_emulate_loops.c as added in this
+ * change -- presumably meant as an upper bound on unroll iterations;
+ * confirm before relying on it. */
+#define MAX_ITERATIONS 8
+
+struct radeon_compiler;
+
+/**
+ * Emulate loops in the compiler's program by unrolling them.
+ *
+ * @param c Compiler whose program is rewritten in place.
+ * @param max_instructions Instruction limit used to decide how many times
+ * each loop body is duplicated.
+ */
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions);
+
+#endif /* RADEON_EMULATE_LOOPS_H */
index d593b3e81ae770ade295626b76f2351892ff2211..1dc16855dc13a7e5bc7df35718e46a1da549342f 100644 (file)
@@ -367,6 +367,24 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
                .IsFlowControl = 1,
                .NumSrcRegs = 0
        },
+       {
+               .Opcode = RC_OPCODE_BGNLOOP,
+               .Name = "BGNLOOP",
+               .IsFlowControl = 1,
+               .NumSrcRegs = 0
+       },
+       {
+               .Opcode = RC_OPCODE_BRK,
+               .Name = "BRK",
+               .IsFlowControl = 1,
+               .NumSrcRegs = 0
+       },
+       {
+               .Opcode = RC_OPCODE_ENDLOOP,
+               .Name = "ENDLOOP",
+               .IsFlowControl = 1,
+               .NumSrcRegs = 0,
+       },
        {
                .Opcode = RC_OPCODE_REPL_ALPHA,
                .Name = "REPL_ALPHA",
index 87a2e23084c518a0fa7173251212f33dcdc1fad3..91c82ac0890e0a6124d4891e8e063d4f10f8a589 100644 (file)
@@ -180,6 +180,12 @@ typedef enum {
 
        /** branch instruction: has no effect */
        RC_OPCODE_ENDIF,
+       
+       RC_OPCODE_BGNLOOP,
+
+       RC_OPCODE_BRK,
+
+       RC_OPCODE_ENDLOOP,
 
        /** special instruction, used in R300-R500 fragment program pair instructions
         * indicates that the result of the alpha operation shall be replicated