r300/compiler: Rewrite register allocator
author Tom Stellard <tstellar@gmail.com>
Tue, 11 Jan 2011 08:05:08 +0000 (00:05 -0800)
committer Tom Stellard <tstellar@gmail.com>
Sat, 30 Apr 2011 18:00:15 +0000 (11:00 -0700)
The new allocator uses ra (the register allocator declared in
program/register_allocate.h) and does swizzle packing.

Also, a data structure (struct rc_variable) and associated functions have
been added for generating UD and DU chains.

19 files changed:
src/gallium/drivers/r300/r300_tgsi_to_rc.c
src/mesa/drivers/dri/r300/compiler/Makefile
src/mesa/drivers/dri/r300/compiler/SConscript
src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
src/mesa/drivers/dri/r300/compiler/radeon_list.c [new file with mode: 0644]
src/mesa/drivers/dri/r300/compiler/radeon_list.h [new file with mode: 0644]
src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
src/mesa/drivers/dri/r300/compiler/radeon_program.h
src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
src/mesa/drivers/dri/r300/compiler/radeon_variable.c [new file with mode: 0644]
src/mesa/drivers/dri/r300/compiler/radeon_variable.h [new file with mode: 0644]

index 6a000cfe2c6d435fd366cf30e6ed7d87d8b5da44..0561ab9bfa4526e5c25efb0e54867738eb40500c 100644 (file)
@@ -266,6 +266,7 @@ static void transform_texture(struct rc_instruction * dst, struct tgsi_instructi
             *shadowSamplers |= 1 << dst->U.I.TexSrcUnit;
             break;
     }
+    dst->U.I.TexSwizzle = RC_SWIZZLE_XYZW;
 }
 
 static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src)
index 90bd8e86768a655e82cd09dc22b2257f6b483490..5c9f57b4eac016e9626dd0d4cdf269d8b4aa1fd8 100644 (file)
@@ -24,9 +24,11 @@ C_SOURCES = \
                radeon_dataflow.c \
                radeon_dataflow_deadcode.c \
                radeon_dataflow_swizzles.c \
+               radeon_list.c \
                radeon_optimize.c \
                radeon_remove_constants.c \
                radeon_rename_regs.c \
+               radeon_variable.c \
                r3xx_fragprog.c \
                r300_fragprog.c \
                r300_fragprog_swizzle.c \
@@ -49,6 +51,7 @@ INCLUDES = \
        -I. \
        -I$(TOP)/include \
        -I$(TOP)/src/mesa \
+       -I$(TOP)/src/glsl \
 
 
 ##### TARGETS #####
index d44b745562c6a768d17af84bc26b914b9bb38025..bebb9ebe6236fe6baf6ad19205b000b3325e36cf 100755 (executable)
@@ -31,6 +31,8 @@ r300compiler = env.ConvenienceLibrary(
         'radeon_dataflow.c',
         'radeon_dataflow_deadcode.c',
         'radeon_dataflow_swizzles.c',
+        'radeon_variable.c',
+       'radeon_list.c',
         'r3xx_fragprog.c',
         'r300_fragprog.c',
         'r300_fragprog_swizzle.c',
index 8b73409136f401ab552f34948f3c88963e18fa95..e6fd1fde62ddea8016dddb4d0740451d27dcad86 100644 (file)
@@ -93,7 +93,7 @@ static unsigned int use_source(struct r300_fragment_program_code* code, struct r
 
        if (src.File == RC_FILE_CONSTANT) {
                return src.Index | (1 << 5);
-       } else if (src.File == RC_FILE_TEMPORARY) {
+       } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
                use_temporary(code, src.Index);
                return src.Index & 0x1f;
        }
index cdfda0b3196b88b16e4c06733d4f15ec5c9c2044..ff022a5103753fe4310fd0150613706c007d5eda 100644 (file)
@@ -149,8 +149,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
                {"pair translate",              1, 1,           rc_pair_translate,              NULL},
                {"pair scheduling",             1, 1,           rc_pair_schedule,               NULL},
                {"dead sources",                1, 1,           rc_pair_remove_dead_sources, NULL},
-               {"register allocation",         1, opt,         rc_pair_regalloc,               NULL},
-               {"dumb register allocation",    1, !opt,        rc_pair_regalloc_inputs_only,   NULL},
+               {"register allocation",         1, 1,           rc_pair_regalloc,               opt},
                {"final code validation",       0, 1,           rc_validate_final_shader,       NULL},
                {"machine code generation",     0, is_r500,     r500BuildFragmentProgramHwCode, NULL},
                {"machine code generation",     0, !is_r500,    r300BuildFragmentProgramHwCode, NULL},
index c7f79bc53c7bb56bcac2f96b95582c041f8dd62b..5f2588bdfe5e95c06f9082709cd9e8780d2a2a44 100644 (file)
@@ -207,7 +207,7 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r
 
        if (src.File == RC_FILE_CONSTANT) {
                return src.Index | R500_RGB_ADDR0_CONST;
-       } else if (src.File == RC_FILE_TEMPORARY) {
+       } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
                use_temporary(code, src.Index);
                return src.Index;
        }
@@ -407,8 +407,11 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
        code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
                | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
                | R500_TEX_DST_ADDR(inst->DstReg.Index)
-               | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
-               | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
+               | (GET_SWZ(inst->TexSwizzle, 0) << 24)
+               | (GET_SWZ(inst->TexSwizzle, 1) << 26)
+               | (GET_SWZ(inst->TexSwizzle, 2) << 28)
+               | (GET_SWZ(inst->TexSwizzle, 3) << 30)
+               ;
 
        return 1;
 }
index 15ec4418cb87594f0a628ad557afc17f3527cf5b..ae61d20fb9bdc5b59a2ab29bf0b9e603db335cc0 100644 (file)
@@ -124,6 +124,160 @@ unsigned swizzle_mask(unsigned swizzle, unsigned mask)
        return ret;
 }
 
+/**
+ * Tells the writemask rewriters whether the source swizzles of an
+ * instruction have to be adjusted together with its writemask.
+ * @param info Opcode description of the instruction being rewritten
+ * @return 0 for texture instructions and for DP2, DP3, DP4, DDX and DDY,
+ * 1 for every other opcode
+ */
+static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info)
+{
+       if (info->HasTexture) {
+               return 0;
+       }
+
+       if (info->Opcode == RC_OPCODE_DP2
+           || info->Opcode == RC_OPCODE_DP3
+           || info->Opcode == RC_OPCODE_DP4
+           || info->Opcode == RC_OPCODE_DDX
+           || info->Opcode == RC_OPCODE_DDY) {
+               return 0;
+       }
+
+       return 1;
+}
+
+/**
+ * Moves the channels of old_swizzle to the positions named by
+ * conversion_swizzle: for every channel i whose conversion entry is not
+ * RC_SWIZZLE_UNUSED, channel conversion_swizzle[i] of the result receives
+ * channel i of old_swizzle.  Result channels that nothing is moved into
+ * keep the value produced by rc_init_swizzle(RC_SWIZZLE_UNUSED, 0).
+ * @param old_swizzle The swizzle whose channels are being moved
+ * @param conversion_swizzle Maps each old channel to its new position
+ * @return The rearranged swizzle
+ */
+static unsigned int adjust_channels(
+       unsigned int old_swizzle,
+       unsigned int conversion_swizzle)
+{
+       unsigned int result = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+       unsigned int chan;
+
+       for (chan = 0; chan < 4; chan++) {
+               unsigned int dest = get_swz(conversion_swizzle, chan);
+               if (dest != RC_SWIZZLE_UNUSED) {
+                       SET_SWZ(result, dest, GET_SWZ(old_swizzle, chan));
+               }
+       }
+
+       return result;
+}
+
+/**
+ * Translates a writemask through a conversion swizzle.  Every bit i that is
+ * set in old_mask and whose conversion entry is not RC_SWIZZLE_UNUSED sets
+ * bit conversion_swizzle[i] in the result; all other bits are dropped.
+ * @param old_mask The writemask to translate
+ * @param conversion_swizzle Maps each old channel to its new channel
+ * @return The translated writemask
+ */
+static unsigned int rewrite_writemask(
+       unsigned int old_mask,
+       unsigned int conversion_swizzle)
+{
+       unsigned int result = 0;
+       unsigned int chan;
+
+       for (chan = 0; chan < 4; chan++) {
+               unsigned int target;
+
+               if (!GET_BIT(old_mask, chan)) {
+                       continue;
+               }
+
+               target = GET_SWZ(conversion_swizzle, chan);
+               if (target == RC_SWIZZLE_UNUSED) {
+                       continue;
+               }
+
+               result |= (1 << target);
+       }
+
+       return result;
+}
+
+/**
+ * Rewrites the writemask of a pair sub-instruction and, for opcodes that
+ * need it (see srcs_need_rewrite()), moves the channels of every argument
+ * swizzle accordingly.
+ * @param sub The RGB or Alpha half of a pair instruction to modify
+ * @param conversion_swizzle A mapping of the old writemask to the new
+ * writemask.  rc_rewrite_swizzle() has a detailed description of how
+ * conversion swizzles work.
+ */
+void rc_pair_rewrite_writemask(
+       struct rc_pair_sub_instruction * sub,
+       unsigned int conversion_swizzle)
+{
+       const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+       unsigned int arg;
+
+       sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle);
+
+       if (srcs_need_rewrite(info)) {
+               for (arg = 0; arg < info->NumSrcRegs; arg++) {
+                       sub->Arg[arg].Swizzle = adjust_channels(
+                               sub->Arg[arg].Swizzle, conversion_swizzle);
+               }
+       }
+}
+
+/**
+ * rc_for_all_reads_src() callback used by rc_normal_rewrite_writemask().
+ * @param userdata Points to the conversion swizzle (unsigned int) that maps
+ * old channels to new channels
+ * @param inst Unused
+ * @param src The source register whose swizzle is rewritten in place
+ */
+static void normal_rewrite_writemask_cb(
+       void * userdata,
+       struct rc_instruction * inst,
+       struct rc_src_register * src)
+{
+       unsigned int * conversion_swizzle = (unsigned int *)userdata;
+       src->Swizzle = adjust_channels(src->Swizzle, *conversion_swizzle);
+}
+
+/**
+ * This function is the same as rc_pair_rewrite_writemask() except it
+ * operates on normal instructions.
+ * @param inst The normal instruction whose destination writemask (and, where
+ * required, texture swizzle and source swizzles) is rewritten
+ * @param conversion_swizzle A mapping of the old writemask to the new
+ * writemask, see rc_rewrite_swizzle()
+ */
+void rc_normal_rewrite_writemask(
+       struct rc_instruction * inst,
+       unsigned int conversion_swizzle)
+{
+       struct rc_sub_instruction * sub = &inst->U.I;
+       const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+       sub->DstReg.WriteMask =
+               rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle);
+
+       if (info->HasTexture) {
+               unsigned int i;
+               assert(sub->TexSwizzle == RC_SWIZZLE_XYZW);
+               /* Texture sources are never rewritten (see
+                * srcs_need_rewrite()); instead slot 'swz' of the texture
+                * swizzle is pointed at the old channel i. */
+               for (i = 0; i < 4; i++) {
+                       unsigned int swz = GET_SWZ(conversion_swizzle, i);
+                       if (swz > 3)
+                               continue;
+                       SET_SWZ(sub->TexSwizzle, swz, i);
+               }
+       }
+
+       if (!srcs_need_rewrite(info)) {
+               return;
+       }
+
+       /* The sources must be adjusted with the conversion swizzle itself,
+        * exactly like rc_pair_rewrite_writemask() does.  Passing the
+        * rewritten writemask here would make adjust_channels() interpret
+        * a 4-bit mask as a swizzle. */
+       rc_for_all_reads_src(inst, normal_rewrite_writemask_cb,
+                                                       &conversion_swizzle);
+}
+
+/**
+ * Replaces every channel value 'swz' found in swizzle with
+ * GET_SWZ(conversion_swizzle, swz).  Values greater than 3 are kept as they
+ * are, and so are channels whose conversion entry is RC_SWIZZLE_UNUSED.
+ *
+ * Examples:
+ *  - to turn all the X's in swizzle into Y, conversion_swizzle should be
+ *    Y___ (0xff9);
+ *  - to turn all the Y's in swizzle into X, conversion_swizzle should be
+ *    _X__ (0xfc7);
+ *  - to turn the Y's into X and the X's into Y, conversion_swizzle should
+ *    be YX__ (0xfc1).
+ * @param swizzle The swizzle to change
+ * @param conversion_swizzle Describes the conversion to perform on the swizzle
+ * @return A converted swizzle
+ */
+unsigned int rc_rewrite_swizzle(
+       unsigned int swizzle,
+       unsigned int conversion_swizzle)
+{
+       unsigned int result = swizzle;
+       unsigned int chan;
+
+       for (chan = 0; chan < 4; chan++) {
+               unsigned int old_swz = GET_SWZ(swizzle, chan);
+               unsigned int replacement = old_swz;
+
+               if (old_swz <= 3) {
+                       unsigned int mapped =
+                               GET_SWZ(conversion_swizzle, old_swz);
+                       if (mapped != RC_SWIZZLE_UNUSED) {
+                               replacement = mapped;
+                       }
+               }
+
+               SET_SWZ(result, chan, replacement);
+       }
+
+       return result;
+}
+
 /**
  * Left multiplication of a register with a swizzle
  */
@@ -281,3 +435,105 @@ unsigned int rc_inst_can_use_presub(
        return 1;
 }
 
+/**
+ * Search state handed to max_callback() by rc_get_max_index().
+ */
+struct max_data {
+       /** Largest index seen so far; only meaningful once HasFileType is 1. */
+       unsigned int Max;
+       /** Set to 1 as soon as any register of File has been seen. */
+       unsigned int HasFileType;
+       /** The register file whose indices are being searched. */
+       rc_register_file File;
+};
+
+/**
+ * Read/write mask callback for rc_get_max_index(): records index in the
+ * struct max_data passed as userdata when it belongs to the searched file
+ * and is either the first index seen or larger than the current maximum.
+ */
+static void max_callback(
+       void * userdata,
+       struct rc_instruction * inst,
+       rc_register_file file,
+       unsigned int index,
+       unsigned int mask)
+{
+       struct max_data * search = (struct max_data*)userdata;
+
+       if (file != search->File) {
+               return;
+       }
+
+       if (search->HasFileType && index <= search->Max) {
+               return;
+       }
+
+       search->HasFileType = 1;
+       search->Max = index;
+}
+
+/**
+ * Walks every instruction of the program and looks at all registers it
+ * reads or writes.
+ * @param c The compiler whose program is scanned
+ * @param file The register file to look for
+ * @return The maximum index of the specified register file used by the
+ * program, or -1 if the program does not use that file at all.
+ */
+int rc_get_max_index(
+       struct radeon_compiler * c,
+       rc_register_file file)
+{
+       struct rc_instruction * cur;
+       struct max_data search;
+
+       search.File = file;
+       search.HasFileType = 0;
+       search.Max = 0;
+
+       cur = c->Program.Instructions.Next;
+       while (cur != &c->Program.Instructions) {
+               rc_for_all_reads_mask(cur, max_callback, &search);
+               rc_for_all_writes_mask(cur, max_callback, &search);
+               cur = cur->Next;
+       }
+
+       return search.HasFileType ? (int)search.Max : -1;
+}
+
+/**
+ * Collects the channels that a pair sub-instruction reads through one
+ * particular source slot.
+ * @param sub The RGB or Alpha half of the pair instruction
+ * @param source The index of the source slot of interest
+ * @param src_type Only arguments whose swizzle has exactly this source type
+ * (as reported by rc_source_type_swz()) are taken into account
+ * @return The combined rc_swizzle_to_writemask() of all matching arguments
+ */
+static unsigned int get_source_readmask(
+       struct rc_pair_sub_instruction * sub,
+       unsigned int source,
+       unsigned int src_type)
+{
+       const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+       unsigned int mask = 0;
+       unsigned int arg;
+
+       for (arg = 0; arg < info->NumSrcRegs; arg++) {
+               if (sub->Arg[arg].Source == source
+                   && src_type == rc_source_type_swz(sub->Arg[arg].Swizzle)) {
+                       mask |= rc_swizzle_to_writemask(sub->Arg[arg].Swizzle);
+               }
+       }
+
+       return mask;
+}
+
+/**
+ * This function attempts to remove a source from a pair instruction.
+ * The source is only removed when new_readmask covers every component
+ * that the instruction currently reads through it.
+ * @param inst The pair instruction to modify
+ * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd
+ * @param source The index of the source to remove
+ * @param new_readmask A mask representing the components that are read by
+ * the source that is intended to replace the one you are removing.  If you
+ * want to remove a source only and not replace it, this parameter should be
+ * zero.
+ * @return 1 if the source was successfully removed, 0 if it was not
+ */
+unsigned int rc_pair_remove_src(
+       struct rc_instruction * inst,
+       unsigned int src_type,
+       unsigned int source,
+       unsigned int new_readmask)
+{
+       unsigned int old_readmask;
+
+       old_readmask = get_source_readmask(&inst->U.P.RGB, source, src_type);
+       old_readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type);
+
+       /* Refuse if any component that is still read is missing from the
+        * replacement's mask. */
+       if (old_readmask & ~new_readmask) {
+               return 0;
+       }
+
+       if (src_type & RC_SOURCE_RGB) {
+               memset(&inst->U.P.RGB.Src[source], 0,
+                       sizeof(struct rc_pair_instruction_source));
+       }
+       if (src_type & RC_SOURCE_ALPHA) {
+               memset(&inst->U.P.Alpha.Src[source], 0,
+                       sizeof(struct rc_pair_instruction_source));
+       }
+
+       return 1;
+}
index dd0f6c66156ce7e5d69e73344ac2aacc60625be8..43ef87720920ddb53d072bbafd66a5403c06c835 100644 (file)
@@ -3,7 +3,10 @@
 #ifndef RADEON_PROGRAM_UTIL_H
 #define RADEON_PROGRAM_UTIL_H
 
+struct radeon_compiler;
 struct rc_instruction;
+struct rc_pair_instruction;
+struct rc_pair_sub_instruction;
 struct rc_src_register;
 
 unsigned int rc_swizzle_to_writemask(unsigned int swz);
@@ -22,6 +25,18 @@ rc_swizzle rc_mask_to_swizzle(unsigned int mask);
 
 unsigned swizzle_mask(unsigned swizzle, unsigned mask);
 
+void rc_pair_rewrite_writemask(
+       struct rc_pair_sub_instruction * sub,
+       unsigned int conversion_swizzle);
+
+void rc_normal_rewrite_writemask(
+       struct rc_instruction * inst,
+       unsigned int conversion_swizzle);
+
+/* Replaces each channel value in swizzle according to conversion_swizzle. */
+unsigned int rc_rewrite_swizzle(
+       unsigned int swizzle,
+       unsigned int conversion_swizzle);
+
 struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
 
 void reset_srcreg(struct rc_src_register* reg);
@@ -46,4 +61,14 @@ unsigned int rc_inst_can_use_presub(
        struct rc_src_register presub_src0,
        struct rc_src_register presub_src1);
 
+int rc_get_max_index(
+       struct radeon_compiler * c,
+       rc_register_file file);
+
+unsigned int rc_pair_remove_src(
+       struct rc_instruction * inst,
+       unsigned int src_type,
+       unsigned int source,
+       unsigned int new_readmask);
+
 #endif /* RADEON_PROGRAM_UTIL_H */
index 3c0ab10aa56b1a14638865a462b1e5db9f7b370e..966b7f825f898aad137ede8e2478e116cd56081c 100644 (file)
@@ -151,6 +151,7 @@ static void pair_sub_for_all_args(
                        unsigned int presub_src_count;
                        struct rc_pair_instruction_source * src_array;
                        unsigned int j;
+//                     fprintf(stderr, "Presubtract reader\n");
                        if (src_type & RC_SOURCE_RGB) {
                                presub_type = fullinst->
                                        U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index;
@@ -165,7 +166,9 @@ static void pair_sub_for_all_args(
                        for(j = 0; j < presub_src_count; j++) {
                                cb(userdata, fullinst, &sub->Arg[i],
                                                                &src_array[j]);
+//                             fprintf(stderr, "Callback for presub %u type=%u\n", j, src_type);
                        }
+//                     fprintf(stderr, "Done presubtract reader\n");
                } else {
                        struct rc_pair_instruction_source * src =
                                rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]);
@@ -586,6 +589,8 @@ static void get_readers_pair_read_callback(
                                0 /*Pair Instructions don't use RelAddr*/,
                                src->File, src->Index, arg->Swizzle);
 
+//     fprintf(stderr, "Shared mask = %u for [%u].%u writemask=%u abort=%u exit=%u\n",
+//                     shared_mask, src->Index, arg->Swizzle, d->AliveWriteMask,d->ReaderData->Abort, d->ReaderData->ExitOnAbort);
        if (shared_mask == RC_MASK_NONE)
                return;
 
@@ -775,6 +780,26 @@ static void get_readers_for_single_write(
        }
 }
 
+/**
+ * Common setup for the rc_get_readers*() entry points: resets the
+ * caller-visible reader list to empty and fills in the callback data that
+ * is handed to get_readers_for_single_write().
+ * @param d Callback data to initialize
+ * @param reader_data Result structure; its abort flag, counts and reader
+ * array are cleared here
+ * @param c The compiler
+ * @param read_normal_cb Stored in d->ReadNormalCB
+ * @param read_pair_cb Stored in d->ReadPairCB
+ * @param write_cb Stored in d->WriteCB
+ */
+static void init_get_readers_callback_data(
+       struct get_readers_callback_data * d,
+       struct rc_reader_data * reader_data,
+       struct radeon_compiler * c,
+       rc_read_src_fn read_normal_cb,
+       rc_pair_read_arg_fn read_pair_cb,
+       rc_read_write_mask_fn write_cb)
+{
+       /* Start from an empty result. */
+       reader_data->Readers = NULL;
+       reader_data->ReadersReserved = 0;
+       reader_data->ReaderCount = 0;
+       reader_data->Abort = 0;
+
+       /* Wire up the traversal state. */
+       d->ReaderData = reader_data;
+       d->C = c;
+       d->WriteCB = write_cb;
+       d->ReadPairCB = read_pair_cb;
+       d->ReadNormalCB = read_normal_cb;
+}
+
 /**
  * This function will create a list of readers via the rc_reader_data struct.
  * This function will abort (set the flag data->Abort) and return if it
@@ -823,16 +848,28 @@ void rc_get_readers(
 {
        struct get_readers_callback_data d;
 
-       data->Abort = 0;
-       data->ReaderCount = 0;
-       data->ReadersReserved = 0;
-       data->Readers = NULL;
-
-       d.C = c;
-       d.ReaderData = data;
-       d.ReadNormalCB = read_normal_cb;
-       d.ReadPairCB = read_pair_cb;
-       d.WriteCB = write_cb;
+       init_get_readers_callback_data(&d, data, c, read_normal_cb,
+                                               read_pair_cb, write_cb);
 
        rc_for_all_writes_mask(writer, get_readers_for_single_write, &d);
 }
+
+/**
+ * Variant of rc_get_readers() that only looks for the readers of the value
+ * written by one half (sub_writer) of the pair instruction writer.  The
+ * write is treated as a write to RC_FILE_TEMPORARY[sub_writer->DestIndex]
+ * with mask sub_writer->WriteMask.  If the writemask is empty, data is
+ * reset and left without readers.
+ */
+void rc_get_readers_sub(
+       struct radeon_compiler * c,
+       struct rc_instruction * writer,
+       struct rc_pair_sub_instruction * sub_writer,
+       struct rc_reader_data * data,
+       rc_read_src_fn read_normal_cb,
+       rc_pair_read_arg_fn read_pair_cb,
+       rc_read_write_mask_fn write_cb)
+{
+       struct get_readers_callback_data cb_data;
+
+       init_get_readers_callback_data(&cb_data, data, c, read_normal_cb,
+                                               read_pair_cb, write_cb);
+
+       if (!sub_writer->WriteMask) {
+               return;
+       }
+
+       get_readers_for_single_write(&cb_data, writer, RC_FILE_TEMPORARY,
+               sub_writer->DestIndex, sub_writer->WriteMask);
+}
index 1e30cc696953d38414ee776bf2c74aea44c28330..6667ae1c4f3e9bec380da20b7575b087b8e22996 100644 (file)
@@ -37,6 +37,7 @@ struct rc_swizzle_caps;
 struct rc_src_register;
 struct rc_pair_instruction_arg;
 struct rc_pair_instruction_source;
+struct rc_pair_sub_instruction;
 struct rc_compiler;
 
 
@@ -107,6 +108,15 @@ void rc_get_readers(
        rc_read_src_fn read_normal_cb,
        rc_pair_read_arg_fn read_pair_cb,
        rc_read_write_mask_fn write_cb);
+
+void rc_get_readers_sub(
+       struct radeon_compiler * c,
+       struct rc_instruction * writer,
+       struct rc_pair_sub_instruction * sub_writer,
+       struct rc_reader_data * data,
+       rc_read_src_fn read_normal_cb,
+       rc_pair_read_arg_fn read_pair_cb,
+       rc_read_write_mask_fn write_cb);
 /**
  * Compiler passes based on dataflow analysis.
  */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_list.c b/src/mesa/drivers/dri/r300/compiler/radeon_list.c
new file mode 100644 (file)
index 0000000..9b2ba80
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_list.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "memory_pool.h"
+
+/**
+ * Allocates a new, unlinked list node from pool.
+ * @param pool The memory pool the node is allocated from
+ * @param item The payload pointer stored in the node
+ * @return The new node, with Item set to item and Next/Prev set to NULL
+ */
+struct rc_list * rc_list(struct memory_pool * pool, void * item)
+{
+       struct rc_list * node = memory_pool_malloc(pool, sizeof(*node));
+       node->Item = item;
+       node->Next = NULL;
+       node->Prev = NULL;
+       /* The node must be handed back to the caller; falling off the end
+        * of a non-void function leaves the result undefined. */
+       return node;
+}
+
+/**
+ * Appends new_value to the end of *list.  If *list is NULL, new_value
+ * becomes the head.  Finding the tail walks the whole list.
+ */
+void rc_list_add(struct rc_list ** list, struct rc_list * new_value)
+{
+       struct rc_list * tail = *list;
+
+       if (tail == NULL) {
+               *list = new_value;
+               return;
+       }
+
+       while (tail->Next != NULL) {
+               tail = tail->Next;
+       }
+
+       new_value->Prev = tail;
+       tail->Next = new_value;
+}
+
+/**
+ * Unlinks rm_value from *list.  When rm_value is the head, *list is simply
+ * advanced to the next node; otherwise the neighbours of rm_value are
+ * linked to each other.  rm_value's own Next/Prev are left untouched.
+ *
+ * NOTE(review): in the head case the new head keeps its Prev pointing at
+ * the removed node -- confirm no caller walks Prev from the head.
+ */
+void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value)
+{
+       struct rc_list * after = rm_value->Next;
+
+       if (rm_value == *list) {
+               *list = after;
+               return;
+       }
+
+       rm_value->Prev->Next = after;
+       if (after != NULL) {
+               after->Prev = rm_value->Prev;
+       }
+}
+
+/**
+ * @return The number of nodes reachable from list by following Next
+ * (0 when list is NULL).
+ */
+unsigned int rc_list_count(struct rc_list * list)
+{
+       unsigned int nodes;
+
+       for (nodes = 0; list; list = list->Next) {
+               nodes++;
+       }
+
+       return nodes;
+}
+
+/**
+ * Debug helper: writes the Item pointer of every node to stderr, each
+ * followed by "->", and ends the line with a newline.
+ */
+void rc_list_print(struct rc_list * list)
+{
+       struct rc_list * node;
+
+       for (node = list; node; node = node->Next) {
+               fprintf(stderr, "%p->", node->Item);
+       }
+       fprintf(stderr, "\n");
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_list.h b/src/mesa/drivers/dri/r300/compiler/radeon_list.h
new file mode 100644 (file)
index 0000000..b3c8f89
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_LIST_H
+#define RADEON_LIST_H
+
+struct memory_pool;
+
+/**
+ * Node of a doubly linked list.  A list is referred to by a pointer to its
+ * first node; an empty list is a NULL pointer.
+ */
+struct rc_list {
+       /** Payload pointer supplied to rc_list(); printed by rc_list_print(). */
+       void * Item;
+       /** Previous node; rc_list() initializes it to NULL. */
+       struct rc_list * Prev;
+       /** Next node, or NULL at the end of the list. */
+       struct rc_list * Next;
+};
+
+struct rc_list * rc_list(struct memory_pool * pool, void * item);
+void rc_list_add(struct rc_list ** list, struct rc_list * new_value);
+void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value);
+unsigned int rc_list_count(struct rc_list * list);
+void rc_list_print(struct rc_list * list);
+
+#endif /* RADEON_LIST_H */
+
index d53181e1f75646ff6e0513a16c18a81986f4008f..52c0216b64b0965e68eef47d408c1a95df65fd64 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
  *
  * All Rights Reserved.
  *
 
 #include <stdio.h>
 
+#include "main/glheader.h"
+#include "program/register_allocate.h"
+#include "ralloc.h"
+
 #include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
 #include "radeon_dataflow.h"
-
+#include "radeon_list.h"
+#include "radeon_variable.h"
 
 #define VERBOSE 0
 
 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
 
 
-struct live_intervals {
-       int Start;
-       int End;
-       struct live_intervals * Next;
-};
 
 struct register_info {
-       struct live_intervals Live;
+       struct live_intervals Live[4];
 
        unsigned int Used:1;
        unsigned int Allocated:1;
        unsigned int File:3;
        unsigned int Index:RC_REGISTER_INDEX_BITS;
-};
-
-struct hardware_register {
-       struct live_intervals * Used;
+       unsigned int Writemask;
 };
 
 struct regalloc_state {
        struct radeon_compiler * C;
 
-       struct register_info Input[RC_REGISTER_MAX_INDEX];
-       struct register_info Temporary[RC_REGISTER_MAX_INDEX];
-
-       struct hardware_register * HwTemporary;
-       unsigned int NumHwTemporaries;
-       /**
-        * If an instruction is inside of a loop, EndLoop will be the
-        * IP of the ENDLOOP instruction, and BeginLoop will be the IP
-        * of the BGNLOOP instruction.  Otherwise, EndLoop and BeginLoop
-        * will be -1.
-        */
-       int EndLoop;
-       int BeginLoop;
+       struct register_info * Input;
+       unsigned int NumInputs;
+
+       struct register_info * Temporary;
+       unsigned int NumTemporaries;
+
+       unsigned int Simple;
+       unsigned int HasLoop;
+};
+
+/**
+ * Register classes used by the register allocator.
+ *
+ * NOTE(review): judging by the names only, SINGLE/DOUBLE/TRIPLE appear to
+ * stand for writemasks with one/two/three of the X, Y, Z channels, ALPHA
+ * for W, and the remaining entries for specific channel combinations --
+ * confirm against the code that fills in the class table.
+ */
+enum rc_reg_class {
+       RC_REG_CLASS_SINGLE,
+       RC_REG_CLASS_DOUBLE,
+       RC_REG_CLASS_TRIPLE,
+       RC_REG_CLASS_ALPHA,
+       RC_REG_CLASS_SINGLE_PLUS_ALPHA,
+       RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
+       RC_REG_CLASS_TRIPLE_PLUS_ALPHA,
+       RC_REG_CLASS_X,
+       RC_REG_CLASS_Y,
+       RC_REG_CLASS_Z,
+       RC_REG_CLASS_XY,
+       RC_REG_CLASS_YZ,
+       RC_REG_CLASS_XZ,
+       RC_REG_CLASS_XW,
+       RC_REG_CLASS_YW,
+       RC_REG_CLASS_ZW,
+       RC_REG_CLASS_XYW,
+       RC_REG_CLASS_YZW,
+       RC_REG_CLASS_XZW,
+       /* Not a class: equals the number of classes listed above. */
+       RC_REG_CLASS_COUNT
+};
+
+/**
+ * Description of one register class (see enum rc_reg_class).
+ */
+struct rc_class {
+       /** Which register class this entry describes. */
+       enum rc_reg_class Class;
+
+       /* presumably the number of valid entries in Writemasks -- TODO confirm */
+       unsigned int WritemaskCount;
+
+       /** This is 1 if this class is being used by the register allocator
+        * and 0 otherwise */
+       unsigned int Used;
+
+       /** This is the ID number assigned to this class by ra. */
+       unsigned int Id;
+
+       /** List of writemasks that belong to this class */
+       unsigned int Writemasks[3];
+
+
+};
 
 static void print_live_intervals(struct live_intervals * src)
 {
-       if (!src) {
+       if (!src || !src->Used) {
                DBG("(null)");
                return;
        }
 
-       while(src) {
-               DBG("(%i,%i)", src->Start, src->End);
-               src = src->Next;
-       }
+       DBG("(%i,%i)", src->Start, src->End);
 }
 
-static void add_live_intervals(struct regalloc_state * s,
-               struct live_intervals ** dst, struct live_intervals * src)
+static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
 {
-       struct live_intervals ** dst_backup = dst;
-
        if (VERBOSE) {
-               DBG("add_live_intervals: ");
-               print_live_intervals(*dst);
+               DBG("overlap_live_intervals: ");
+               print_live_intervals(a);
                DBG(" to ");
-               print_live_intervals(src);
+               print_live_intervals(b);
                DBG("\n");
        }
 
-       while(src) {
-               if (*dst && (*dst)->End < src->Start) {
-                       dst = &(*dst)->Next;
-               } else if (!*dst || (*dst)->Start > src->End) {
-                       struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li));
-                       li->Start = src->Start;
-                       li->End = src->End;
-                       li->Next = *dst;
-                       *dst = li;
-                       src = src->Next;
-               } else {
-                       if (src->End > (*dst)->End)
-                               (*dst)->End = src->End;
-                       if (src->Start < (*dst)->Start)
-                               (*dst)->Start = src->Start;
-                       src = src->Next;
-               }
-       }
-
-       if (VERBOSE) {
-               DBG("    result: ");
-               print_live_intervals(*dst_backup);
-               DBG("\n");
-       }
-}
-
-static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src)
-{
-       if (VERBOSE) {
-               DBG("overlap_live_intervals: ");
-               print_live_intervals(dst);
-               DBG(" to ");
-               print_live_intervals(src);
-               DBG("\n");
+       if (!a->Used || !b->Used) {
+               DBG("    unused interval\n");
+               return 0;
        }
 
-       while(src && dst) {
-               if (dst->End <= src->Start) {
-                       dst = dst->Next;
-               } else if (dst->End <= src->End) {
+       if (a->Start > b->Start) {
+               if (a->Start < b->End) {
                        DBG("    overlap\n");
                        return 1;
-               } else if (dst->Start < src->End) {
+               }
+       } else if (b->Start > a->Start) {
+               if (b->Start < a->End) {
+                       DBG("    overlap\n");
+                       return 1;
+               }
+       } else { /* a->Start == b->Start */
+               if (a->Start != a->End && b->Start != b->End) {
                        DBG("    overlap\n");
                        return 1;
-               } else {
-                       src = src->Next;
                }
        }
 
@@ -156,92 +157,26 @@ static int overlap_live_intervals(struct live_intervals * dst, struct live_inter
        return 0;
 }
 
-static int try_add_live_intervals(struct regalloc_state * s,
-               struct live_intervals ** dst, struct live_intervals * src)
-{
-       if (overlap_live_intervals(*dst, src))
-               return 0;
-
-       add_live_intervals(s, dst, src);
-       return 1;
-}
-
-static void scan_callback(void * data, struct rc_instruction * inst,
+static void scan_read_callback(void * data, struct rc_instruction * inst,
                rc_register_file file, unsigned int index, unsigned int mask)
 {
        struct regalloc_state * s = data;
        struct register_info * reg;
+       unsigned int i;
 
-       if (file == RC_FILE_TEMPORARY)
-               reg = &s->Temporary[index];
-       else if (file == RC_FILE_INPUT)
-               reg = &s->Input[index];
-       else
+       if (file != RC_FILE_INPUT)
                return;
 
-       if (!reg->Used) {
-               reg->Used = 1;
-               if (file == RC_FILE_INPUT)
-                       reg->Live.Start = -1;
-               else if (s->BeginLoop >= 0)
-                       reg->Live.Start = s->BeginLoop;
-               else
-                       reg->Live.Start = inst->IP;
-               reg->Live.End = inst->IP;
-       } else if (s->EndLoop >= 0)
-               reg->Live.End = s->EndLoop;
-       else if (inst->IP > reg->Live.End)
-               reg->Live.End = inst->IP;
-}
-
-static void compute_live_intervals(struct radeon_compiler *c,
-                                  struct regalloc_state *s)
-{
-       memset(s, 0, sizeof(*s));
-       s->C = c;
-       s->NumHwTemporaries = c->max_temp_regs;
-       s->BeginLoop = -1;
-       s->EndLoop = -1;
-       s->HwTemporary =
-               memory_pool_malloc(&c->Pool,
-                                  s->NumHwTemporaries * sizeof(struct hardware_register));
-       memset(s->HwTemporary, 0, s->NumHwTemporaries * sizeof(struct hardware_register));
-
-       rc_recompute_ips(s->C);
-
-       for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
-           inst != &s->C->Program.Instructions;
-           inst = inst->Next) {
-
-               /* For all instructions inside of a loop, the ENDLOOP
-                * instruction is used as the end of the live interval and
-                * the BGNLOOP instruction is used as the beginning. */
-               if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) {
-                       int loops = 1;
-                       struct rc_instruction * tmp;
-                       s->BeginLoop = inst->IP;
-                       for(tmp = inst->Next;
-                                       tmp != &s->C->Program.Instructions;
-                                       tmp = tmp->Next) {
-                               if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) {
-                                       loops++;
-                               } else if (tmp->U.I.Opcode
-                                                       == RC_OPCODE_ENDLOOP) {
-                                       if(!--loops) {
-                                               s->EndLoop = tmp->IP;
-                                               break;
-                                       }
-                               }
-                       }
-               }
+       s->Input[index].Used = 1;
+       reg = &s->Input[index];
 
-               if (inst->IP == s->EndLoop) {
-                       s->EndLoop = -1;
-                       s->BeginLoop = -1;
+       for (i = 0; i < 4; i++) {
+               if (!((mask >> i) & 0x1)) {
+                       continue;
                }
-
-               rc_for_all_reads_mask(inst, scan_callback, s);
-               rc_for_all_writes_mask(inst, scan_callback, s);
+               reg->Live[i].Used = 1;
+               reg->Live[i].Start = 0;
+               reg->Live[i].End = inst->IP;
        }
 }
 
@@ -251,7 +186,7 @@ static void remap_register(void * data, struct rc_instruction * inst,
        struct regalloc_state * s = data;
        const struct register_info * reg;
 
-       if (*file == RC_FILE_TEMPORARY)
+       if (*file == RC_FILE_TEMPORARY && s->Simple)
                reg = &s->Temporary[*index];
        else if (*file == RC_FILE_INPUT)
                reg = &s->Input[*index];
@@ -259,106 +194,473 @@ static void remap_register(void * data, struct rc_instruction * inst,
                return;
 
        if (reg->Allocated) {
-               *file = reg->File;
                *index = reg->Index;
        }
 }
 
-static void do_regalloc(struct regalloc_state * s)
+static void alloc_input_simple(void * data, unsigned int input,
+                                                       unsigned int hwreg)
 {
-       /* Simple and stupid greedy register allocation */
-       for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
-               struct register_info * reg = &s->Temporary[index];
+       struct regalloc_state * s = data;
 
-               if (!reg->Used)
-                       continue;
+       if (input >= s->NumInputs)
+               return;
+
+       s->Input[input].Allocated = 1;
+       s->Input[input].File = RC_FILE_TEMPORARY;
+       s->Input[input].Index = hwreg;
+}
 
-               for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) {
-                       if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, &reg->Live)) {
-                               reg->Allocated = 1;
-                               reg->File = RC_FILE_TEMPORARY;
-                               reg->Index = hwreg;
-                               goto success;
+/* This function offsets the temporary register indices by the number
+ * of input registers, because input registers are actually temporaries and
+ * should not occupy the same space.
+ *
+ * This pass is supposed to be used to maintain correct allocation of inputs
+ * if the standard register allocation is disabled. */
+static void do_regalloc_inputs_only(struct regalloc_state * s)
+{
+       for (unsigned i = 0; i < s->NumTemporaries; i++) {
+               s->Temporary[i].Allocated = 1;
+               s->Temporary[i].File = RC_FILE_TEMPORARY;
+               s->Temporary[i].Index = i + s->NumInputs;
+       }
+}
+
+static unsigned int is_derivative(rc_opcode op)
+{
+       return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
+}
+
+static enum rc_reg_class variable_get_class(
+       struct rc_variable * variable,
+       struct rc_class * classes)
+{
+       unsigned int i;
+       unsigned int can_change_writemask= 1;
+       unsigned int writemask = rc_variable_writemask_sum(variable);
+       struct rc_list * readers = rc_variable_readers_union(variable);
+
+       if (!variable->C->is_r500) {
+               unsigned int mask_count = 0;
+               /* The assumption here is that if an instruction has type
+                * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
+                * r300 and r400 can't swizzle the result of a TEX lookup. */
+               if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) {
+                       writemask = RC_MASK_XYZW;
+               }
+               for (i = 0; i < 4; i++) {
+                       if (GET_BIT(writemask, i)) {
+                               mask_count++;
                        }
                }
+               /* XXX We should do swizzle packing for r300 and r400 here.
+                * We need to figure out how not to create non-native
+                * swizzles. */
+               if (mask_count > 1) {
+                       can_change_writemask = 0;
+               }
+       }
 
-               rc_error(s->C, "Ran out of hardware temporaries\n");
-               return;
-
-       success:;
+       if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
+               /* DDX/DDY seem to always fail when their writemasks are
+                * changed.*/
+               if (is_derivative(variable->Inst->U.P.RGB.Opcode)
+                   || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
+                       can_change_writemask = 0;
+               }
        }
+       for ( ; readers; readers = readers->Next) {
+               struct rc_reader * r = readers->Item;
+               if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
+                       if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
+                               can_change_writemask = 0;
+                               break;
+                       }
+                       /* DDX/DDY also fail when their swizzles are changed. */
+                       if (is_derivative(r->Inst->U.P.RGB.Opcode)
+                           || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
+                               can_change_writemask = 0;
+                               break;
+                       }
+               }
+       }
+       for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
+               unsigned int j;
+               if (!can_change_writemask && classes[i].WritemaskCount > 1) {
+                       continue;
+               }
+               for (j = 0; j < 3; j++) {
+                       if (classes[i].Writemasks[j] == writemask) {
+                               return classes[i].Class;
+                       }
+               }
+       }
+       rc_error(variable->C, "Could not find class for index=%u mask=%u\n",
+                               variable->Dst.Index, writemask);
+       return 0;
+}
 
-       /* Rewrite all instructions based on the translation table we built */
-       for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
-           inst != &s->C->Program.Instructions;
-           inst = inst->Next) {
-               rc_remap_registers(inst, &remap_register, s);
+static unsigned int overlap_live_intervals_array(
+       struct live_intervals * a,
+       struct live_intervals * b)
+{
+       unsigned int a_chan, b_chan;
+       for (a_chan = 0; a_chan < 4; a_chan++) {
+               for (b_chan = 0; b_chan < 4; b_chan++) {
+                       if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
+                                       return 1;
+                       }
+               }
        }
+       return 0;
 }
 
-static void alloc_input(void * data, unsigned int input, unsigned int hwreg)
+static unsigned int reg_get_index(int reg)
 {
-       struct regalloc_state * s = data;
+       return reg / RC_MASK_XYZW;
+}
 
-       if (!s->Input[input].Used)
-               return;
+static unsigned int reg_get_writemask(int reg)
+{
+       return (reg % RC_MASK_XYZW) + 1;
+}
 
-       add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live);
+static int get_reg_id(unsigned int index, unsigned int writemask)
+{
+       assert(writemask);
+       if (writemask == 0) {
+               return 0;
+       }
+       return (index * RC_MASK_XYZW) + (writemask - 1);
+}
 
-       s->Input[input].Allocated = 1;
-       s->Input[input].File = RC_FILE_TEMPORARY;
-       s->Input[input].Index = hwreg;
+#if VERBOSE
+static void print_reg(int reg)
+{
+       unsigned int index = reg_get_index(reg);
+       unsigned int mask = reg_get_writemask(reg);
+       fprintf(stderr, "Temp[%u].%c%c%c%c", index,
+               mask & RC_MASK_X ? 'x' : '_',
+               mask & RC_MASK_Y ? 'y' : '_',
+               mask & RC_MASK_Z ? 'z' : '_',
+               mask & RC_MASK_W ? 'w' : '_');
+}
+#endif
 
+static void add_register_conflicts(
+       struct ra_regs * regs,
+       unsigned int max_temp_regs)
+{
+       unsigned int index, a_mask, b_mask;
+       for (index = 0; index < max_temp_regs; index++) {
+               for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
+                       for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
+                                                               b_mask++) {
+                               if (a_mask & b_mask) {
+                                       ra_add_reg_conflict(regs,
+                                               get_reg_id(index, a_mask),
+                                               get_reg_id(index, b_mask));
+                               }
+                       }
+               }
+       }
 }
 
-void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
+static void do_advanced_regalloc(struct regalloc_state * s)
 {
-       struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
-       struct regalloc_state s;
+       struct rc_class rc_class_list [] = {
+               {RC_REG_CLASS_SINGLE, 3, 0, 0,
+                       {RC_MASK_X,
+                        RC_MASK_Y,
+                        RC_MASK_Z}},
+               {RC_REG_CLASS_DOUBLE, 3, 0, 0,
+                       {RC_MASK_X | RC_MASK_Y,
+                        RC_MASK_X | RC_MASK_Z,
+                        RC_MASK_Y | RC_MASK_Z}},
+               {RC_REG_CLASS_TRIPLE, 1, 0, 0,
+                       {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
+                        RC_MASK_NONE,
+                        RC_MASK_NONE}},
+               {RC_REG_CLASS_ALPHA, 1, 0, 0,
+                       {RC_MASK_W,
+                        RC_MASK_NONE,
+                        RC_MASK_NONE}},
+               {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0,
+                       {RC_MASK_X | RC_MASK_W,
+                        RC_MASK_Y | RC_MASK_W,
+                        RC_MASK_Z | RC_MASK_W}},
+               {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0,
+                       {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
+                        RC_MASK_X | RC_MASK_Z | RC_MASK_W,
+                        RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
+               {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0,
+                       {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_X, 1, 0, 0,
+                       {RC_MASK_X,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_Y, 1, 0, 0,
+                       {RC_MASK_Y,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_Z, 1, 0, 0,
+                       {RC_MASK_Z,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_XY, 1, 0, 0,
+                       {RC_MASK_X | RC_MASK_Y,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_YZ, 1, 0, 0,
+                       {RC_MASK_Y | RC_MASK_Z,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_XZ, 1, 0, 0,
+                       {RC_MASK_X | RC_MASK_Z,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_XW, 1, 0, 0,
+                       {RC_MASK_X | RC_MASK_W,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_YW, 1, 0, 0,
+                       {RC_MASK_Y | RC_MASK_W,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_ZW, 1, 0, 0,
+                       {RC_MASK_Z | RC_MASK_W,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_XYW, 1, 0, 0,
+                       {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_YZW, 1, 0, 0,
+                       {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}},
+               {RC_REG_CLASS_XZW, 1, 0, 0,
+                       {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
+                       RC_MASK_NONE,
+                       RC_MASK_NONE}}
+       };
+
+       unsigned int i, j, index, input_node, node_count, node_index;
+       unsigned int * node_classes;
+       unsigned int * input_classes;
+       struct rc_instruction * inst;
+       struct rc_list * var_ptr;
+       struct rc_list * variables;
+       struct ra_regs * regs;
+       struct ra_graph * graph;
+
+       /* Allocate the main ra data structure */
+       regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW);
+
+       /* Get list of program variables */
+       variables = rc_get_variables(s->C);
+       node_count = rc_list_count(variables);
+       node_classes = memory_pool_malloc(&s->C->Pool,
+                       node_count * sizeof(unsigned int));
+       input_classes = memory_pool_malloc(&s->C->Pool,
+                       s->NumInputs * sizeof(unsigned int));
+
+       for (var_ptr = variables, node_index = 0; var_ptr;
+                                       var_ptr = var_ptr->Next, node_index++) {
+               unsigned int class_index;
+               /* Compute the live intervals */
+               rc_variable_compute_live_intervals(var_ptr->Item);
+
+               class_index = variable_get_class(var_ptr->Item, rc_class_list);
+
+               /* If we haven't used this register class yet, mark it
+                * as used and allocate space for it. */
+               if (!rc_class_list[class_index].Used) {
+                       rc_class_list[class_index].Used = 1;
+                       rc_class_list[class_index].Id = ra_alloc_reg_class(regs);
+               }
 
-       compute_live_intervals(cc, &s);
+               node_classes[node_index] = rc_class_list[class_index].Id;
+       }
 
-       c->AllocateHwInputs(c, &alloc_input, &s);
 
-       do_regalloc(&s);
-}
+       /* Assign registers to the classes */
+       for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
+               struct rc_class class = rc_class_list[i];
+               if (!class.Used) {
+                       continue;
+               }
 
-/* This functions offsets the temporary register indices by the number
- * of input registers, because input registers are actually temporaries and
- * should not occupy the same space.
- *
- * This pass is supposed to be used to maintain correct allocation of inputs
- * if the standard register allocation is disabled. */
-void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user)
-{
-       struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
-       struct regalloc_state s;
-       int temp_reg_offset;
+               for (index = 0; index < s->C->max_temp_regs; index++) {
+                       for (j = 0; j < class.WritemaskCount; j++) {
+                               int reg_id = get_reg_id(index,
+                                                       class.Writemasks[j]);
+                               ra_class_add_reg(regs, class.Id, reg_id);
+                       }
+               }
+       }
+
+       /* Add register conflicts */
+       add_register_conflicts(regs, s->C->max_temp_regs);
+
+       /* Calculate live intervals for input registers */
+       for (inst = s->C->Program.Instructions.Next;
+                                       inst != &s->C->Program.Instructions;
+                                       inst = inst->Next) {
+               rc_for_all_reads_mask(inst, scan_read_callback, s);
+       }
+
+       /* Create classes for input registers */
+       for (i = 0; i < s->NumInputs; i++) {
+               unsigned int chan, class_id, writemask = 0;
+               for (chan = 0; chan < 4; chan++) {
+                       if (s->Input[i].Live[chan].Used) {
+                               writemask |= (1 << chan);
+                       }
+               }
+               s->Input[i].Writemask = writemask;
+               if (!writemask) {
+                       continue;
+               }
+
+               class_id = ra_alloc_reg_class(regs);
+               input_classes[i] = class_id;
+               ra_class_add_reg(regs, class_id,
+                               get_reg_id(s->Input[i].Index, writemask));
+       }
+
+       ra_set_finalize(regs);
+
+       graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs);
 
-       compute_live_intervals(cc, &s);
+       /* Build the interference graph */
+       for (var_ptr = variables, node_index = 0; var_ptr;
+                                       var_ptr = var_ptr->Next,node_index++) {
+               struct rc_list * a, * b;
+               unsigned int b_index;
 
-       c->AllocateHwInputs(c, &alloc_input, &s);
+               ra_set_node_class(graph, node_index, node_classes[node_index]);
 
-       temp_reg_offset = 0;
-       for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
-               if (s.Input[i].Allocated && temp_reg_offset <= s.Input[i].Index)
-                       temp_reg_offset = s.Input[i].Index + 1;
+               for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
+                                               b; b = b->Next, b_index++) {
+                       struct rc_variable * var_a = a->Item;
+                       while (var_a) {
+                               struct rc_variable * var_b = b->Item;
+                               while (var_b) {
+                                       if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
+                                               ra_add_node_interference(graph,
+                                                       node_index, b_index);
+                                       }
+                                       var_b = var_b->Friend;
+                               }
+                               var_a = var_a->Friend;
+                       }
+               }
        }
 
-       if (temp_reg_offset) {
-               for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
-                       if (s.Temporary[i].Used) {
-                               s.Temporary[i].Allocated = 1;
-                               s.Temporary[i].File = RC_FILE_TEMPORARY;
-                               s.Temporary[i].Index = i + temp_reg_offset;
+       /* Add input registers to the interference graph */
+       for (i = 0, input_node = 0; i< s->NumInputs; i++) {
+               if (!s->Input[i].Writemask) {
+                       continue;
+               }
+               ra_set_node_class(graph, node_count + input_node,
+                                                       input_classes[i]);
+               for (var_ptr = variables, node_index = 0;
+                               var_ptr; var_ptr = var_ptr->Next, node_index++) {
+                       struct rc_variable * var = var_ptr->Item;
+                       if (overlap_live_intervals_array(s->Input[i].Live,
+                                                               var->Live)) {
+                               ra_add_node_interference(graph, node_index,
+                                               node_count + input_node);
                        }
                }
+               /* Manually allocate a register for this input */
+               ra_set_node_reg(graph, node_count + input_node, get_reg_id(
+                               s->Input[i].Index, s->Input[i].Writemask));
+               input_node++;
+       }
+
+       if (!ra_allocate_no_spills(graph)) {
+               rc_error(s->C, "Ran out of hardware temporaries\n");
+               return;
+       }
+
+       /* Rewrite the registers */
+       for (var_ptr = variables, node_index = 0; var_ptr;
+                               var_ptr = var_ptr->Next, node_index++) {
+               int reg = ra_get_node_reg(graph, node_index);
+               unsigned int writemask = reg_get_writemask(reg);
+               unsigned int index = reg_get_index(reg);
+               struct rc_variable * var = var_ptr->Item;
+
+               if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
+                       writemask = rc_variable_writemask_sum(var);
+               }
+
+               if (var->Dst.File == RC_FILE_INPUT) {
+                       continue;
+               }
+               rc_variable_change_dst(var, index, writemask);
+       }
+
+       ralloc_free(graph);
+       ralloc_free(regs);
+}
+
+/**
+ * @param user This parameter should be a pointer to an integer value.  If this
+ * integer value is zero, then a simple register allocator will be used that
+ * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
+ * user is non-zero, then the regular register allocator will be used
+ * (\sa do_advanced_regalloc).
+  */
+void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
+{
+       struct r300_fragment_program_compiler *c =
+                               (struct r300_fragment_program_compiler*)cc;
+       struct regalloc_state s;
+       int do_full_regalloc = (int)user;
+       struct rc_instruction * inst;
+
+       memset(&s, 0, sizeof(s));
+       s.C = cc;
+       s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
+       s.Input = memory_pool_malloc(&cc->Pool,
+                       s.NumInputs * sizeof(struct register_info));
+       memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
+
+       s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
+       s.Temporary = memory_pool_malloc(&cc->Pool,
+                       s.NumTemporaries * sizeof(struct register_info));
+       memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
+
+       for(inst = cc->Program.Instructions.Next;
+           inst != &cc->Program.Instructions;
+           inst = inst->Next) {
 
-               /* Rewrite all registers. */
-               for (struct rc_instruction *inst = cc->Program.Instructions.Next;
-                   inst != &cc->Program.Instructions;
-                   inst = inst->Next) {
-                       rc_remap_registers(inst, &remap_register, &s);
+               if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+                       s.HasLoop = 1;
+                       break;
                }
        }
+
+       rc_recompute_ips(s.C);
+
+       c->AllocateHwInputs(c, &alloc_input_simple, &s);
+       if (!s.HasLoop && do_full_regalloc) {
+               do_advanced_regalloc(&s);
+       } else {
+               s.Simple = 1;
+               do_regalloc_inputs_only(&s);
+       }
+
+       /* Rewrite inputs and if we are doing the simple allocation, rewrite
+        * temporaries too. */
+       for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
+                                       inst != &s.C->Program.Instructions;
+                                       inst = inst->Next) {
+               rc_remap_registers(inst, &remap_register, &s);
+       }
 }
index a07f6b63c6ee11dd4e1f845a2ac04ba646fea5b0..b899eccbf53a0852eff374ed6d544e44099bae57 100644 (file)
@@ -108,6 +108,9 @@ struct rc_sub_instruction {
 
        /** True if tex instruction should do shadow comparison */
        unsigned int TexShadow:1;
+
+       /** R500 only.  How to swizzle the result of a TEX lookup. */
+       unsigned int TexSwizzle:12;
        /*@}*/
 
        /** This holds information about the presubtract operation used by
index 45f79ece5bad4f8f9bc98c04859e7d37fe408eb5..24577333450f5db0633abf244ac5bc1c0c4ca515 100644 (file)
@@ -129,6 +129,7 @@ typedef enum {
 #define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
 #define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
 #define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF)
+#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED)
 
 /**
  * \name Bitmasks for components of vectors.
index 68874795b8ac1a94ab9fb64a11e88366731d9089..52315957520fac7b9740bdc8de937a13024fb3aa 100644 (file)
@@ -223,3 +223,17 @@ struct rc_pair_instruction_source * rc_pair_get_src(
                return NULL;
        }
 }
+
+int rc_pair_get_src_index(
+       struct rc_pair_instruction * pair_inst,
+       struct rc_pair_instruction_source * src)
+{
+       int i;
+       for (i = 0; i < 3; i++) {
+               if (&pair_inst->RGB.Src[i] == src
+                       || &pair_inst->Alpha.Src[i] == src) {
+                       return i;
+               }
+       }
+       return -1;
+}
index 82d7815aa78ff1fe6523eaf030467e13bec79f4a..a957ea9f7a0b81b34577aaccc20928a2bb06a8c0 100644 (file)
@@ -114,6 +114,10 @@ void rc_pair_foreach_source_that_rgb_reads(
 struct rc_pair_instruction_source * rc_pair_get_src(
        struct rc_pair_instruction * pair_inst,
        struct rc_pair_instruction_arg * arg);
+
+int rc_pair_get_src_index(
+       struct rc_pair_instruction * pair_inst,
+       struct rc_pair_instruction_source * src);
 /*@}*/
 
 
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c
new file mode 100644 (file)
index 0000000..91a4d45
--- /dev/null
@@ -0,0 +1,434 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_variable.h"
+
+#include "memory_pool.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_list.h"
+#include "radeon_opcodes.h"
+#include "radeon_program.h"
+
+/**
+ * Rewrite the index and writemask for the destination register of var
+ * and its friends to new_index and new_writemask.  This function also takes
+ * care of rewriting the swizzles for the sources of var.
+ *
+ * @param var            Head of the friend chain to rewrite.
+ * @param new_index      Register index every writer and reader is moved to.
+ * @param new_writemask  Channels available in the new register; the n-th
+ *                       channel written by the chain is moved to the n-th
+ *                       channel set in this mask.
+ *
+ * If a source slot cannot be found for a pair-instruction reader, the error
+ * is reported through rc_error() and that reader is left unmodified.
+ */
+void rc_variable_change_dst(
+       struct rc_variable * var,
+       unsigned int new_index,
+       unsigned int new_writemask)
+{
+       unsigned int new_idx, old_idx;
+       unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+       struct rc_variable * var_ptr;
+       struct rc_list * readers;
+       unsigned int old_mask = rc_variable_writemask_sum(var);
+
+       /* Build conversion_swizzle: walk the channels of old_mask in order and
+        * pair each one with the next not-yet-taken channel of new_writemask.
+        * Old channels left over when new_writemask runs out keep whatever
+        * rc_init_swizzle() put there. */
+       new_idx = 0;
+       for (old_idx = 0; old_idx < 4; old_idx++) {
+               if (!GET_BIT(old_mask, old_idx))
+                       continue;
+               for ( ; new_idx < 4; new_idx++) {
+                       if (GET_BIT(new_writemask, new_idx)) {
+                               SET_SWZ(conversion_swizzle, old_idx, new_idx);
+                               new_idx++;
+                               break;
+                       }
+               }
+       }
+
+       /* Retarget the destination of every writer in the friend chain. */
+       for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) {
+               if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
+                       rc_normal_rewrite_writemask(var_ptr->Inst,
+                                                       conversion_swizzle);
+                       var_ptr->Inst->U.I.DstReg.Index = new_index;
+               } else {
+                       struct rc_pair_sub_instruction * sub;
+                       /* A variable that writes exactly W is handled as the
+                        * Alpha sub-instruction: only its DestIndex changes,
+                        * its writemask is not rewritten, so the new mask
+                        * must still contain W. */
+                       if (var_ptr->Dst.WriteMask == RC_MASK_W) {
+                               assert(new_writemask & RC_MASK_W);
+                               sub = &var_ptr->Inst->U.P.Alpha;
+                       } else {
+                               sub = &var_ptr->Inst->U.P.RGB;
+                               rc_pair_rewrite_writemask(sub,
+                                                       conversion_swizzle);
+                       }
+                       sub->DestIndex = new_index;
+               }
+       }
+
+       readers = rc_variable_readers_union(var);
+
+       /* Point every reader of the chain at the new register and channels. */
+       for ( ; readers; readers = readers->Next) {
+               struct rc_reader * reader = readers->Item;
+               if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) {
+                       reader->U.I.Src->Index = new_index;
+                       reader->U.I.Src->Swizzle = rc_rewrite_swizzle(
+                               reader->U.I.Src->Swizzle, conversion_swizzle);
+               } else {
+                       struct rc_pair_instruction * pair_inst =
+                                                       &reader->Inst->U.P;
+                       unsigned int src_type = rc_source_type_swz(
+                                                       reader->U.P.Arg->Swizzle);
+
+                       /* For a presubtract argument Arg->Source is the
+                        * RC_PAIR_PRESUB_SRC marker, so the real slot is
+                        * looked up from the source pointer instead. */
+                       int src_index = reader->U.P.Arg->Source;
+                       if (src_index == RC_PAIR_PRESUB_SRC) {
+                               src_index = rc_pair_get_src_index(
+                                               pair_inst, reader->U.P.Src);
+                       }
+                       /* Try to delete the old src, it is OK if this fails,
+                        * because rc_pair_alloc_source might be able to
+                        * find a source that can be reused.
+                        */
+                       if (rc_pair_remove_src(reader->Inst, src_type,
+                                                       src_index, old_mask)) {
+                               /* Reuse the source index of the source that
+                                * was just deleted and set its register
+                                * index.  We can't use rc_pair_alloc_source
+                                * for this because it might return a source
+                                * index that is already being used. */
+                               if (src_type & RC_SOURCE_RGB) {
+                                       pair_inst->RGB.Src[src_index]
+                                               .Used = 1;
+                                       pair_inst->RGB.Src[src_index]
+                                               .Index = new_index;
+                                       pair_inst->RGB.Src[src_index]
+                                               .File = RC_FILE_TEMPORARY;
+                               }
+                               if (src_type & RC_SOURCE_ALPHA) {
+                                       pair_inst->Alpha.Src[src_index]
+                                               .Used = 1;
+                                       pair_inst->Alpha.Src[src_index]
+                                               .Index = new_index;
+                                       pair_inst->Alpha.Src[src_index]
+                                               .File = RC_FILE_TEMPORARY;
+                               }
+                       } else {
+                               src_index = rc_pair_alloc_source(
+                                               &reader->Inst->U.P,
+                                               src_type & RC_SOURCE_RGB,
+                                               src_type & RC_SOURCE_ALPHA,
+                                               RC_FILE_TEMPORARY,
+                                               new_index);
+                               if (src_index < 0) {
+                                       rc_error(var->C, "Rewrite of inst %u failed "
+                                               "Can't allocate source for "
+                                               "Inst %u src_type=%x "
+                                               "new_index=%u new_mask=%u\n",
+                                               var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask);
+                                               continue;
+                               }
+                       }
+                       reader->U.P.Arg->Swizzle = rc_rewrite_swizzle(
+                               reader->U.P.Arg->Swizzle, conversion_swizzle);
+                       /* Presubtract arguments keep their marker value. */
+                       if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) {
+                               reader->U.P.Arg->Source = src_index;
+                       }
+               }
+       }
+}
+
+/**
+ * Compute the live intervals for var and its friends.
+ *
+ * For every channel named in a reader's WriteMask the channel's interval is
+ * widened so that it starts no later than the IP of the writing instruction
+ * and ends no earlier than the IP of that reader.  Taking the minimum and
+ * maximum explicitly means the result does not depend on the order in which
+ * the readers are stored.  Channels that no reader touches are left alone.
+ */
+void rc_variable_compute_live_intervals(struct rc_variable * var)
+{
+       for ( ; var; var = var->Friend) {
+               unsigned int i;
+               int start = (int)var->Inst->IP;
+
+               for (i = 0; i < var->ReaderCount; i++) {
+                       unsigned int chan;
+                       unsigned int mask = var->Readers[i].WriteMask;
+                       int end = (int)var->Readers[i].Inst->IP;
+
+                       for (chan = 0; chan < 4; chan++) {
+                               struct live_intervals * live = &var->Live[chan];
+
+                               if (!((mask >> chan) & 0x1)) {
+                                       continue;
+                               }
+                               /* The first reader of a channel initialises
+                                * the interval; later ones may only grow it. */
+                               if (!live->Used || start < live->Start) {
+                                       live->Start = start;
+                               }
+                               if (!live->Used || end > live->End) {
+                                       live->End = end;
+                               }
+                               live->Used = 1;
+                       }
+               }
+       }
+}
+
+/**
+ * Test whether two variables have at least one reader in common.  Readers
+ * of normal instructions are compared by their source register pointer,
+ * readers of pair instructions by their argument pointer.
+ *
+ * @return 1 if a and b share a reader
+ * @return 0 if they do not
+ */
+static unsigned int readers_intersect(
+       struct rc_variable * a,
+       struct rc_variable * b)
+{
+       unsigned int i, j;
+
+       for (i = 0; i < a->ReaderCount; i++) {
+               struct rc_reader * ra = &a->Readers[i];
+
+               for (j = 0; j < b->ReaderCount; j++) {
+                       struct rc_reader * rb = &b->Readers[j];
+
+                       /* Readers of different instruction kinds never match. */
+                       if (ra->Inst->Type != rb->Inst->Type) {
+                               continue;
+                       }
+                       switch (ra->Inst->Type) {
+                       case RC_INSTRUCTION_NORMAL:
+                               if (ra->U.I.Src == rb->U.I.Src) {
+                                       return 1;
+                               }
+                               break;
+                       case RC_INSTRUCTION_PAIR:
+                               if (ra->U.P.Arg == rb->U.P.Arg) {
+                                       return 1;
+                               }
+                               break;
+                       default:
+                               break;
+                       }
+               }
+       }
+       return 0;
+}
+
+/**
+ * Append friend to the end of the chain of friends that starts at var.
+ */
+void rc_variable_add_friend(
+       struct rc_variable * var,
+       struct rc_variable * friend)
+{
+       struct rc_variable * tail;
+
+       for (tail = var; tail->Friend; tail = tail->Friend) {
+               /* advance to the last variable of the chain */
+       }
+       tail->Friend = friend;
+}
+
+/**
+ * Create a zero-filled rc_variable from the compiler's memory pool and
+ * record its destination register.  When reader_data is given, the writer
+ * instruction and the reader array are taken over from it (the array is
+ * referenced, not copied).
+ */
+struct rc_variable * rc_variable(
+       struct radeon_compiler * c,
+       unsigned int DstFile,
+       unsigned int DstIndex,
+       unsigned int DstWriteMask,
+       struct rc_reader_data * reader_data)
+{
+       struct rc_variable * var;
+
+       var = memory_pool_malloc(&c->Pool, sizeof(struct rc_variable));
+       memset(var, 0, sizeof(struct rc_variable));
+
+       var->C = c;
+       var->Dst.File = DstFile;
+       var->Dst.Index = DstIndex;
+       var->Dst.WriteMask = DstWriteMask;
+
+       if (!reader_data) {
+               return var;
+       }
+
+       var->Inst = reader_data->Writer;
+       var->ReaderCount = reader_data->ReaderCount;
+       var->Readers = reader_data->Readers;
+       return var;
+}
+
+/**
+ * Wrap variable in a new list node and add it to aborted_list when aborted
+ * is non-zero, otherwise to variable_list.
+ */
+static void get_variable_helper(
+       struct rc_list ** aborted_list,
+       struct rc_list ** variable_list,
+       unsigned int aborted,
+       struct rc_variable * variable)
+{
+       struct rc_list ** target = aborted ? aborted_list : variable_list;
+
+       rc_list_add(target, rc_list(&variable->C->Pool, variable));
+}
+
+/**
+ * Create the variable for one half (RGB or Alpha) of a pair instruction
+ * and file it into aborted_list or variable_list.  Nothing is created when
+ * the sub-instruction is a NOP or when it has no readers.
+ */
+static void get_variable_pair_helper(
+       struct rc_list ** aborted_list,
+       struct rc_list ** variable_list,
+       struct radeon_compiler * c,
+       struct rc_instruction * inst,
+       struct rc_pair_sub_instruction * sub_inst)
+{
+       struct rc_reader_data reader_data;
+       struct rc_variable * new_var;
+       rc_register_file file = RC_FILE_NONE;
+       unsigned int writemask = 0;
+
+       if (sub_inst->Opcode == RC_OPCODE_NOP) {
+               return;
+       }
+
+       memset(&reader_data, 0, sizeof(struct rc_reader_data));
+       rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL);
+       if (!reader_data.ReaderCount) {
+               return;
+       }
+
+       /* A temporary write takes precedence over an output write; with
+        * neither, the defaults (no file, empty mask) stay in place. */
+       if (sub_inst->WriteMask) {
+               file = RC_FILE_TEMPORARY;
+               writemask = sub_inst->WriteMask;
+       } else if (sub_inst->OutputWriteMask) {
+               file = RC_FILE_OUTPUT;
+               writemask = sub_inst->OutputWriteMask;
+       }
+
+       new_var = rc_variable(c, file, sub_inst->DestIndex, writemask,
+                                                               &reader_data);
+       get_variable_helper(aborted_list, variable_list, reader_data.Abort,
+                                                               new_var);
+}
+
+/**
+ * Build the list of variables used by the shader program.  Every
+ * instruction that writes a register and has at least one reader becomes a
+ * variable; struct rc_variable carries the list of readers, which makes it
+ * a definition-use chain.  Variables that share a reader are "friends" and
+ * are chained together through the Friend attribute, with only the head of
+ * each chain appearing in the returned list.
+ */
+struct rc_list * rc_get_variables(struct radeon_compiler * c)
+{
+       struct rc_instruction * inst;
+       struct rc_list * aborted_list = NULL;
+       struct rc_list * variable_list = NULL;
+
+       inst = c->Program.Instructions.Next;
+       while (inst != &c->Program.Instructions) {
+               if (inst->Type != RC_INSTRUCTION_NORMAL) {
+                       /* Each half of a pair instruction is its own
+                        * potential variable. */
+                       get_variable_pair_helper(&aborted_list, &variable_list,
+                                       c, inst, &inst->U.P.RGB);
+                       get_variable_pair_helper(&aborted_list, &variable_list,
+                                       c, inst, &inst->U.P.Alpha);
+               } else {
+                       struct rc_reader_data reader_data;
+
+                       memset(&reader_data, 0, sizeof(reader_data));
+                       rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
+                       if (reader_data.ReaderCount != 0) {
+                               struct rc_variable * new_var = rc_variable(c,
+                                       inst->U.I.DstReg.File,
+                                       inst->U.I.DstReg.Index,
+                                       inst->U.I.DstReg.WriteMask,
+                                       &reader_data);
+                               get_variable_helper(&aborted_list,
+                                       &variable_list, reader_data.Abort,
+                                       new_var);
+                       }
+               }
+               inst = inst->Next;
+       }
+
+       /* Variables on aborted_list might share a reader with another
+        * variable.  Take the head of the list, attach every later entry
+        * that shares a reader with it as a friend, then move the head to
+        * variable_list; repeat until aborted_list is empty.
+        */
+       while (aborted_list) {
+               struct rc_list * head = aborted_list;
+               struct rc_variable * head_var = head->Item;
+               struct rc_list * candidate = head->Next;
+
+               while (candidate) {
+                       /* Saved up front because candidate may be unlinked. */
+                       struct rc_list * following = candidate->Next;
+
+                       if (readers_intersect(head_var, candidate->Item)) {
+                               rc_list_remove(&aborted_list, candidate);
+                               rc_variable_add_friend(head_var,
+                                                       candidate->Item);
+                       }
+                       candidate = following;
+               }
+               rc_list_remove(&aborted_list, head);
+               rc_list_add(&variable_list,
+                       rc_list(&head_var->C->Pool, head_var));
+       }
+       return variable_list;
+}
+
+/**
+ * @return The bitwise OR of Dst.WriteMask over var and every variable
+ * reachable through its Friend chain; 0 when var is NULL.
+ */
+unsigned int rc_variable_writemask_sum(struct rc_variable * var)
+{
+       struct rc_variable * cur;
+       unsigned int combined = 0;
+
+       for (cur = var; cur; cur = cur->Friend) {
+               combined |= cur->Dst.WriteMask;
+       }
+       return combined;
+}
+
+/**
+ * @return A list of readers for a variable and its friends.  A reader that
+ * reads from two different variable friends appears only once: normal
+ * instruction readers are identified by their source pointer, pair
+ * instruction readers by the combination of argument and source pointers.
+ */
+struct rc_list * rc_variable_readers_union(struct rc_variable * var)
+{
+       struct rc_list * list = NULL;
+
+       for ( ; var; var = var->Friend) {
+               unsigned int i;
+
+               for (i = 0; i < var->ReaderCount; i++) {
+                       struct rc_reader * candidate = &var->Readers[i];
+                       struct rc_list * node = list;
+                       unsigned int duplicate = 0;
+
+                       /* Scan what has been collected so far and stop at
+                        * the first entry that denotes the same reader. */
+                       while (node && !duplicate) {
+                               struct rc_reader * seen = node->Item;
+
+                               if (candidate->Inst->Type == seen->Inst->Type) {
+                                       if (candidate->Inst->Type
+                                               == RC_INSTRUCTION_NORMAL) {
+                                               duplicate = candidate->U.I.Src
+                                                       == seen->U.I.Src;
+                                       } else if (candidate->Inst->Type
+                                               == RC_INSTRUCTION_PAIR) {
+                                               duplicate = candidate->U.P.Arg
+                                                       == seen->U.P.Arg
+                                                   && candidate->U.P.Src
+                                                       == seen->U.P.Src;
+                                       }
+                               }
+                               node = node->Next;
+                       }
+                       if (!duplicate) {
+                               rc_list_add(&list,
+                                       rc_list(&var->C->Pool, candidate));
+                       }
+               }
+       }
+       return list;
+}
+
+/**
+ * Dump var and each of its friends to stderr: the IP of the writing
+ * instruction, the destination index and writemask, the start/end of the
+ * live interval of all four channels, and the number of readers.
+ */
+void rc_variable_print(struct rc_variable * var)
+{
+       unsigned int i;
+       while (var) {
+               fprintf(stderr, "%u: TEMP[%u].%u: ",
+                       var->Inst->IP, var->Dst.Index, var->Dst.WriteMask);
+               for (i = 0; i < 4; i++) {
+                       /* Start and End are declared as int, so they are
+                        * printed with %d rather than %u. */
+                       fprintf(stderr, "chan %u: start=%d end=%d ", i,
+                                       var->Live[i].Start, var->Live[i].End);
+               }
+               fprintf(stderr, "%u readers\n", var->ReaderCount);
+               if (var->Friend) {
+                       fprintf(stderr, "Friend: \n\t");
+               }
+               var = var->Friend;
+       }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.h b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h
new file mode 100644 (file)
index 0000000..b8fbcaa
--- /dev/null
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_VARIABLE_H
+#define RADEON_VARIABLE_H
+
+#include "radeon_compiler.h"
+
+struct radeon_compiler;
+struct rc_list;
+struct rc_reader_data;
+struct rc_readers;
+
+/* Live range of one channel of a variable, measured in instruction IPs.
+ * Filled in by rc_variable_compute_live_intervals(). */
+struct live_intervals {
+       /* IP of the instruction that writes the variable. */
+       int Start;
+       /* IP of a reading instruction at which the range ends. */
+       int End;
+       /* Set to 1 once a reader of this channel has been recorded. */
+       int Used;
+};
+
+/* One register write together with the instructions that read it. */
+struct rc_variable {
+       /* Owning compiler; its Pool is used for allocations and it is the
+        * target of error reports. */
+       struct radeon_compiler * C;
+       /* File, index and writemask of the register being written. */
+       struct rc_dst_register Dst;
+
+       /* The instruction that performs the write. */
+       struct rc_instruction * Inst;
+       /* Number of entries in Readers. */
+       unsigned int ReaderCount;
+       /* Readers of the written value; taken from the rc_reader_data the
+        * variable was created from. */
+       struct rc_reader * Readers;
+       /* Live interval of each of the four channels. */
+       struct live_intervals Live[4];
+
+       /* A friend is a variable that shares a reader with another variable.
+        */
+       struct rc_variable * Friend;
+};
+
+/* Move var and its friends to register new_index, remapping the channels
+ * they write onto new_writemask and updating all of their readers. */
+void rc_variable_change_dst(
+       struct rc_variable * var,
+       unsigned int new_index,
+       unsigned int new_writemask);
+
+/* Fill in Live[] for var and each of its friends from their reader lists. */
+void rc_variable_compute_live_intervals(struct rc_variable * var);
+
+/* Append friend to the end of var's chain of friends. */
+void rc_variable_add_friend(
+       struct rc_variable * var,
+       struct rc_variable * friend);
+
+/* Allocate a zeroed variable from c's memory pool and set its destination;
+ * reader_data may be NULL, otherwise it supplies the writer and readers. */
+struct rc_variable * rc_variable(
+       struct radeon_compiler * c,
+       unsigned int DstFile,
+       unsigned int DstIndex,
+       unsigned int DstWriteMask,
+       struct rc_reader_data * reader_data);
+
+/* Return the list of variables of the program; variables that share a
+ * reader are linked as friends under a single list entry. */
+struct rc_list * rc_get_variables(struct radeon_compiler * c);
+
+/* Return the bitwise OR of the writemasks of var and all of its friends. */
+unsigned int rc_variable_writemask_sum(struct rc_variable * var);
+
+/* Return the readers of var and its friends with duplicates removed. */
+struct rc_list * rc_variable_readers_union(struct rc_variable * var);
+
+/* Print var and its friends to stderr for debugging. */
+void rc_variable_print(struct rc_variable * var);
+
+#endif /* RADEON_VARIABLE_H */