r300/compiler: Use presubtract operations as much as possible
authorTom Stellard <tstellar@gmail.com>
Tue, 9 Nov 2010 02:49:44 +0000 (18:49 -0800)
committerTom Stellard <tstellar@gmail.com>
Mon, 22 Nov 2010 02:48:31 +0000 (18:48 -0800)
Previously, presubtract operations were only being used by instructions
with fewer than three source registers.

src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h

index 97f4c75849268a137f63faa42a6ba2a6bf1e2218..2b8d284ce9fda110ef65df6bfcb6e044b5b5f094 100644 (file)
@@ -31,6 +31,8 @@
 
 #include "radeon_compiler_util.h"
 
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
 /**
  */
 unsigned int rc_swizzle_to_writemask(unsigned int swz)
@@ -59,3 +61,123 @@ unsigned int rc_src_reads_dst_mask(
        }
        return dst_mask & rc_swizzle_to_writemask(src_swz);
 }
+
+unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels)
+{
+       unsigned int chan;
+       unsigned int swz = RC_SWIZZLE_UNUSED;
+       unsigned int ret = RC_SOURCE_NONE;
+
+       for(chan = 0; chan < channels; chan++) {
+               swz = GET_SWZ(swizzle, chan);
+               if (swz == RC_SWIZZLE_W) {
+                       ret |= RC_SOURCE_ALPHA;
+               } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
+                                               || swz == RC_SWIZZLE_Z) {
+                       ret |= RC_SOURCE_RGB;
+               }
+       }
+       return ret;
+}
+
+unsigned int rc_source_type_mask(unsigned int mask)
+{
+       unsigned int ret = RC_SOURCE_NONE;
+
+       if (mask & RC_MASK_XYZ)
+               ret |= RC_SOURCE_RGB;
+
+       if (mask & RC_MASK_W)
+               ret |= RC_SOURCE_ALPHA;
+
+       return ret;
+}
+
+struct can_use_presub_data {
+       struct rc_src_register RemoveSrcs[3];
+       unsigned int RGBCount;
+       unsigned int AlphaCount;
+};
+
+static void can_use_presub_read_cb(
+       void * userdata,
+       struct rc_instruction * inst,
+       rc_register_file file,
+       unsigned int index,
+       unsigned int mask)
+{
+       struct can_use_presub_data * d = userdata;
+       unsigned int src_type = rc_source_type_mask(mask);
+       unsigned int i;
+
+       if (file == RC_FILE_NONE)
+               return;
+
+       for(i = 0; i < 3; i++) {
+               if (d->RemoveSrcs[i].File == file
+                   && d->RemoveSrcs[i].Index == index) {
+                       src_type &=
+                               ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle, 4);
+               }
+       }
+
+       if (src_type & RC_SOURCE_RGB)
+               d->RGBCount++;
+
+       if (src_type & RC_SOURCE_ALPHA)
+               d->AlphaCount++;
+}
+
+unsigned int rc_inst_can_use_presub(
+       struct rc_instruction * inst,
+       rc_presubtract_op presub_op,
+       unsigned int presub_writemask,
+       struct rc_src_register replace_reg,
+       struct rc_src_register presub_src0,
+       struct rc_src_register presub_src1)
+{
+       struct can_use_presub_data d;
+       unsigned int num_presub_srcs;
+       unsigned int presub_src_type = rc_source_type_mask(presub_writemask);
+       const struct rc_opcode_info * info =
+                                       rc_get_opcode_info(inst->U.I.Opcode);
+
+       if (presub_op == RC_PRESUB_NONE) {
+               return 1;
+       }
+
+       if (info->HasTexture) {
+               return 0;
+       }
+
+       /* We can't use more than one presubtract value in an
+        * instruction, unless the two presubtract operations
+        * are the same and read from the same registers.
+        * XXX For now we will limit instructions to only one presubtract
+        * value.*/
+       if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+               return 0;
+       }
+
+       memset(&d, 0, sizeof(d));
+       d.RemoveSrcs[0] = replace_reg;
+       d.RemoveSrcs[1] = presub_src0;
+       d.RemoveSrcs[2] = presub_src1;
+
+       rc_for_all_reads_mask(inst, can_use_presub_read_cb, &d);
+
+       num_presub_srcs = rc_presubtract_src_reg_count(presub_op);
+
+       if ((presub_src_type & RC_SOURCE_RGB)
+                                       && d.RGBCount + num_presub_srcs > 3) {
+               return 0;
+       }
+
+       if ((presub_src_type & RC_SOURCE_ALPHA)
+                                       && d.AlphaCount + num_presub_srcs > 3) {
+               return 0;
+       }
+
+       return 1;
+}
+
index 1a14e7cb0efcd72956bdf0225b71558af00e70bc..e50dfbd4fb9949feb45fe59a7cd6ddd167bdab0c 100644 (file)
@@ -3,6 +3,9 @@
 #ifndef RADEON_PROGRAM_UTIL_H
 #define RADEON_PROGRAM_UTIL_H
 
+struct rc_instruction;
+struct rc_src_register;
+
 unsigned int rc_swizzle_to_writemask(unsigned int swz);
 
 unsigned int rc_src_reads_dst_mask(
@@ -13,4 +16,16 @@ unsigned int rc_src_reads_dst_mask(
                unsigned int dst_idx,
                unsigned int dst_mask);
 
+unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels);
+
+unsigned int rc_source_type_mask(unsigned int mask);
+
+unsigned int rc_inst_can_use_presub(
+       struct rc_instruction * inst,
+       rc_presubtract_op presub_op,
+       unsigned int presub_writemask,
+       struct rc_src_register replace_reg,
+       struct rc_src_register presub_src0,
+       struct rc_src_register presub_src1);
+
 #endif /* RADEON_PROGRAM_UTIL_H */
index 600be8b8863b67fb81229fc621d6e027398ec1ab..9df07edf2b1a704818a4b31fa94a10626116751a 100644 (file)
@@ -139,9 +139,17 @@ static void pair_sub_for_all_args(
        const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
 
        for(i = 0; i < info->NumSrcRegs; i++) {
-               unsigned int src_type = rc_source_type_that_arg_reads(
-                               sub->Arg[i].Source, sub->Arg[i].Swizzle);
-               if (src_type == RC_PAIR_SOURCE_NONE)
+               unsigned int src_type;
+               unsigned int channels = 0;
+               if (&fullinst->U.P.RGB == sub)
+                       channels = 3;
+               else if (&fullinst->U.P.Alpha == sub)
+                       channels = 1;
+
+               assert(channels > 0);
+               src_type = rc_source_type_swz(sub->Arg[i].Swizzle, channels);
+
+               if (src_type == RC_SOURCE_NONE)
                        continue;
 
                if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) {
@@ -149,7 +157,7 @@ static void pair_sub_for_all_args(
                        unsigned int presub_src_count;
                        struct rc_pair_instruction_source * src_array;
                        unsigned int j;
-                       if (src_type & RC_PAIR_SOURCE_RGB) {
+                       if (src_type & RC_SOURCE_RGB) {
                                presub_type = fullinst->
                                        U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index;
                                src_array = fullinst->U.P.RGB.Src;
index 2eb548474fe07c327a75005f84c347e254e13fe0..27b10ffbd615a3c196b1cb38a07ebce136dca6e7 100644 (file)
@@ -71,12 +71,13 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
 {
        rc_register_file file = src->File;
        struct rc_reader_data * reader_data = data;
-       const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
 
-       /* It is possible to do copy propigation in this situation,
-        * just not right now, see peephole_add_presub_inv() */
-       if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
-                       (info->NumSrcRegs > 2 || info->HasTexture)) {
+       if(!rc_inst_can_use_presub(inst,
+                               reader_data->Writer->U.I.PreSub.Opcode,
+                               rc_swizzle_to_writemask(src->Swizzle),
+                               *src,
+                               reader_data->Writer->U.I.PreSub.SrcReg[0],
+                               reader_data->Writer->U.I.PreSub.SrcReg[1])) {
                reader_data->Abort = 1;
                return;
        }
@@ -424,24 +425,13 @@ static void presub_scan_read(
        struct rc_src_register * src)
 {
        struct rc_reader_data * reader_data = data;
-       const struct rc_opcode_info * info =
-                                       rc_get_opcode_info(inst->U.I.Opcode);
-       /* XXX: There are some situations where instructions
-        * with more than 2 src registers can use the
-        * presubtract select, but to keep things simple we
-        * will disable presubtract on these instructions for
-        * now. */
-       if (info->NumSrcRegs > 2 || info->HasTexture) {
-               reader_data->Abort = 1;
-               return;
-       }
+       rc_presubtract_op * presub_opcode = reader_data->CbData;
 
-       /* We can't use more than one presubtract value in an
-        * instruction, unless the two prsubtract operations
-        * are the same and read from the same registers.
-        * XXX For now we will limit instructions to only one presubtract
-        * value.*/
-       if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+       if (!rc_inst_can_use_presub(inst, *presub_opcode,
+                       reader_data->Writer->U.I.DstReg.WriteMask,
+                       *src,
+                       reader_data->Writer->U.I.SrcReg[0],
+                       reader_data->Writer->U.I.SrcReg[1])) {
                reader_data->Abort = 1;
                return;
        }
@@ -455,7 +445,9 @@ static int presub_helper(
 {
        struct rc_reader_data reader_data;
        unsigned int i;
+       rc_presubtract_op cb_op = presub_opcode;
 
+       reader_data.CbData = &cb_op;
        rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
                                                is_src_clobbered_scan_write);
 
index cce253f11ebc8b5a75512e9c54928592d8f509ee..cbb5ef6237e98962534fe1b33847c2f20e0e5f2e 100644 (file)
@@ -30,6 +30,7 @@
 #include <stdio.h>
 
 #include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
 #include "radeon_dataflow.h"
 
 
@@ -301,12 +302,12 @@ static int merge_presub_sources(
        assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
 
        switch(type) {
-       case RC_PAIR_SOURCE_RGB:
+       case RC_SOURCE_RGB:
                is_rgb = 1;
                is_alpha = 0;
                dst_sub = &dst_full->RGB;
                break;
-       case RC_PAIR_SOURCE_ALPHA:
+       case RC_SOURCE_ALPHA:
                is_rgb = 0;
                is_alpha = 1;
                dst_sub = &dst_full->Alpha;
@@ -347,6 +348,8 @@ static int merge_presub_sources(
                                continue;
                        free_source = rc_pair_alloc_source(dst_full, is_rgb,
                                        is_alpha, temp.File, temp.Index);
+                       if (free_source < 0)
+                               return 0;
                        one_way = 1;
                } else {
                        dst_sub->Src[free_source] = temp;
@@ -362,11 +365,11 @@ static int merge_presub_sources(
                for(arg = 0; arg < info->NumSrcRegs; arg++) {
                        /*If this arg does not read from an rgb source,
                         * do nothing. */
-                       if (!(rc_source_type_that_arg_reads(
-                               dst_full->RGB.Arg[arg].Source,
-                               dst_full->RGB.Arg[arg].Swizzle) & type)) {
+                       if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle,
+                                                               3) & type)) {
                                continue;
                        }
+
                        if (dst_full->RGB.Arg[arg].Source == srcp_src)
                                dst_full->RGB.Arg[arg].Source = free_source;
                        /* We need to do this just in case register
@@ -398,13 +401,13 @@ static int destructive_merge_instructions(
 
        /* Merge the rgb presubtract registers. */
        if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
-               if (!merge_presub_sources(rgb, alpha->RGB, RC_PAIR_SOURCE_RGB)) {
+               if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
                        return 0;
                }
        }
        /* Merge the alpha presubtract registers */
        if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
-               if(!merge_presub_sources(rgb,  alpha->Alpha, RC_PAIR_SOURCE_ALPHA)){
+               if(!merge_presub_sources(rgb,  alpha->Alpha, RC_SOURCE_ALPHA)){
                        return 0;
                }
        }
index 9dcd44c522dbc24a72ded4df6bddd5f4caf917de..45f79ece5bad4f8f9bc98c04859e7d37fe408eb5 100644 (file)
@@ -181,4 +181,9 @@ static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
                return 0;
        }
 }
+
+#define RC_SOURCE_NONE  0x0
+#define RC_SOURCE_RGB   0x1
+#define RC_SOURCE_ALPHA 0x2
+
 #endif /* RADEON_PROGRAM_CONSTANTS_H */
index 582d73b61f4ebf6f1bbc807df6865cc82b6f913d..5905d26e521b087550dd1b8fe32d01037a17ffc4 100644 (file)
@@ -27,6 +27,8 @@
 
 #include "radeon_program_pair.h"
 
+#include "radeon_compiler_util.h"
+
 #include <stdlib.h>
 
 /**
@@ -205,37 +207,35 @@ void rc_pair_foreach_source_that_rgb_reads(
        }
 }
 
-/*return 0 for rgb, 1 for alpha -1 for error. */
-
-unsigned int rc_source_type_that_arg_reads(
-       unsigned int source,
-       unsigned int swizzle)
-{
-       unsigned int chan;
-       unsigned int swz = RC_SWIZZLE_UNUSED;
-       unsigned int ret = RC_PAIR_SOURCE_NONE;
-
-       for(chan = 0; chan < 3; chan++) {
-               swz = GET_SWZ(swizzle, chan);
-               if (swz == RC_SWIZZLE_W) {
-                       ret |= RC_PAIR_SOURCE_ALPHA;
-               } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
-                                               || swz == RC_SWIZZLE_Z) {
-                       ret |= RC_PAIR_SOURCE_RGB;
-               }
-       }
-       return ret;
-}
-
 struct rc_pair_instruction_source * rc_pair_get_src(
        struct rc_pair_instruction * pair_inst,
        struct rc_pair_instruction_arg * arg)
 {
-       unsigned int type = rc_source_type_that_arg_reads(arg->Source,
-                                                               arg->Swizzle);
-       if (type & RC_PAIR_SOURCE_RGB) {
+       unsigned int i, type;
+       unsigned int channels = 0;
+
+       for(i = 0; i < 3; i++) {
+               if (arg == pair_inst->RGB.Arg + i) {
+                       channels = 3;
+                       break;
+               }
+       }
+
+       if (channels == 0) {
+               for (i = 0; i < 3; i++) {
+                       if (arg == pair_inst->Alpha.Arg + i) {
+                               channels = 1;
+                               break;
+                       }
+               }
+       }
+
+       assert(channels > 0);
+       type = rc_source_type_swz(arg->Swizzle, channels);
+
+       if (type & RC_SOURCE_RGB) {
                return &pair_inst->RGB.Src[arg->Source];
-       } else if (type & RC_PAIR_SOURCE_ALPHA) {
+       } else if (type & RC_SOURCE_ALPHA) {
                return &pair_inst->Alpha.Src[arg->Source];
        } else {
                return NULL;
index 54ca56762b18adc096e68ba6b548cea9134e6803..ccf7a0070cdae07ebd7e6f845216e5c257c9a49f 100644 (file)
@@ -55,10 +55,6 @@ struct radeon_compiler;
  */
 #define RC_PAIR_PRESUB_SRC 3
 
-#define RC_PAIR_SOURCE_NONE  0x0
-#define RC_PAIR_SOURCE_RGB   0x1
-#define RC_PAIR_SOURCE_ALPHA 0x2
-
 struct rc_pair_instruction_source {
        unsigned int Used:1;
        unsigned int File:3;
@@ -115,10 +111,6 @@ void rc_pair_foreach_source_that_rgb_reads(
        void * data,
        rc_pair_foreach_src_fn cb);
 
-unsigned int rc_source_type_that_arg_reads(
-       unsigned int source,
-       unsigned int swizzle);
-
 struct rc_pair_instruction_source * rc_pair_get_src(
        struct rc_pair_instruction * pair_inst,
        struct rc_pair_instruction_arg * arg);