r300/compiler: Limit instructions to 3 source selects
author Tom Stellard <tstellar@gmail.com>
Sun, 24 Apr 2011 04:27:34 +0000 (21:27 -0700)
committer Tom Stellard <tstellar@gmail.com>
Wed, 11 May 2011 23:16:29 +0000 (16:16 -0700)
Some presubtract conversions were generating more than 3 source
selects.

https://bugs.freedesktop.org/show_bug.cgi?id=36527

Note: This is a candidate for the 7.10 branch.

src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
src/mesa/drivers/dri/r300/compiler/tests/radeon_compiler_util_tests.c

index b077e7b7d657bdc273cde302445233fdb8903107..c8001ba8c1863e9e71cc654d0707bac2982c022b 100644 (file)
@@ -359,53 +359,71 @@ unsigned int rc_source_type_mask(unsigned int mask)
        return ret;
 }
 
+struct src_select {
+       rc_register_file File;
+       int Index;
+       unsigned int SrcType;
+};
+
 struct can_use_presub_data {
-       struct rc_src_register RemoveSrcs[3];
-       unsigned int RGBCount;
-       unsigned int AlphaCount;
+       struct src_select Selects[5];
+       unsigned int SelectCount;
+       const struct rc_src_register * ReplaceReg;
+       unsigned int ReplaceRemoved;
 };
 
+static void can_use_presub_data_add_select(
+       struct can_use_presub_data * data,
+       rc_register_file file,
+       unsigned int index,
+       unsigned int src_type)
+{
+       struct src_select * select;
+
+       select = &data->Selects[data->SelectCount++];
+       select->File = file;
+       select->Index = index;
+       select->SrcType = src_type;
+}
+
+/**
+ * This callback function records a source select for each source of inst,
+ * skipping RC_FILE_NONE and the first read of can_use_presub_data->ReplaceReg.
+ */
 static void can_use_presub_read_cb(
        void * userdata,
        struct rc_instruction * inst,
-       rc_register_file file,
-       unsigned int index,
-       unsigned int mask)
+       struct rc_src_register * src)
 {
        struct can_use_presub_data * d = userdata;
-       unsigned int src_type = rc_source_type_mask(mask);
-       unsigned int i;
 
-       if (file == RC_FILE_NONE)
+       if (!d->ReplaceRemoved && src == d->ReplaceReg) {
+               d->ReplaceRemoved = 1;
                return;
-
-       for(i = 0; i < 3; i++) {
-               if (d->RemoveSrcs[i].File == file
-                   && d->RemoveSrcs[i].Index == index) {
-                       src_type &=
-                               ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle);
-               }
        }
 
-       if (src_type & RC_SOURCE_RGB)
-               d->RGBCount++;
+       if (src->File == RC_FILE_NONE)
+               return;
 
-       if (src_type & RC_SOURCE_ALPHA)
-               d->AlphaCount++;
+       can_use_presub_data_add_select(d, src->File, src->Index,
+                                       rc_source_type_swz(src->Swizzle));
 }
 
 unsigned int rc_inst_can_use_presub(
        struct rc_instruction * inst,
        rc_presubtract_op presub_op,
        unsigned int presub_writemask,
-       struct rc_src_register replace_reg,
-       struct rc_src_register presub_src0,
-       struct rc_src_register presub_src1)
+       const struct rc_src_register * replace_reg,
+       const struct rc_src_register * presub_src0,
+       const struct rc_src_register * presub_src1)
 {
        struct can_use_presub_data d;
        unsigned int num_presub_srcs;
+       unsigned int i;
        const struct rc_opcode_info * info =
                                        rc_get_opcode_info(inst->U.I.Opcode);
+       int rgb_count = 0, alpha_count = 0;
+       unsigned int src_type0, src_type1;
 
        if (presub_op == RC_PRESUB_NONE) {
                return 1;
@@ -425,15 +443,62 @@ unsigned int rc_inst_can_use_presub(
        }
 
        memset(&d, 0, sizeof(d));
-       d.RemoveSrcs[0] = replace_reg;
-       d.RemoveSrcs[1] = presub_src0;
-       d.RemoveSrcs[2] = presub_src1;
+       d.ReplaceReg = replace_reg;
 
-       rc_for_all_reads_mask(inst, can_use_presub_read_cb, &d);
+       rc_for_all_reads_src(inst, can_use_presub_read_cb, &d);
 
        num_presub_srcs = rc_presubtract_src_reg_count(presub_op);
 
-       if (d.RGBCount + num_presub_srcs > 3 || d.AlphaCount + num_presub_srcs > 3) {
+       src_type0 = rc_source_type_swz(presub_src0->Swizzle);
+       can_use_presub_data_add_select(&d,
+               presub_src0->File,
+               presub_src0->Index,
+               src_type0);
+
+       if (num_presub_srcs > 1) {
+               src_type1 = rc_source_type_swz(presub_src1->Swizzle);
+               can_use_presub_data_add_select(&d,
+                       presub_src1->File,
+                       presub_src1->Index,
+                       src_type1);
+
+               /* Even if both of the presub sources read from the same
+                * register, we still need to use 2 different source selects
+                * for them, so we need to increment the count to compensate.
+                */
+               if (presub_src0->File == presub_src1->File
+                   && presub_src0->Index == presub_src1->Index) {
+                       if (src_type0 & src_type1 & RC_SOURCE_RGB) {
+                               rgb_count++;
+                       }
+                       if (src_type0 & src_type1 & RC_SOURCE_ALPHA) {
+                               alpha_count++;
+                       }
+               }
+       }
+
+       /* Count the number of source selects for Alpha and RGB.  If we
+        * encounter two of the same source selects then we can ignore the
+        * first one. */
+       for (i = 0; i < d.SelectCount; i++) {
+               unsigned int j;
+               unsigned int src_type = d.Selects[i].SrcType;
+               for (j = i + 1; j < d.SelectCount; j++) {
+                       if (d.Selects[i].File == d.Selects[j].File
+                           && d.Selects[i].Index == d.Selects[j].Index) {
+                               src_type &= ~d.Selects[j].SrcType;
+                       }
+               }
+               if (src_type & RC_SOURCE_RGB) {
+                       rgb_count++;
+               }
+
+               if (src_type & RC_SOURCE_ALPHA) {
+                       alpha_count++;
+               }
+       }
+
+       if (rgb_count > 3 || alpha_count > 3) {
                return 0;
        }
 
index 2af289dfabdd923956c3482e9fa3b3e11e3ed4dd..3730aa888c0b7a4217e6b22f1152c73e9671cb90 100644 (file)
@@ -63,9 +63,9 @@ unsigned int rc_inst_can_use_presub(
        struct rc_instruction * inst,
        rc_presubtract_op presub_op,
        unsigned int presub_writemask,
-       struct rc_src_register replace_reg,
-       struct rc_src_register presub_src0,
-       struct rc_src_register presub_src1);
+       const struct rc_src_register * replace_reg,
+       const struct rc_src_register * presub_src0,
+       const struct rc_src_register * presub_src1);
 
 int rc_get_max_index(
        struct radeon_compiler * c,
index 5b4fba808732e8c14496389b38ac456670813f4b..53ab5fbbbd9bb12ba9a9127747ce9441d4b27c7c 100644 (file)
@@ -70,9 +70,9 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
        if(!rc_inst_can_use_presub(inst,
                                reader_data->Writer->U.I.PreSub.Opcode,
                                rc_swizzle_to_writemask(src->Swizzle),
-                               *src,
-                               reader_data->Writer->U.I.PreSub.SrcReg[0],
-                               reader_data->Writer->U.I.PreSub.SrcReg[1])) {
+                               src,
+                               &reader_data->Writer->U.I.PreSub.SrcReg[0],
+                               &reader_data->Writer->U.I.PreSub.SrcReg[1])) {
                reader_data->Abort = 1;
                return;
        }
@@ -437,9 +437,9 @@ static void presub_scan_read(
 
        if (!rc_inst_can_use_presub(inst, *presub_opcode,
                        reader_data->Writer->U.I.DstReg.WriteMask,
-                       *src,
-                       reader_data->Writer->U.I.SrcReg[0],
-                       reader_data->Writer->U.I.SrcReg[1])) {
+                       src,
+                       &reader_data->Writer->U.I.SrcReg[0],
+                       &reader_data->Writer->U.I.SrcReg[1])) {
                reader_data->Abort = 1;
                return;
        }
index be5036ba674a5057d33e347e841c5a777ccb5855..a2e3f2ab2e54fcf8253f5cf7f4b3a6a99fb8c035 100644 (file)
@@ -22,8 +22,8 @@ static void test_rc_inst_can_use_presub(
        init_rc_normal_instruction(&replace_inst, replace_str);
 
        ret = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0,
-                       replace_inst.U.I.SrcReg[0],
-                       add_inst.U.I.SrcReg[0], add_inst.U.I.SrcReg[1]);
+                       &replace_inst.U.I.SrcReg[0],
+                       &add_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[1]);
 
        test_check(result, ret == expected);
 }