r300/compiler: Enable swizzle packing in the allocator for r300 and r400
author: Tom Stellard <tstellar@gmail.com>
Tue, 26 Apr 2011 07:29:39 +0000 (00:29 -0700)
committer: Tom Stellard <tstellar@gmail.com>
Sat, 30 Apr 2011 18:00:16 +0000 (11:00 -0700)
src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
src/mesa/drivers/dri/r300/compiler/radeon_variable.c

index 5223aaa71a4e4bc717a3d3463554e34b5bee3c7d..603818f78fc13c89c3bc86eb1aa47949c1559c44 100644 (file)
@@ -87,6 +87,18 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
        return 0;
 }
 
+/**
+ * Basic native-swizzle test for r300/r400: reports whether
+ * lookup_native_swizzle() finds an entry for the given swizzle.
+ *
+ * In most situations it is better to use r300_swizzle_is_native(), which can
+ * be accessed via struct radeon_compiler *c; c->SwizzleCaps->IsNative().
+ *
+ * @return 1 if the lookup succeeds, 0 otherwise.
+ */
+int r300_swizzle_is_native_basic(unsigned int swizzle)
+{
+       /* Collapse the lookup result (pointer or 0) into a plain 0/1 flag. */
+       return lookup_native_swizzle(swizzle) ? 1 : 0;
+}
 
 /**
  * Check whether the given instruction supports the swizzle and negate
index 118476af1324d99bc6fbf1fccf5f3d7ac35ef640..f2635be140ddfd64446f91e513e3d08e626538cf 100644 (file)
@@ -34,5 +34,6 @@ extern struct rc_swizzle_caps r300_swizzle_caps;
 
 unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle);
 unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle);
+int r300_swizzle_is_native_basic(unsigned int swizzle);
 
 #endif /* __R300_FRAGPROG_SWIZZLE_H_ */
index 3a6b0a7af15efa229942121583de8c5b93a21f0d..b077e7b7d657bdc273cde302445233fdb8903107 100644 (file)
@@ -141,7 +141,11 @@ static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info)
        }
 }
 
-static unsigned int adjust_channels(
+/**
+ * @return The swizzle that results from converting old_swizzle using
+ * conversion_swizzle
+ */
+unsigned int rc_adjust_channels(
        unsigned int old_swizzle,
        unsigned int conversion_swizzle)
 {
@@ -197,7 +201,8 @@ void rc_pair_rewrite_writemask(
 
        for (i = 0; i < info->NumSrcRegs; i++) {
                sub->Arg[i].Swizzle =
-                       adjust_channels(sub->Arg[i].Swizzle, conversion_swizzle);
+                       rc_adjust_channels(sub->Arg[i].Swizzle,
+                                               conversion_swizzle);
        }
 }
 
@@ -207,7 +212,7 @@ static void normal_rewrite_writemask_cb(
        struct rc_src_register * src)
 {
        unsigned int * new_mask = (unsigned int *)userdata;
-       src->Swizzle = adjust_channels(src->Swizzle, *new_mask);
+       src->Swizzle = rc_adjust_channels(src->Swizzle, *new_mask);
 }
 
 /**
@@ -605,3 +610,27 @@ struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop)
        }
        return NULL;
 }
+
+/**
+ * Builds a conversion swizzle that maps the channels enabled in old_mask
+ * onto the channels enabled in new_mask, pairing them in ascending order:
+ * the first enabled channel of old_mask goes to the first enabled channel
+ * of new_mask, the second to the second, and so on.
+ *
+ * Channels of old_mask that cannot be paired (because new_mask has fewer
+ * enabled channels) keep the initial value produced by
+ * rc_init_swizzle(RC_SWIZZLE_UNUSED, 0).
+ *
+ * @return A conversion swizzle for converting from old_mask->new_mask
+ */
+unsigned int rc_make_conversion_swizzle(
+       unsigned int old_mask,
+       unsigned int new_mask)
+{
+       unsigned int result = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+       unsigned int src_chan;
+       unsigned int dst_chan = 0;
+
+       for (src_chan = 0; src_chan < 4; src_chan++) {
+               if (GET_BIT(old_mask, src_chan)) {
+                       /* Advance to the next channel enabled in new_mask;
+                        * dst_chan never moves backwards, so each target
+                        * channel is used at most once. */
+                       while (dst_chan < 4 && !GET_BIT(new_mask, dst_chan)) {
+                               dst_chan++;
+                       }
+                       if (dst_chan < 4) {
+                               SET_SWZ(result, src_chan, dst_chan);
+                               dst_chan++;
+                       }
+               }
+       }
+       return result;
+}
index 1a0b96242e06d604c560fc90e6eb930397478526..2af289dfabdd923956c3482e9fa3b3e11e3ed4dd 100644 (file)
@@ -27,6 +27,10 @@ rc_swizzle rc_mask_to_swizzle(unsigned int mask);
 
 unsigned swizzle_mask(unsigned swizzle, unsigned mask);
 
+unsigned int rc_adjust_channels(
+       unsigned int old_swizzle,
+       unsigned int conversion_swizzle);
+
 void rc_pair_rewrite_writemask(
        struct rc_pair_sub_instruction * sub,
        unsigned int conversion_swizzle);
@@ -78,4 +82,8 @@ rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst);
 struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop);
 struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop);
 
+unsigned int rc_make_conversion_swizzle(
+       unsigned int old_mask,
+       unsigned int new_mask);
+
 #endif /* RADEON_PROGRAM_UTIL_H */
index fd03c73b3242465a539a92daddc292d6f5a62b1c..828c7533a01861c46163ccb6daf80bb80575a9c9 100644 (file)
@@ -34,6 +34,7 @@
 #include "program/register_allocate.h"
 #include "ralloc.h"
 
+#include "r300_fragprog_swizzle.h"
 #include "radeon_compiler.h"
 #include "radeon_compiler_util.h"
 #include "radeon_dataflow.h"
@@ -232,6 +233,26 @@ static unsigned int is_derivative(rc_opcode op)
        return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
 }
 
+/**
+ * Scans the first RC_REG_CLASS_COUNT entries of classes for a class that
+ * lists writemask among its first three Writemasks entries and whose
+ * WritemaskCount does not exceed max_writemask_count.
+ *
+ * @return The index of the first matching class, or -1 if none matches.
+ */
+static int find_class(
+       struct rc_class * classes,
+       unsigned int writemask,
+       unsigned int max_writemask_count)
+{
+       unsigned int class_idx;
+       for (class_idx = 0; class_idx < RC_REG_CLASS_COUNT; class_idx++) {
+               const struct rc_class * candidate = &classes[class_idx];
+               unsigned int slot;
+               /* Only classes within the allowed writemask count qualify. */
+               if (candidate->WritemaskCount <= max_writemask_count) {
+                       for (slot = 0; slot < 3; slot++) {
+                               if (candidate->Writemasks[slot] == writemask) {
+                                       return class_idx;
+                               }
+                       }
+               }
+       }
+       return -1;
+}
+
 static enum rc_reg_class variable_get_class(
        struct rc_variable * variable,
        struct rc_class * classes)
@@ -240,25 +261,48 @@ static enum rc_reg_class variable_get_class(
        unsigned int can_change_writemask= 1;
        unsigned int writemask = rc_variable_writemask_sum(variable);
        struct rc_list * readers = rc_variable_readers_union(variable);
+       int class_index;
 
        if (!variable->C->is_r500) {
-               unsigned int mask_count = 0;
+               struct rc_class c;
                /* The assumption here is that if an instruction has type
                 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
                 * r300 and r400 can't swizzle the result of a TEX lookup. */
                if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) {
                        writemask = RC_MASK_XYZW;
                }
-               for (i = 0; i < 4; i++) {
-                       if (GET_BIT(writemask, i)) {
-                               mask_count++;
-                       }
+
+               /* Check if it is possible to do swizzle packing for r300/r400
+                * without creating non-native swizzles. */
+               class_index = find_class(classes, writemask, 3);
+               if (class_index < 0) {
+                       goto error;
                }
-               /* XXX We should do swizzle packing for r300 and r400 here.
-                * We need to figure out how not to create non-native
-                * swizzles. */
-               if (mask_count > 1) {
-                       can_change_writemask = 0;
+               c = classes[class_index];
+               for (i = 0; i < c.WritemaskCount; i++) {
+                       int j;
+                       unsigned int conversion_swizzle =
+                                               rc_make_conversion_swizzle(
+                                               writemask, c.Writemasks[i]);
+                       for (j = 0; j < variable->ReaderCount; j++) {
+                               unsigned int old_swizzle;
+                               unsigned int new_swizzle;
+                               struct rc_reader r = variable->Readers[j];
+                               if (r.Inst->Type == RC_INSTRUCTION_PAIR ) {
+                                       old_swizzle = r.U.P.Arg->Swizzle;
+                               } else {
+                                       old_swizzle = r.U.I.Src->Swizzle;
+                               }
+                               new_swizzle = rc_adjust_channels(
+                                       old_swizzle, conversion_swizzle);
+                               if (!r300_swizzle_is_native_basic(new_swizzle)) {
+                                       can_change_writemask = 0;
+                                       break;
+                               }
+                       }
+                       if (!can_change_writemask) {
+                               break;
+                       }
                }
        }
 
@@ -285,20 +329,18 @@ static enum rc_reg_class variable_get_class(
                        }
                }
        }
-       for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
-               unsigned int j;
-               if (!can_change_writemask && classes[i].WritemaskCount > 1) {
-                       continue;
-               }
-               for (j = 0; j < 3; j++) {
-                       if (classes[i].Writemasks[j] == writemask) {
-                               return classes[i].Class;
-                       }
-               }
-       }
-       rc_error(variable->C, "Could not find class for index=%u mask=%u\n",
+
+       class_index = find_class(classes, writemask,
+                                               can_change_writemask ? 3 : 1);
+       if (class_index > -1) {
+               return classes[class_index].Class;
+       } else {
+error:
+               rc_error(variable->C,
+                               "Could not find class for index=%u mask=%u\n",
                                variable->Dst.Index, writemask);
-       return 0;
+               return 0;
+       }
 }
 
 static unsigned int overlap_live_intervals_array(
index 082717ed5eb757f6cfb0b892a9aac4da6e59468a..16fa5d28902dd33e70d3216aab9dbcf7bd62b0ff 100644 (file)
@@ -44,24 +44,11 @@ void rc_variable_change_dst(
        unsigned int new_index,
        unsigned int new_writemask)
 {
-       unsigned int new_idx, old_idx;
-       unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
        struct rc_variable * var_ptr;
        struct rc_list * readers;
        unsigned int old_mask = rc_variable_writemask_sum(var);
-
-       new_idx = 0;
-       for (old_idx = 0; old_idx < 4; old_idx++) {
-               if (!GET_BIT(old_mask, old_idx))
-                       continue;
-               for ( ; new_idx < 4; new_idx++) {
-                       if (GET_BIT(new_writemask, new_idx)) {
-                               SET_SWZ(conversion_swizzle, old_idx, new_idx);
-                               new_idx++;
-                               break;
-                       }
-               }
-       }
+       unsigned int conversion_swizzle =
+                       rc_make_conversion_swizzle(old_mask, new_writemask);
 
        for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) {
                if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {