r300/compiler: Add support for inline literals
author Tom Stellard <tstellar@gmail.com>
Sat, 14 Jan 2012 13:08:33 +0000 (08:08 -0500)
committer Tom Stellard <thomas.stellard@amd.com>
Mon, 5 Mar 2012 00:36:27 +0000 (19:36 -0500)
On R500 chips, shader instructions can take 7-bit (3-bit mantissa, 4-bit
exponent) floating point values as inputs in place of registers.

12 files changed:
src/gallium/drivers/r300/Makefile.sources
src/gallium/drivers/r300/compiler/r3xx_fragprog.c
src/gallium/drivers/r300/compiler/r500_fragprog.c
src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
src/gallium/drivers/r300/compiler/radeon_compiler.c
src/gallium/drivers/r300/compiler/radeon_compiler.h
src/gallium/drivers/r300/compiler/radeon_dataflow.h
src/gallium/drivers/r300/compiler/radeon_inline_literals.c [new file with mode: 0644]
src/gallium/drivers/r300/compiler/radeon_pair_translate.c
src/gallium/drivers/r300/compiler/radeon_program_constants.h
src/gallium/drivers/r300/compiler/radeon_program_pair.h
src/gallium/drivers/r300/compiler/radeon_program_print.c

index 90105d6e694c81570bad35986ed650714b03e753..e27b14e5702cb3f832b8136836ade162cef8501d 100644 (file)
@@ -28,6 +28,7 @@ C_SOURCES := \
        compiler/radeon_compiler_util.c \
        compiler/radeon_emulate_branches.c \
        compiler/radeon_emulate_loops.c \
+       compiler/radeon_inline_literals.c \
        compiler/radeon_program.c \
        compiler/radeon_program_print.c \
        compiler/radeon_opcodes.c \
index 8fea4db94ea8c6216d8575d929306ce987409958..8ef2d24fc99f416a98fdbd598ee6551c2293f59f 100644 (file)
@@ -125,6 +125,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
                {"emulate loops",               1, !is_r500,    rc_emulate_loops,               NULL},
                {"register rename",             1, !is_r500 || opt,             rc_rename_regs,                 NULL},
                {"dataflow optimize",           1, opt,         rc_optimize,                    NULL},
+               {"inline literals",             1, is_r500 && opt,              rc_inline_literals,                     NULL},
                {"dataflow swizzles",           1, 1,           rc_dataflow_swizzles,           NULL},
                {"dead constants",              1, 1,           rc_remove_unused_constants,     &c->code->constants_remap_table},
                {"pair translate",              1, 1,           rc_pair_translate,              NULL},
index cf99f5e453891a8acbe25ebf24cd73ae601886af..499aa92423bb22ae32127a3ebacd7386d1747428 100644 (file)
@@ -218,6 +218,8 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
                        return 1;
 
                return 0;
+       } else if (reg.File == RC_FILE_INLINE) {
+               return 1;
        } else {
                /* ALU instructions support almost everything */
                relevant = 0;
index 87b96d15079b1af32d8437a3ec3062d925f6f4d3..f6b6c0f9c0605d4bc8cd9d501dc1aedb4d89f78c 100644 (file)
@@ -210,6 +210,8 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r
        } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
                use_temporary(code, src.Index);
                return src.Index;
+       } else if (src.File == RC_FILE_INLINE) {
+               return src.Index | (1 << 7);
        }
 
        return 0;
index 986e3b77403343bc71b1d4d4d97401d58b4e7f60..4d4eb6458412bc5b8591a28fa39c1ea8c4a99967 100644 (file)
@@ -357,21 +357,22 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
 static void reg_count_callback(void * userdata, struct rc_instruction * inst,
                rc_register_file file, unsigned int index, unsigned int mask)
 {
-       int *max_reg = userdata;
+       struct rc_program_stats *s = userdata;
        if (file == RC_FILE_TEMPORARY)
-               (int)index > *max_reg ? *max_reg = index : 0;
+               (int)index > s->num_temp_regs ? s->num_temp_regs = index : 0;
+       if (file == RC_FILE_INLINE)
+               s->num_inline_literals++;
 }
 
 void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
 {
-       int max_reg = -1;
        struct rc_instruction * tmp;
        memset(s, 0, sizeof(*s));
 
        for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
                                                        tmp = tmp->Next){
                const struct rc_opcode_info * info;
-               rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg);
+               rc_for_all_reads_mask(tmp, reg_count_callback, s);
                if (tmp->Type == RC_INSTRUCTION_NORMAL) {
                        info = rc_get_opcode_info(tmp->U.I.Opcode);
                        if (info->Opcode == RC_OPCODE_BEGIN_TEX)
@@ -405,7 +406,9 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
                        s->num_tex_insts++;
                s->num_insts++;
        }
-       s->num_temp_regs = max_reg + 1;
+       /* Increment here because reg_count_callback stores the max
+        * temporary reg index in s->num_temp_regs. */
+       s->num_temp_regs++;
 }
 
 static void print_stats(struct radeon_compiler * c)
@@ -437,10 +440,11 @@ static void print_stats(struct radeon_compiler * c)
                               "~%4u Presub Operations\n"
                               "~%4u OMOD Operations\n"
                               "~%4u Temporary Registers\n"
+                              "~%4u Inline Literals\n"
                               "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
                               s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
                               s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
-                              s.num_omod_ops, s.num_temp_regs);
+                              s.num_omod_ops, s.num_temp_regs, s.num_inline_literals);
                break;
        default:
                assert(0);
index ac9691c816f2d375a8111ad4caf4aedda8ddf07a..e7ccbb732d125da6176dc4f281acd8ef7c7ac3fc 100644 (file)
@@ -161,6 +161,7 @@ struct rc_program_stats {
        unsigned num_presub_ops;
        unsigned num_temp_regs;
        unsigned num_omod_ops;
+       unsigned num_inline_literals;
 };
 
 void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);
index d8a627258ea3d81195d8d96bf73e2f10c81a1c48..bb8d48206e26e987e303d034e83b1dd7b8278a2b 100644 (file)
@@ -130,5 +130,6 @@ void rc_dataflow_swizzles(struct radeon_compiler * c, void *user);
 /*@}*/
 
 void rc_optimize(struct radeon_compiler * c, void *user);
+void rc_inline_literals(struct radeon_compiler *c, void *user);
 
 #endif /* RADEON_DATAFLOW_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_inline_literals.c b/src/gallium/drivers/r300/compiler/radeon_inline_literals.c
new file mode 100644 (file)
index 0000000..568a3d6
--- /dev/null
@@ -0,0 +1,140 @@
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_program.h"
+#include "radeon_program_constants.h"
+#include <stdio.h>
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+/* Try to encode a 32-bit float as a 7-bit inline literal.
+ *
+ * IEEE-754:
+ * 22:0 mantissa
+ * 30:23 exponent
+ * 31 sign
+ *
+ * R300:
+ * 0:2 mantissa
+ * 3:6 exponent (bias 7)
+ *
+ * The sign is not part of the 7-bit value; it is reported through the
+ * return value so the caller can apply it separately.
+ *
+ * f: value to convert.
+ * r300_float_out: receives the encoded value (mantissa | exponent << 3);
+ *   only written on success.
+ *
+ * Returns 0 if f cannot be represented (unbiased exponent outside
+ * [-7, 8], or any of the low 20 mantissa bits set), 1 if f was encoded
+ * and its sign bit is clear, -1 if f was encoded and its sign bit is set.
+ */
+static int ieee_754_to_r300_float(float f, unsigned char *r300_float_out)
+{
+       unsigned float_bits = *((unsigned *)&f);
+       /* XXX: Handle big-endian
+        * NOTE(review): the pointer cast above reads a float object through
+        * an unsigned lvalue; memcpy or a union would avoid depending on the
+        * compiler tolerating this type pun. */
+       unsigned mantissa = float_bits &         0x007fffff;
+       unsigned biased_exponent = (float_bits & 0x7f800000) >> 23;
+       unsigned negate = !!(float_bits &         0x80000000);
+       int exponent = biased_exponent - 127;
+       unsigned mantissa_mask = 0xff8fffff; /* every bit except 22:20 */
+       unsigned r300_exponent, r300_mantissa;
+
+       DBG("Converting %f (0x%x) to 7-bit:\n", f, float_bits);
+       DBG("Raw exponent = %d\n", exponent);
+
+       if (exponent < -7 || exponent > 8) { /* exponent + 7 must fit in 4 bits */
+               DBG("Failed exponent out of range\n\n");
+               return 0;
+       }
+
+       if (mantissa & mantissa_mask) { /* bits below the top three are set */
+               DBG("Failed mantisa has too many bits:\n"
+                       "manitssa=0x%x mantissa_mask=0x%x, and=0x%x\n\n",
+                       mantissa, mantissa_mask,
+                       mantissa & mantissa_mask);
+               return 0;
+       }
+
+       r300_exponent = exponent + 7; /* apply the 7-bit format's bias */
+       r300_mantissa = (mantissa & ~mantissa_mask) >> 20; /* keep bits 22:20 */
+       *r300_float_out = r300_mantissa | (r300_exponent << 3);
+
+       DBG("Success! r300_float = 0x%x\n\n", *r300_float_out);
+
+       if (negate)
+               return -1;
+       else
+               return 1;
+}
+/* Compiler pass: replace reads of immediate constants with inline literals.
+ *
+ * For each source operand of each instruction that reads a constant of
+ * type RC_CONSTANT_IMMEDIATE, every used swizzle channel is converted with
+ * ieee_754_to_r300_float().  If all used channels encode to the same 7-bit
+ * value, the operand is rewritten to RC_FILE_INLINE with that value as its
+ * Index, RC_SWIZZLE_W in every used channel, and the per-channel signs
+ * folded into Negate.  Otherwise the operand is left untouched.
+ *
+ * c: compiler whose program is modified in place.
+ * user: unused.
+ *
+ * NOTE(review): inst->U.I is read without checking inst->Type; confirm
+ * that only normal (non-pair) instructions reach this pass.
+ */
+void rc_inline_literals(struct radeon_compiler *c, void *user)
+{
+       struct rc_instruction * inst;
+
+       for(inst = c->Program.Instructions.Next;
+                                       inst != &c->Program.Instructions;
+                                       inst = inst->Next) {
+               const struct rc_opcode_info * info =
+                                       rc_get_opcode_info(inst->U.I.Opcode);
+
+               unsigned src_idx;
+               struct rc_constant * constant;
+               float float_value;
+               unsigned char r300_float; /* only read once use_literal is set */
+               int ret;
+
+               /* XXX: Handle presub */
+
+               /* We aren't using rc_for_all_reads_src here, because presub
+                * sources need to be handled differently. */
+               for (src_idx = 0; src_idx < info->NumSrcRegs; src_idx++) {
+                       unsigned new_swizzle;
+                       unsigned use_literal = 0;
+                       unsigned negate_mask = 0;
+                       unsigned swz, chan;
+                       struct rc_src_register * src_reg =
+                                               &inst->U.I.SrcReg[src_idx];
+                       swz = RC_SWIZZLE_UNUSED;
+                       if (src_reg->File != RC_FILE_CONSTANT) {
+                               continue;
+                       }
+                       constant =
+                               &c->Program.Constants.Constants[src_reg->Index];
+                       if (constant->Type != RC_CONSTANT_IMMEDIATE) {
+                               continue;
+                       }
+                       new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+                       for (chan = 0; chan < 4; chan++) {
+                               unsigned char r300_float_tmp;
+                               swz = GET_SWZ(src_reg->Swizzle, chan);
+                               if (swz == RC_SWIZZLE_UNUSED) {
+                                       continue;
+                               }
+                               float_value = constant->u.Immediate[swz]; /* NOTE(review):
+                                * swz is only checked against RC_SWIZZLE_UNUSED
+                                * before this lookup; confirm it is always a
+                                * valid index into Immediate. */
+                               ret = ieee_754_to_r300_float(float_value,
+                                                               &r300_float_tmp);
+                               if (!ret || (use_literal &&
+                                               r300_float != r300_float_tmp)) {
+                                       use_literal = 0; /* unencodable, or differs from an earlier channel */
+                                       break;
+                               }
+
+                               if (ret == -1 && src_reg->Abs) { /* negative value read through Abs: give up */
+                                       use_literal = 0;
+                                       break;
+                               }
+
+                               if (!use_literal) { /* first used channel fixes the literal */
+                                       r300_float = r300_float_tmp;
+                                       use_literal = 1;
+                               }
+
+                               /* Use RC_SWIZZLE_W for the inline constant, so
+                                * it will become one of the alpha sources. */
+                               SET_SWZ(new_swizzle, chan, RC_SWIZZLE_W);
+                               if (ret == -1) {
+                                       negate_mask |= (1 << chan);
+                               }
+                       }
+
+                       if (!use_literal) {
+                               continue;
+                       }
+                       src_reg->File = RC_FILE_INLINE;
+                       src_reg->Index = r300_float;
+                       src_reg->Swizzle = new_swizzle;
+                       src_reg->Negate = src_reg->Negate ^ negate_mask; /* combine literal signs with existing negation */
+               }
+       }
+}
index 7d9c8d1fab6b682f33dd8acede8fd58af702ff65..c6050bdf4ddc10ff9683d4ff865514a3f406bff0 100644 (file)
@@ -268,7 +268,15 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
                        pair->Alpha.Arg[i].Source = source;
                        pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
                        pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
-                       pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
+
+                       if (istranscendent) {
+                               pair->Alpha.Arg[i].Negate =
+                                       !!(inst->SrcReg[i].Negate &
+                                                       inst->DstReg.WriteMask);
+                       } else {
+                               pair->Alpha.Arg[i].Negate =
+                                       !!(inst->SrcReg[i].Negate & RC_MASK_W);
+                       }
                }
        }
 
index 4f59c47815e5d2cb4bdcd48f9ce7fe6684275a00..c07c492b0c990091942ae64eadd31a4321c2f712 100644 (file)
@@ -85,7 +85,12 @@ typedef enum {
         * Indicates this register should use the result of the presubtract
         * operation.
         */
-       RC_FILE_PRESUB
+       RC_FILE_PRESUB,
+
+       /**
+        * Indicates that the source index has been encoded as a 7-bit float.
+        */
+       RC_FILE_INLINE
 } rc_register_file;
 
 enum {
index b6eb0ebe47d48f0d4e675d06149354ec8734a605..085ff994501a5f017d603c3c6acf8fdb496a8590 100644 (file)
@@ -57,7 +57,7 @@ struct radeon_compiler;
 
 struct rc_pair_instruction_source {
        unsigned int Used:1;
-       unsigned int File:3;
+       unsigned int File:4;
        unsigned int Index:RC_REGISTER_INDEX_BITS;
 };
 
index dc40d7fd4f296795df2faeb697cfd4660a477ba2..e3d2104b2504fa537280bc4cac79950fbe39019a 100644 (file)
@@ -109,6 +109,22 @@ static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func fun
        }
 }
 
+static void rc_print_inline_float(FILE * f, int index)
+{      /* Print the 7-bit inline literal `index` to f as "<float> (0x<raw>)".
+        * Bits 0:2 of index are taken as the mantissa and bits 3:6 as the
+        * exponent; the value is rebuilt as 32-bit float bits with the sign
+        * bit left clear. */
+       int r300_exponent = (index >> 3) & 0xf;
+       unsigned r300_mantissa = index & 0x7;
+       unsigned float_exponent;
+       unsigned real_float;
+       float * print_float = (float*) &real_float; /* NOTE(review): reads an
+        * unsigned object through a float pointer; memcpy or a union would
+        * avoid depending on the compiler tolerating this type pun. */
+
+       r300_exponent -= 7; /* remove the inline format's bias */
+       float_exponent = r300_exponent + 127; /* apply the 32-bit float bias */
+       real_float = (r300_mantissa << 20) | (float_exponent << 23); /* mantissa to bits 22:20, exponent to 30:23 */
+
+       fprintf(f, "%f (0x%x)", *print_float, index);
+
+}
+
 static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
 {
        if (file == RC_FILE_NONE) {
@@ -118,6 +134,8 @@ static void rc_print_register(FILE * f, rc_register_file file, int index, unsign
                case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break;
                default: fprintf(f, "special[%i]", index); break;
                }
+       } else if (file == RC_FILE_INLINE) {
+               rc_print_inline_float(f, index);
        } else {
                const char * filename;
                switch(file) {