compiler/radeon_compiler_util.c \
compiler/radeon_emulate_branches.c \
compiler/radeon_emulate_loops.c \
+ compiler/radeon_inline_literals.c \
compiler/radeon_program.c \
compiler/radeon_program_print.c \
compiler/radeon_opcodes.c \
{"emulate loops", 1, !is_r500, rc_emulate_loops, NULL},
{"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL},
{"dataflow optimize", 1, opt, rc_optimize, NULL},
+ {"inline literals", 1, is_r500 && opt, rc_inline_literals, NULL},
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
{"pair translate", 1, 1, rc_pair_translate, NULL},
return 1;
return 0;
+ } else if (reg.File == RC_FILE_INLINE) {
+ return 1;
} else {
/* ALU instructions support almost everything */
relevant = 0;
} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
use_temporary(code, src.Index);
return src.Index;
+ } else if (src.File == RC_FILE_INLINE) {
+ return src.Index | (1 << 7);
}
return 0;
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
- int *max_reg = userdata;
+ struct rc_program_stats *s = userdata; /* stats record handed in by rc_get_stats */
if (file == RC_FILE_TEMPORARY)
- (int)index > *max_reg ? *max_reg = index : 0;
+ (int)index > s->num_temp_regs ? s->num_temp_regs = index : 0; /* Keeps the
+ * highest temporary index read so far; rc_get_stats increments it
+ * afterwards to turn it into a count.
+ * NOTE(review): num_temp_regs is declared unsigned in the stats
+ * struct hunk of this patch and starts at 0 after the memset in
+ * rc_get_stats, so the (int) cast no longer makes this a signed
+ * comparison, and a program that reads no temporaries ends up
+ * reporting 1 instead of the 0 the old max_reg = -1 start gave.
+ * Confirm this is intended. */
+ if (file == RC_FILE_INLINE)
+ s->num_inline_literals++; /* one per inline source read reported to this callback, not per distinct value */
}
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
{
- int max_reg = -1;
struct rc_instruction * tmp;
memset(s, 0, sizeof(*s));
for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
tmp = tmp->Next){
const struct rc_opcode_info * info;
- rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg);
+ rc_for_all_reads_mask(tmp, reg_count_callback, s);
if (tmp->Type == RC_INSTRUCTION_NORMAL) {
info = rc_get_opcode_info(tmp->U.I.Opcode);
if (info->Opcode == RC_OPCODE_BEGIN_TEX)
s->num_tex_insts++;
s->num_insts++;
}
- s->num_temp_regs = max_reg + 1;
+ /* Increment here because reg_count_callback stores the max
+ * temporary reg index in s->num_temp_regs. */
+ s->num_temp_regs++;
}
static void print_stats(struct radeon_compiler * c)
"~%4u Presub Operations\n"
"~%4u OMOD Operations\n"
"~%4u Temporary Registers\n"
+ "~%4u Inline Literals\n"
"~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
- s.num_omod_ops, s.num_temp_regs);
+ s.num_omod_ops, s.num_temp_regs, s.num_inline_literals);
break;
default:
assert(0);
unsigned num_presub_ops;
unsigned num_temp_regs;
unsigned num_omod_ops;
+ unsigned num_inline_literals;
};
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);
/*@}*/
void rc_optimize(struct radeon_compiler * c, void *user);
+void rc_inline_literals(struct radeon_compiler *c, void *user);
#endif /* RADEON_DATAFLOW_H */
--- /dev/null
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_program.h"
+#include "radeon_program_constants.h"
+#include <stdio.h>
+
+#define VERBOSE 0 /* set to 1 to have DBG() print conversion traces to stderr */
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+/* IEEE-754:
+ * 22:0 mantissa
+ * 30:23 exponent
+ * 31 sign
+ *
+ * R300 (7-bit inline float, no sign bit):
+ * 2:0 mantissa
+ * 6:3 exponent (bias 7)
+ */
+/**
+ * Try to encode \p f as a 7-bit R300 inline float: 3 mantissa bits in
+ * bits 2:0 and a 4-bit exponent (bias 7) in bits 6:3.  The encoding has no
+ * sign bit, so the sign is reported through the return value instead.
+ *
+ * Only values whose unbiased exponent lies in [-7, 8] and whose mantissa
+ * uses nothing below its top three bits are representable; 0.0, denormals,
+ * infinities and NaN all fall outside the exponent range and are rejected.
+ *
+ * \param f value to encode
+ * \param r300_float_out receives the encoding; only written on success
+ * \return 0 if \p f cannot be encoded exactly, 1 if it was encoded and its
+ * sign bit is clear, -1 if it was encoded and its sign bit is set.
+ */
+static int ieee_754_to_r300_float(float f, unsigned char *r300_float_out)
+{
+	/* Read the bit pattern through a union; dereferencing &f through an
+	 * unsigned pointer violates the strict aliasing rules. */
+	union { float f; unsigned u; } pun;
+	/* Every bit that is not one of the top three IEEE mantissa bits. */
+	const unsigned mantissa_mask = 0xff8fffff;
+	unsigned float_bits, mantissa, biased_exponent, negate;
+	unsigned r300_exponent, r300_mantissa;
+	int exponent;
+
+	pun.f = f;
+	float_bits = pun.u;
+	/* XXX: Handle big-endian */
+	mantissa = float_bits & 0x007fffff;
+	biased_exponent = (float_bits & 0x7f800000) >> 23;
+	negate = !!(float_bits & 0x80000000);
+	/* Convert before subtracting so the result is computed as a signed
+	 * value rather than wrapping in unsigned arithmetic. */
+	exponent = (int)biased_exponent - 127;
+
+	DBG("Converting %f (0x%x) to 7-bit:\n", f, float_bits);
+	DBG("Raw exponent = %d\n", exponent);
+
+	if (exponent < -7 || exponent > 8) {
+		DBG("Failed exponent out of range\n\n");
+		return 0;
+	}
+
+	if (mantissa & mantissa_mask) {
+		DBG("Failed mantisa has too many bits:\n"
+			"manitssa=0x%x mantissa_mask=0x%x, and=0x%x\n\n",
+			mantissa, mantissa_mask,
+			mantissa & mantissa_mask);
+		return 0;
+	}
+
+	r300_exponent = exponent + 7;
+	r300_mantissa = (mantissa & ~mantissa_mask) >> 20;
+	*r300_float_out = r300_mantissa | (r300_exponent << 3);
+
+	DBG("Success! r300_float = 0x%x\n\n", *r300_float_out);
+
+	return negate ? -1 : 1;
+}
+
+/**
+ * Compiler pass: for every source operand that reads an immediate constant,
+ * try to replace the constant with a 7-bit inline literal (RC_FILE_INLINE).
+ * An operand is rewritten only when every channel it actually reads
+ * converts to the same 7-bit encoding; sign differences between channels
+ * are folded into the operand's Negate mask.
+ *
+ * \param c compiler whose program is rewritten in place
+ * \param user unused
+ */
+void rc_inline_literals(struct radeon_compiler *c, void *user)
+{
+	struct rc_instruction *inst;
+
+	for (inst = c->Program.Instructions.Next;
+	     inst != &c->Program.Instructions;
+	     inst = inst->Next) {
+		const struct rc_opcode_info *opcode =
+			rc_get_opcode_info(inst->U.I.Opcode);
+		unsigned src_idx;
+
+		/* XXX: Handle presub */
+
+		/* We aren't using rc_for_all_reads_src here, because presub
+		 * sources need to be handled differently. */
+		for (src_idx = 0; src_idx < opcode->NumSrcRegs; src_idx++) {
+			struct rc_src_register *src = &inst->U.I.SrcReg[src_idx];
+			struct rc_constant *imm;
+			unsigned inline_swizzle;
+			unsigned negate_mask = 0;
+			unsigned have_literal = 0;
+			unsigned char literal = 0;
+			unsigned chan;
+
+			/* Only immediate constants have a value known here. */
+			if (src->File != RC_FILE_CONSTANT)
+				continue;
+			imm = &c->Program.Constants.Constants[src->Index];
+			if (imm->Type != RC_CONSTANT_IMMEDIATE)
+				continue;
+
+			inline_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+			for (chan = 0; chan < 4; chan++) {
+				unsigned char encoded;
+				int sign;
+				unsigned comp = GET_SWZ(src->Swizzle, chan);
+
+				if (comp == RC_SWIZZLE_UNUSED)
+					continue;
+
+				sign = ieee_754_to_r300_float(
+					imm->u.Immediate[comp], &encoded);
+
+				/* Give up on this operand when the value is
+				 * not encodable, when it differs from the
+				 * literal already picked for an earlier
+				 * channel, or when it is negative under Abs
+				 * (presumably because Negate cannot express
+				 * that sign there -- TODO confirm). */
+				if (sign == 0 ||
+				    (have_literal && literal != encoded) ||
+				    (sign == -1 && src->Abs)) {
+					have_literal = 0;
+					break;
+				}
+
+				/* A no-op for every channel after the first:
+				 * the check above guarantees they match. */
+				literal = encoded;
+				have_literal = 1;
+
+				/* Use RC_SWIZZLE_W for the inline constant, so
+				 * it will become one of the alpha sources. */
+				SET_SWZ(inline_swizzle, chan, RC_SWIZZLE_W);
+				if (sign == -1)
+					negate_mask |= (1 << chan);
+			}
+
+			if (have_literal) {
+				src->File = RC_FILE_INLINE;
+				src->Index = literal;
+				src->Swizzle = inline_swizzle;
+				src->Negate = src->Negate ^ negate_mask;
+			}
+		}
+	}
+}
pair->Alpha.Arg[i].Source = source;
pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
- pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
+
+ if (istranscendent) {
+ pair->Alpha.Arg[i].Negate =
+ !!(inst->SrcReg[i].Negate &
+ inst->DstReg.WriteMask);
+ } else {
+ pair->Alpha.Arg[i].Negate =
+ !!(inst->SrcReg[i].Negate & RC_MASK_W);
+ }
}
}
* Indicates this register should use the result of the presubtract
* operation.
*/
- RC_FILE_PRESUB
+ RC_FILE_PRESUB,
+
+ /**
+ * Indicates that the source index has been encoded as a 7-bit float.
+ */
+ RC_FILE_INLINE
} rc_register_file;
enum {
struct rc_pair_instruction_source {
unsigned int Used:1;
- unsigned int File:3;
+ unsigned int File:4; /* widened from 3 bits; presumably so the new RC_FILE_INLINE enumerator fits -- confirm against rc_register_file */
unsigned int Index:RC_REGISTER_INDEX_BITS;
};
}
}
+/**
+ * Print a 7-bit R300 inline float, as carried in the index of an
+ * RC_FILE_INLINE register, as its decoded value followed by the raw index
+ * in hex.
+ *
+ * \param f stream to print to
+ * \param index 7-bit encoding: mantissa in bits 2:0, exponent (bias 7) in
+ * bits 6:3
+ */
+static void rc_print_inline_float(FILE * f, int index)
+{
+	/* Build the IEEE-754 bit pattern in a union; reading an unsigned
+	 * object through a float pointer violates strict aliasing. */
+	union { unsigned u; float f; } pun;
+	int r300_exponent = (index >> 3) & 0xf;
+	unsigned r300_mantissa = index & 0x7;
+	unsigned float_exponent;
+
+	/* Re-bias the exponent from 7 to the IEEE single bias of 127. */
+	r300_exponent -= 7;
+	float_exponent = r300_exponent + 127;
+	/* The three mantissa bits become the top three IEEE mantissa bits. */
+	pun.u = (r300_mantissa << 20) | (float_exponent << 23);
+
+	/* %x expects an unsigned argument. */
+	fprintf(f, "%f (0x%x)", pun.f, (unsigned)index);
+
+}
+
static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
{
if (file == RC_FILE_NONE) {
case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break;
default: fprintf(f, "special[%i]", index); break;
}
+ } else if (file == RC_FILE_INLINE) {
+ rc_print_inline_float(f, index);
} else {
const char * filename;
switch(file) {