if (inst->Nop)
code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
+ /* Handle Output Modifier
+ * According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */
+ if (inst->RGB.Omod) {
+ if (inst->RGB.Omod == RC_OMOD_DISABLE) {
+ rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
+ }
+ code->alu.inst[ip].rgb_inst |=
+ (inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT);
+ }
+ if (inst->Alpha.Omod) {
+ if (inst->Alpha.Omod == RC_OMOD_DISABLE) {
+ rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
+ }
+ code->alu.inst[ip].alpha_inst |=
+ (inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT);
+ }
return 1;
}
break;
}
+ /* Set the output modifier */
+ code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
+ code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;
+
code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
s->num_alpha_insts++;
if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
s->num_rgb_insts++;
+ if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 &&
+ tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
+ s->num_omod_ops++;
+ }
+ if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&
+ tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
+ s->num_omod_ops++;
+ }
info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
}
if (info->IsFlowControl)
"~%4u Flow Control Instructions\n"
"~%4u Texture Instructions\n"
"~%4u Presub Operations\n"
+ "~%4u OMOD Operations\n"
"~%4u Temporary Registers\n"
"~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
- s.num_temp_regs);
+ s.num_omod_ops, s.num_temp_regs);
break;
default:
assert(0);
unsigned is_r500:1;
unsigned has_half_swizzles:1;
unsigned has_presub:1;
+ unsigned has_omod:1;
unsigned disable_optimizations:1;
unsigned max_temp_regs;
unsigned max_constants;
unsigned num_alpha_insts;
unsigned num_presub_ops;
unsigned num_temp_regs;
+ unsigned num_omod_ops;
};
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
+#include "radeon_list.h"
#include "radeon_swizzle.h"
+#include "radeon_variable.h"
struct src_clobbered_reads_cb_data {
rc_register_file File;
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
|| inst->U.I.SaturateMode
- || inst->U.I.WriteALUResult) {
+ || inst->U.I.WriteALUResult
+ || inst->U.I.Omod) {
return 0;
}
return 0;
}
+struct peephole_mul_cb_data { /* state shared between peephole_mul_omod() and omod_filter_reader_cb() */
+ struct rc_dst_register * Writer; /* destination register whose reads the callback looks for */
+ unsigned int Clobbered; /* set to 1 by the callback once a read of Writer is reported */
+};
+
+/**
+ * Read callback handed to rc_for_all_reads_mask() by peephole_mul_omod().
+ *
+ * userdata points at a struct peephole_mul_cb_data.  When
+ * rc_src_reads_dst_mask() reports that the read described by
+ * (file, index, mask) touches the destination stored in its Writer field,
+ * the Clobbered flag is raised.  The flag is never cleared here.
+ */
+static void omod_filter_reader_cb(
+	void * userdata,
+	struct rc_instruction * inst,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int mask)
+{
+	struct peephole_mul_cb_data * cb_data = userdata;
+	const struct rc_dst_register * dst = cb_data->Writer;
+
+	/* inst belongs to the callback signature but is not needed here. */
+	if (!rc_src_reads_dst_mask(file, mask, index,
+				dst->File, dst->Index, dst->WriteMask)) {
+		return;
+	}
+
+	cb_data->Clobbered = 1;
+}
+
+/**
+ * Try to fold a MUL by an immediate power of two into the output modifier
+ * of the instruction(s) that write its other operand.
+ *
+ * The fold is attempted only when all of the following hold:
+ *  - one source is a temporary and the other a constant-file register;
+ *  - the constant source is not negated and every used channel of its
+ *    swizzle selects the same component;
+ *  - the constant is an immediate equal to 2, 4, 8, 1/2, 1/4 or 1/8;
+ *  - rc_variable_list_get_writers_one_reader() yields a writer list for
+ *    the temporary source;
+ *  - no writer is a texture instruction, saturates its result, or already
+ *    carries an output modifier;
+ *  - no instruction between a writer and the MUL reads the MUL's
+ *    destination register.
+ *
+ * On success every writer receives the output modifier together with the
+ * MUL's destination register and saturate mode, and the MUL is removed.
+ *
+ * @param c         compiler that owns the program
+ * @param inst_mul  the multiply instruction to fold
+ * @param var_list  variable list used to look up the writers
+ * @return 1 if inst_mul was removed from the program, 0 if nothing changed.
+ */
+static int peephole_mul_omod(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst_mul,
+	struct rc_list * var_list)
+{
+	unsigned int chan = 0, swz, i;
+	int const_index = -1;
+	int temp_index = -1;
+	float const_value;
+	rc_omod_op omod_op = RC_OMOD_DISABLE;
+	struct rc_list * writer_list;
+	struct rc_variable * var;
+	struct peephole_mul_cb_data cb_data;
+
+	for (i = 0; i < 2; i++) {
+		unsigned int j;
+		if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
+			&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
+			return 0;
+		}
+		if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+			if (temp_index != -1) {
+				/* The instruction has two temp sources */
+				return 0;
+			} else {
+				temp_index = i;
+				continue;
+			}
+		}
+		/* If we get this far Src[i] must be a constant src */
+		if (inst_mul->U.I.SrcReg[i].Negate) {
+			return 0;
+		}
+		/* Every used channel of the constant src must select the
+		 * same swizzle component; remember one channel that uses it. */
+		swz = RC_SWIZZLE_UNUSED;
+		chan = 0;
+		for (j = 0; j < 4; j++) {
+			unsigned int j_swz =
+				GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
+			if (j_swz == RC_SWIZZLE_UNUSED) {
+				continue;
+			}
+			if (swz == RC_SWIZZLE_UNUSED) {
+				swz = j_swz;
+				chan = j;
+			} else if (j_swz != swz) {
+				return 0;
+			}
+		}
+
+		if (const_index != -1) {
+			/* The instruction has two constant sources */
+			return 0;
+		} else {
+			const_index = i;
+		}
+	}
+
+	/* Two temps or two constants returned above, so at this point there
+	 * is exactly one of each and both indices are valid. */
+	if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
+				inst_mul->U.I.SrcReg[const_index].Index)) {
+		return 0;
+	}
+	const_value = rc_get_constant_value(c,
+			inst_mul->U.I.SrcReg[const_index].Index,
+			inst_mul->U.I.SrcReg[const_index].Swizzle,
+			inst_mul->U.I.SrcReg[const_index].Negate,
+			chan);
+
+	/* All candidate factors are exactly representable as floats, so the
+	 * equality comparisons are exact. */
+	if (const_value == 2.0f) {
+		omod_op = RC_OMOD_MUL_2;
+	} else if (const_value == 4.0f) {
+		omod_op = RC_OMOD_MUL_4;
+	} else if (const_value == 8.0f) {
+		omod_op = RC_OMOD_MUL_8;
+	} else if (const_value == (1.0f / 2.0f)) {
+		omod_op = RC_OMOD_DIV_2;
+	} else if (const_value == (1.0f / 4.0f)) {
+		omod_op = RC_OMOD_DIV_4;
+	} else if (const_value == (1.0f / 8.0f)) {
+		omod_op = RC_OMOD_DIV_8;
+	} else {
+		return 0;
+	}
+
+	writer_list = rc_variable_list_get_writers_one_reader(var_list,
+		RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
+
+	if (!writer_list) {
+		return 0;
+	}
+
+	cb_data.Clobbered = 0;
+	cb_data.Writer = &inst_mul->U.I.DstReg;
+	for (var = writer_list->Item; var; var = var->Friend) {
+		struct rc_instruction * inst;
+		const struct rc_opcode_info * info = rc_get_opcode_info(
+							var->Inst->U.I.Opcode);
+		if (info->HasTexture) {
+			return 0;
+		}
+		if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
+			return 0;
+		}
+		/* An instruction has a single output modifier.  If an
+		 * earlier fold already set one on this writer, overwriting
+		 * it would drop that scale factor. */
+		if (var->Inst->U.I.Omod != RC_OMOD_MUL_1) {
+			return 0;
+		}
+		/* NOTE(review): this walk assumes var->Inst precedes inst_mul
+		 * in the instruction list -- confirm for writers inside loops. */
+		for (inst = inst_mul->Prev; inst != var->Inst;
+							inst = inst->Prev) {
+			rc_for_all_reads_mask(inst, omod_filter_reader_cb,
+								&cb_data);
+			if (cb_data.Clobbered) {
+				break;
+			}
+		}
+	}
+
+	if (cb_data.Clobbered) {
+		return 0;
+	}
+
+	/* Rewrite the instructions.  Every writer in the Friend chain must
+	 * be updated: the MUL is removed below, so a writer left pointing at
+	 * the old temporary would no longer reach the MUL's destination. */
+	for (var = writer_list->Item; var; var = var->Friend) {
+		var->Inst->U.I.Omod = omod_op;
+		var->Inst->U.I.DstReg = inst_mul->U.I.DstReg;
+		var->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
+	}
+
+	rc_remove_instruction(inst_mul);
+
+	return 1;
+}
+
/**
* @return
* 0 if inst is still part of the program.
void rc_optimize(struct radeon_compiler * c, void *user)
{
struct rc_instruction * inst = c->Program.Instructions.Next;
+ struct rc_list * var_list;
while(inst != &c->Program.Instructions) {
struct rc_instruction * cur = inst;
inst = inst->Next;
/* cur may no longer be part of the program */
}
}
+
+ if (!c->has_omod) {
+ return;
+ }
+
+ inst = c->Program.Instructions.Next;
+ while(inst != &c->Program.Instructions) {
+ struct rc_instruction * cur = inst;
+ inst = inst->Next;
+ if (cur->U.I.Opcode == RC_OPCODE_MUL) {
+ var_list = rc_get_variables(c);
+ peephole_mul_omod(c, cur, var_list);
+ }
+ }
}
rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
rgb->Alpha.Saturate = alpha->Alpha.Saturate;
+ rgb->Alpha.Omod = alpha->Alpha.Omod;
/* Merge ALU result writing */
if (alpha->WriteALUResult) {
pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
+ pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
sizeof(pair_inst->Alpha.Arg));
/* Move the swizzles into the first chan */
}
}
+ if (needrgb) {
+ pair->RGB.Omod = inst->Omod;
+ }
+ if (needalpha) {
+ pair->Alpha.Omod = inst->Omod;
+ }
+
if (inst->WriteALUResult) {
pair->WriteALUResult = inst->WriteALUResult;
pair->ALUResultCompare = inst->ALUResultCompare;
/** This holds information about the presubtract operation used by
* this instruction. */
struct rc_presub_instruction PreSub;
+
+ rc_omod_op Omod;
};
typedef enum {
RC_PRESUB_INV
} rc_presubtract_op;
+typedef enum { /* output modifier selector; the emit code shifts these values unchanged into the hardware OMOD field, so the order matters */
+ RC_OMOD_MUL_1, /* 0: identity, result is left unscaled */
+ RC_OMOD_MUL_2, /* 1: result * 2 */
+ RC_OMOD_MUL_4, /* 2: result * 4 */
+ RC_OMOD_MUL_8, /* 3: result * 8 */
+ RC_OMOD_DIV_2, /* 4: result / 2 */
+ RC_OMOD_DIV_4, /* 5: result / 4 */
+ RC_OMOD_DIV_8, /* 6: result / 8 */
+ RC_OMOD_DISABLE /* 7: matches the R500 *_OMOD_DISABLE encoding; the r300 emit path reports an error for it */
+} rc_omod_op;
+
static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
switch(op){
case RC_PRESUB_BIAS:
unsigned int OutputWriteMask:3;
unsigned int DepthWriteMask:1;
unsigned int Saturate:1;
+ unsigned int Omod:3;
struct rc_pair_instruction_source Src[4];
struct rc_pair_instruction_arg Arg[3];
}
}
+/**
+ * Print the textual form of an output modifier, preceded by one space.
+ *
+ * RC_OMOD_MUL_1, RC_OMOD_DISABLE and any value outside the enum produce
+ * no output at all.
+ */
+static void print_omod_op(FILE * f, rc_omod_op op)
+{
+	/* Indexed by rc_omod_op; slots without an initializer stay NULL. */
+	static const char * const omod_names[] = {
+		[RC_OMOD_MUL_2] = "* 2",
+		[RC_OMOD_MUL_4] = "* 4",
+		[RC_OMOD_MUL_8] = "* 8",
+		[RC_OMOD_DIV_2] = "/ 2",
+		[RC_OMOD_DIV_4] = "/ 4",
+		[RC_OMOD_DIV_8] = "/ 8",
+	};
+	const unsigned int idx = (unsigned int)op;
+
+	/* RC_OMOD_DISABLE and unknown values fall past the end of the table. */
+	if (idx >= sizeof(omod_names) / sizeof(omod_names[0])) {
+		return;
+	}
+	/* RC_OMOD_MUL_1 has no entry. */
+	if (!omod_names[idx]) {
+		return;
+	}
+
+	fprintf(f, " %s", omod_names[idx]);
+}
+
static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
{
if (func == RC_COMPARE_FUNC_NEVER) {
if (opcode->HasDstReg) {
fprintf(f, " ");
rc_print_dst_register(f, inst->U.I.DstReg);
+ print_omod_op(f, inst->U.I.Omod);
if (opcode->NumSrcRegs)
fprintf(f, ",");
}
if (inst->WriteALUResult == RC_ALURESULT_X)
fprintf(f, " aluresult");
+ print_omod_op(f, inst->RGB.Omod);
+
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
if (inst->WriteALUResult == RC_ALURESULT_W)
fprintf(f, " aluresult");
+ print_omod_op(f, inst->Alpha.Omod);
+
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
return writer_list;
}
+/**
+ * Look up the writers of src with rc_variable_list_get_writers() and keep
+ * the result only when those writers have at most one reader in total.
+ *
+ * @param var_list  variable list to search
+ * @param src_type  forwarded unchanged to rc_variable_list_get_writers()
+ * @param src       source whose writers are wanted
+ * @return the writer list, or NULL when the lookup returned no list or
+ *         when the union of the writers' readers holds more than one
+ *         entry.
+ */
+struct rc_list * rc_variable_list_get_writers_one_reader(
+	struct rc_list * var_list,
+	unsigned int src_type,
+	void * src)
+{
+	struct rc_list * reader_list;
+	struct rc_list * writer_list =
+		rc_variable_list_get_writers(var_list, src_type, src);
+
+	/* Callers already treat NULL as "no usable writers"; without a list
+	 * there is no Item to dereference. */
+	if (!writer_list) {
+		return NULL;
+	}
+
+	reader_list = rc_variable_readers_union(writer_list->Item);
+	if (rc_list_count(reader_list) > 1) {
+		return NULL;
+	}
+
+	return writer_list;
+}
+
void rc_variable_print(struct rc_variable * var)
{
unsigned int i;
unsigned int src_type,
void * src);
+struct rc_list * rc_variable_list_get_writers_one_reader( /* writers of src, or NULL when their combined reader list has more than one entry */
+ struct rc_list * var_list, /* variable list searched for the writers */
+ unsigned int src_type, /* forwarded unchanged to rc_variable_list_get_writers() */
+ void * src); /* source whose writers are wanted */
+
void rc_variable_print(struct rc_variable * var);
#endif /* RADEON_VARIABLE_H */
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
compiler.Base.has_half_swizzles = TRUE;
compiler.Base.has_presub = TRUE;
+ compiler.Base.has_omod = TRUE;
compiler.Base.max_temp_regs =
compiler.Base.is_r500 ? 128 : (compiler.Base.is_r400 ? 64 : 32);
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
# define R300_ALU_OUTC_FRC (9 << 23)
# define R300_ALU_OUTC_REPL_ALPHA (10 << 23)
-# define R300_ALU_OUTC_MOD_NOP (0 << 27)
-# define R300_ALU_OUTC_MOD_MUL2 (1 << 27)
-# define R300_ALU_OUTC_MOD_MUL4 (2 << 27)
-# define R300_ALU_OUTC_MOD_MUL8 (3 << 27)
-# define R300_ALU_OUTC_MOD_DIV2 (4 << 27)
-# define R300_ALU_OUTC_MOD_DIV4 (5 << 27)
-# define R300_ALU_OUTC_MOD_DIV8 (6 << 27)
+# define R300_ALU_OUTC_MOD_SHIFT 27
+# define R300_ALU_OUTC_MOD_NOP (0 << R300_ALU_OUTC_MOD_SHIFT)
+# define R300_ALU_OUTC_MOD_MUL2 (1 << R300_ALU_OUTC_MOD_SHIFT)
+# define R300_ALU_OUTC_MOD_MUL4 (2 << R300_ALU_OUTC_MOD_SHIFT)
+# define R300_ALU_OUTC_MOD_MUL8 (3 << R300_ALU_OUTC_MOD_SHIFT)
+# define R300_ALU_OUTC_MOD_DIV2 (4 << R300_ALU_OUTC_MOD_SHIFT)
+# define R300_ALU_OUTC_MOD_DIV4 (5 << R300_ALU_OUTC_MOD_SHIFT)
+# define R300_ALU_OUTC_MOD_DIV8 (6 << R300_ALU_OUTC_MOD_SHIFT)
# define R300_ALU_OUTC_CLAMP (1 << 30)
# define R300_ALU_INSERT_NOP (1 << 31)
# define R500_ALPHA_MOD_B_NEG (1 << 24)
# define R500_ALPHA_MOD_B_ABS (2 << 24)
# define R500_ALPHA_MOD_B_NAB (3 << 24)
-# define R500_ALPHA_OMOD_IDENTITY (0 << 26)
-# define R500_ALPHA_OMOD_MUL_2 (1 << 26)
-# define R500_ALPHA_OMOD_MUL_4 (2 << 26)
-# define R500_ALPHA_OMOD_MUL_8 (3 << 26)
-# define R500_ALPHA_OMOD_DIV_2 (4 << 26)
-# define R500_ALPHA_OMOD_DIV_4 (5 << 26)
-# define R500_ALPHA_OMOD_DIV_8 (6 << 26)
-# define R500_ALPHA_OMOD_DISABLE (7 << 26)
+# define R500_ALPHA_OMOD_SHIFT 26
+# define R500_ALPHA_OMOD_IDENTITY (0 << R500_ALPHA_OMOD_SHIFT)
+# define R500_ALPHA_OMOD_MUL_2 (1 << R500_ALPHA_OMOD_SHIFT)
+# define R500_ALPHA_OMOD_MUL_4 (2 << R500_ALPHA_OMOD_SHIFT)
+# define R500_ALPHA_OMOD_MUL_8 (3 << R500_ALPHA_OMOD_SHIFT)
+# define R500_ALPHA_OMOD_DIV_2 (4 << R500_ALPHA_OMOD_SHIFT)
+# define R500_ALPHA_OMOD_DIV_4 (5 << R500_ALPHA_OMOD_SHIFT)
+# define R500_ALPHA_OMOD_DIV_8 (6 << R500_ALPHA_OMOD_SHIFT)
+# define R500_ALPHA_OMOD_DISABLE (7 << R500_ALPHA_OMOD_SHIFT)
# define R500_ALPHA_TARGET(x) ((x) << 29)
# define R500_ALPHA_W_OMASK (1 << 31)
#define R500_US_ALU_ALPHA_ADDR_0 0x9800
# define R500_ALU_RGB_MOD_B_NEG (1 << 24)
# define R500_ALU_RGB_MOD_B_ABS (2 << 24)
# define R500_ALU_RGB_MOD_B_NAB (3 << 24)
-# define R500_ALU_RGB_OMOD_IDENTITY (0 << 26)
-# define R500_ALU_RGB_OMOD_MUL_2 (1 << 26)
-# define R500_ALU_RGB_OMOD_MUL_4 (2 << 26)
-# define R500_ALU_RGB_OMOD_MUL_8 (3 << 26)
-# define R500_ALU_RGB_OMOD_DIV_2 (4 << 26)
-# define R500_ALU_RGB_OMOD_DIV_4 (5 << 26)
-# define R500_ALU_RGB_OMOD_DIV_8 (6 << 26)
-# define R500_ALU_RGB_OMOD_DISABLE (7 << 26)
+# define R500_ALU_RGB_OMOD_SHIFT 26
+# define R500_ALU_RGB_OMOD_IDENTITY (0 << R500_ALU_RGB_OMOD_SHIFT)
+# define R500_ALU_RGB_OMOD_MUL_2 (1 << R500_ALU_RGB_OMOD_SHIFT)
+# define R500_ALU_RGB_OMOD_MUL_4 (2 << R500_ALU_RGB_OMOD_SHIFT)
+# define R500_ALU_RGB_OMOD_MUL_8 (3 << R500_ALU_RGB_OMOD_SHIFT)
+# define R500_ALU_RGB_OMOD_DIV_2 (4 << R500_ALU_RGB_OMOD_SHIFT)
+# define R500_ALU_RGB_OMOD_DIV_4 (5 << R500_ALU_RGB_OMOD_SHIFT)
+# define R500_ALU_RGB_OMOD_DIV_8 (6 << R500_ALU_RGB_OMOD_SHIFT)
+# define R500_ALU_RGB_OMOD_DISABLE (7 << R500_ALU_RGB_OMOD_SHIFT)
# define R500_ALU_RGB_TARGET(x) ((x) << 29)
# define R500_ALU_RGB_WMASK (1 << 31)
#define R500_US_ALU_RGB_ADDR_0 0x9000
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
compiler.Base.has_half_swizzles = FALSE;
compiler.Base.has_presub = FALSE;
+ compiler.Base.has_omod = FALSE;
compiler.Base.max_temp_regs = 32;
compiler.Base.max_constants = 256;
compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256;