#include "util/u_string.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_exec.h"
-static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_full_dst_register* d1)
+struct i915_optimize_context
+{
+ int first_write[TGSI_EXEC_NUM_TEMPS];
+ int last_read[TGSI_EXEC_NUM_TEMPS];
+};
+
+static boolean same_src_dst_reg(struct i915_full_src_register *s1, struct i915_full_dst_register *d1)
{
return (s1->Register.File == d1->Register.File &&
s1->Register.Indirect == d1->Register.Indirect &&
s1->Register.Index == d1->Register.Index);
}
-static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
+static boolean same_dst_reg(struct i915_full_dst_register *d1, struct i915_full_dst_register *d2)
{
return (d1->Register.File == d2->Register.File &&
d1->Register.Indirect == d2->Register.Indirect &&
d1->Register.Index == d2->Register.Index);
}
-static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
+static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_src_register *d2)
{
return (d1->Register.File == d2->Register.File &&
d1->Register.Indirect == d2->Register.Indirect &&
d1->Register.Negate == d2->Register.Negate);
}
-static boolean has_destination(unsigned opcode)
+const static struct {
+ boolean is_texture;
+ boolean commutes;
+ unsigned neutral_element;
+ unsigned num_dst;
+ unsigned num_src;
+} op_table [TGSI_OPCODE_LAST] = {
+ [ TGSI_OPCODE_ABS ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_ADD ] = { false, true, TGSI_SWIZZLE_ZERO, 1, 2 },
+ [ TGSI_OPCODE_CEIL ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_CMP ] = { false, false, 0, 1, 2 },
+ [ TGSI_OPCODE_COS ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_DDX ] = { false, false, 0, 1, 0 },
+ [ TGSI_OPCODE_DDY ] = { false, false, 0, 1, 0 },
+ [ TGSI_OPCODE_DP2 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
+ [ TGSI_OPCODE_DP3 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
+ [ TGSI_OPCODE_DP4 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
+ [ TGSI_OPCODE_DPH ] = { false, false, 0, 1, 2 },
+ [ TGSI_OPCODE_DST ] = { false, false, 0, 1, 2 },
+ [ TGSI_OPCODE_END ] = { false, false, 0, 0, 0 },
+ [ TGSI_OPCODE_EX2 ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_FLR ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_FRC ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_KILL_IF ] = { false, false, 0, 0, 1 },
+ [ TGSI_OPCODE_KILL ] = { false, false, 0, 0, 0 },
+ [ TGSI_OPCODE_LG2 ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_LIT ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_LRP ] = { false, false, 0, 1, 3 },
+ [ TGSI_OPCODE_MAX ] = { false, false, 0, 1, 2 },
+ [ TGSI_OPCODE_MAD ] = { false, false, 0, 1, 3 },
+ [ TGSI_OPCODE_MIN ] = { false, false, 0, 1, 2 },
+ [ TGSI_OPCODE_MOV ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_MUL ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
+ [ TGSI_OPCODE_NOP ] = { false, false, 0, 0, 0 },
+ [ TGSI_OPCODE_POW ] = { false, false, 0, 1, 2 },
+ [ TGSI_OPCODE_RCP ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_RET ] = { false, false, 0, 0, 0 },
+ [ TGSI_OPCODE_RSQ ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_SCS ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_SEQ ] = { false, false, 0, 1, 2 },
+ [ TGSI_OPCODE_SGE ] = { false, false, 0, 1, 2 },
+ [ TGSI_OPCODE_SGT ] = { false, false, 0, 1, 2 },
+ [ TGSI_OPCODE_SIN ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_SLE ] = { false, false, 0, 1, 2 },
+ [ TGSI_OPCODE_SLT ] = { false, false, 0, 1, 2 },
+ [ TGSI_OPCODE_SNE ] = { false, false, 0, 1, 2 },
+ [ TGSI_OPCODE_SSG ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_SUB ] = { false, false, 0, 1, 2 },
+ [ TGSI_OPCODE_TEX ] = { true, false, 0, 1, 2 },
+ [ TGSI_OPCODE_TRUNC ] = { false, false, 0, 1, 1 },
+ [ TGSI_OPCODE_TXB ] = { true, false, 0, 1, 2 },
+ [ TGSI_OPCODE_TXP ] = { true, false, 0, 1, 2 },
+ [ TGSI_OPCODE_XPD ] = { false, false, 0, 1, 2 },
+};
+
+static boolean op_has_dst(unsigned opcode)
+{
+ return (op_table[opcode].num_dst > 0);
+}
+
+static int op_num_dst(unsigned opcode)
+{
+ return op_table[opcode].num_dst;
+}
+
+static int op_num_src(unsigned opcode)
+{
+ return op_table[opcode].num_src;
+}
+
+static boolean op_commutes(unsigned opcode)
+{
+ return op_table[opcode].commutes;
+}
+
+static unsigned mask_for_unswizzled(int num_components)
{
- return (opcode != TGSI_OPCODE_NOP &&
- opcode != TGSI_OPCODE_KILL_IF &&
- opcode != TGSI_OPCODE_KILL &&
- opcode != TGSI_OPCODE_END &&
- opcode != TGSI_OPCODE_RET);
+ unsigned mask = 0;
+ switch(num_components)
+ {
+ case 4:
+ mask |= TGSI_WRITEMASK_W;
+ case 3:
+ mask |= TGSI_WRITEMASK_Z;
+ case 2:
+ mask |= TGSI_WRITEMASK_Y;
+ case 1:
+ mask |= TGSI_WRITEMASK_X;
+ }
+ return mask;
}
-static boolean is_unswizzled(struct i915_full_src_register* r,
+static boolean is_unswizzled(struct i915_full_src_register *r,
unsigned write_mask)
{
if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
return TRUE;
}
-static boolean op_commutes(unsigned opcode)
+static boolean op_is_texture(unsigned opcode)
{
- switch(opcode)
- {
- case TGSI_OPCODE_ADD:
- case TGSI_OPCODE_MUL:
- case TGSI_OPCODE_DP2:
- case TGSI_OPCODE_DP3:
- case TGSI_OPCODE_DP4:
- return TRUE;
- }
- return FALSE;
+ return op_table[opcode].is_texture;
}
static unsigned op_neutral_element(unsigned opcode)
{
- switch(opcode)
- {
- case TGSI_OPCODE_ADD:
- return TGSI_SWIZZLE_ZERO;
- case TGSI_OPCODE_MUL:
- case TGSI_OPCODE_DP2:
- case TGSI_OPCODE_DP3:
- case TGSI_OPCODE_DP4:
- return TGSI_SWIZZLE_ONE;
+ unsigned ne = op_table[opcode].neutral_element;
+ if (!ne) {
+ debug_printf("No neutral element for opcode %d\n",opcode);
+ ne = TGSI_SWIZZLE_ZERO;
}
-
- debug_printf("Unknown opcode %d\n",opcode);
- return TGSI_SWIZZLE_ZERO;
+ return ne;
}
/*
* Sets the swizzle to the neutral element for the operation for the bits
* of writemask which are set, swizzle to identity otherwise.
*/
-static void set_neutral_element_swizzle(struct i915_full_src_register* r,
+static void set_neutral_element_swizzle(struct i915_full_src_register *r,
unsigned write_mask,
unsigned neutral)
{
r->Register.SwizzleW = TGSI_SWIZZLE_W;
}
-static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
+static void copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
{
o->File = i->File;
o->Indirect = i->Indirect;
o->Negate = i->Negate;
}
-static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
+static void copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
{
o->File = i->File;
o->WriteMask = i->WriteMask;
o->Index = i->Index;
}
-static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
+static void copy_instruction(struct i915_full_instruction *o, const struct tgsi_full_instruction *i)
{
memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
}
-static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
+static void copy_token(union i915_full_token *o, union tgsi_full_token *i)
{
if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
memcpy(o, i, sizeof(*o));
}
+static void liveness_mark_written(struct i915_optimize_context *ctx,
+ struct i915_full_dst_register *dst_reg,
+ int pos)
+{
+ int dst_reg_index;
+ if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
+ dst_reg_index = dst_reg->Register.Index;
+ assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
+ /* dead -> live transition */
+ if (ctx->first_write[dst_reg_index] != -1)
+ ctx->first_write[dst_reg_index] = pos;
+ }
+}
+
+static void liveness_mark_read(struct i915_optimize_context *ctx,
+ struct i915_full_src_register *src_reg,
+ int pos)
+{
+ int src_reg_index;
+ if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
+ src_reg_index = src_reg->Register.Index;
+ assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
+ /* live -> dead transition */
+ if (ctx->last_read[src_reg_index] != -1)
+ ctx->last_read[src_reg_index] = pos;
+ }
+}
+
+static void liveness_analysis(struct i915_optimize_context *ctx,
+ struct i915_token_list *tokens)
+{
+ struct i915_full_dst_register *dst_reg;
+ struct i915_full_src_register *src_reg;
+ union i915_full_token *current;
+ unsigned opcode;
+ int num_dst, num_src;
+ int i = 0;
+
+ for(i = 0; i < TGSI_EXEC_NUM_TEMPS; i++)
+ {
+ ctx->first_write[i] = -1;
+ ctx->last_read[i] = -1;
+ }
+
+ for(i = 0; i < tokens->NumTokens; i++)
+ {
+ current = &tokens->Tokens[i];
+
+ if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+ continue;
+
+ opcode = current->FullInstruction.Instruction.Opcode;
+ num_dst = op_num_dst(opcode);
+
+ switch(num_dst)
+ {
+ case 1:
+ dst_reg = ¤t->FullInstruction.Dst[0];
+ liveness_mark_written(ctx, dst_reg, i);
+ case 0:
+ break;
+ default:
+ debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
+ break;
+ }
+ }
+
+ for(i = tokens->NumTokens - 1; i >= 0; i--)
+ {
+ current = &tokens->Tokens[i];
+
+ if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+ continue;
+
+ opcode = current->FullInstruction.Instruction.Opcode;
+ num_src = op_num_src(opcode);
+
+ switch(num_src)
+ {
+ case 3:
+ src_reg = ¤t->FullInstruction.Src[2];
+ liveness_mark_read(ctx, src_reg, i);
+ case 2:
+ src_reg = ¤t->FullInstruction.Src[1];
+ liveness_mark_read(ctx, src_reg, i);
+ case 1:
+ src_reg = ¤t->FullInstruction.Src[0];
+ liveness_mark_read(ctx, src_reg, i);
+ case 0:
+ break;
+ default:
+ debug_printf("Op %d has %d src regs\n", opcode, num_src);
+ break;
+ }
+ }
+}
+
+static int unused_from(struct i915_optimize_context *ctx, struct i915_full_dst_register *dst_reg, int from)
+{
+ int dst_reg_index = dst_reg->Register.Index;
+ assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
+ return (from >= ctx->last_read[dst_reg_index]);
+}
+
+/* Returns a mask with the components used for a texture access instruction */
+static unsigned i915_tex_mask(union i915_full_token *instr)
+{
+ unsigned mask;
+
+ /* Get the number of coords */
+ mask = mask_for_unswizzled(i915_num_coords(instr->FullInstruction.Texture.Texture));
+
+ /* Add the W component if projective */
+ if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP)
+ mask |= TGSI_WRITEMASK_W;
+
+ return mask;
+}
+
+static boolean target_is_texture2d(uint tex)
+{
+ switch (tex) {
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+/*
+ * Optimize away useless indirect texture reads:
+ * MOV TEMP[0].xy, IN[0].xyyy
+ * TEX TEMP[1], TEMP[0], SAMP[0], 2D
+ * into:
+ * TEX TEMP[1], IN[0], SAMP[0], 2D
+ *
+ * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
+ */
+static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
+ struct i915_token_list *tokens,
+ int index)
+{
+ union i915_full_token *current = &tokens->Tokens[index - 1];
+ union i915_full_token *next = &tokens->Tokens[index];
+
+ if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
+ next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
+ current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
+ op_is_texture(next->FullInstruction.Instruction.Opcode) &&
+ target_is_texture2d(next->FullInstruction.Texture.Texture) &&
+ same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) &&
+ is_unswizzled(¤t->FullInstruction.Src[0], i915_tex_mask(next)) &&
+ unused_from(ctx, ¤t->FullInstruction.Dst[0], index))
+ {
+ memcpy(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0], sizeof(struct i915_src_register));
+ current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
+ }
+}
+
+/*
+ * Optimize away things like:
+ * MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
+ * MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
+ * into:
+ * NOP
+ * MOV OUT[0].xyw, TEMP[1].xyww
+ */
+static void i915_fpc_optimize_mov_after_mov(union i915_full_token *current, union i915_full_token *next)
+{
+ struct i915_full_src_register *src_reg1, *src_reg2;
+ struct i915_full_dst_register *dst_reg1, *dst_reg2;
+ unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
+
+ if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
+ next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
+ current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
+ next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
+ current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
+ same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) &&
+ same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) &&
+ !same_src_dst_reg(¤t->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) )
+ {
+ src_reg1 = ¤t->FullInstruction.Src[0];
+ dst_reg1 = ¤t->FullInstruction.Dst[0];
+ src_reg2 = &next->FullInstruction.Src[0];
+ dst_reg2 = &next->FullInstruction.Dst[0];
+
+ /* Start with swizzles from the first mov */
+ swizzle_x = src_reg1->Register.SwizzleX;
+ swizzle_y = src_reg1->Register.SwizzleY;
+ swizzle_z = src_reg1->Register.SwizzleZ;
+ swizzle_w = src_reg1->Register.SwizzleW;
+
+ /* Pile the second mov on top */
+ if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
+ swizzle_x = src_reg2->Register.SwizzleX;
+ if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
+ swizzle_y = src_reg2->Register.SwizzleY;
+ if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
+ swizzle_z = src_reg2->Register.SwizzleZ;
+ if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
+ swizzle_w = src_reg2->Register.SwizzleW;
+
+ dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
+ src_reg2->Register.SwizzleX = swizzle_x;
+ src_reg2->Register.SwizzleY = swizzle_y;
+ src_reg2->Register.SwizzleZ = swizzle_z;
+ src_reg2->Register.SwizzleW = swizzle_w;
+
+ current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
+
+ return;
+ }
+}
+
/*
* Optimize away things like:
* MUL OUT[0].xyz, TEMP[1], TEMP[2]
* MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
* This is useful for optimizing texenv.
*/
-static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next)
+static void i915_fpc_optimize_mov_after_alu(union i915_full_token *current, union i915_full_token *next)
{
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
* into:
* NOP
*/
-static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current)
+static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
{
union i915_full_token current;
copy_token(¤t , tgsi_current);
if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
- has_destination(current.FullInstruction.Instruction.Opcode) &&
+ op_has_dst(current.FullInstruction.Instruction.Opcode) &&
current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
current.FullInstruction.Src[0].Register.Absolute == 0 &&
current.FullInstruction.Src[0].Register.Negate == 0 &&
* into:
* *** OUT[0], TEMP[1], TEMP[2]
*/
-static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token* current, union i915_full_token* next)
+static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
+ struct i915_token_list *tokens,
+ int index)
{
+ union i915_full_token *current = &tokens->Tokens[index - 1];
+ union i915_full_token *next = &tokens->Tokens[index];
+
+ // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
- has_destination(current->FullInstruction.Instruction.Opcode) &&
+ op_has_dst(current->FullInstruction.Instruction.Opcode) &&
next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
next->FullInstruction.Src[0].Register.Absolute == 0 &&
next->FullInstruction.Src[0].Register.Negate == 0 &&
- next->FullInstruction.Dst[0].Register.File == TGSI_FILE_OUTPUT &&
+ unused_from(ctx, ¤t->FullInstruction.Dst[0], index) &&
+ current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZW &&
is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) )
{
struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
struct tgsi_parse_context parse;
+ struct i915_optimize_context *ctx;
int i = 0;
+ ctx = malloc(sizeof(*ctx));
+
out_tokens->NumTokens = 0;
/* Count the tokens */
copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
- if (i > 0) {
- i915_fpc_optimize_useless_mov_after_inst(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
- i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
- }
i++;
}
tgsi_parse_free (&parse);
+ liveness_analysis(ctx, out_tokens);
+
+ i = 1;
+ while( i < out_tokens->NumTokens) {
+ i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
+ i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
+ i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
+ i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
+ i++;
+ }
+
+ free(ctx);
+
return out_tokens;
}
-void i915_optimize_free(struct i915_token_list* tokens)
+void i915_optimize_free(struct i915_token_list *tokens)
{
free(tokens->Tokens);
free(tokens);