From 4e861ac4a1f1bdbd28f66e3f15eb45aa45a47bad Mon Sep 17 00:00:00 2001 From: =?utf8?q?St=C3=A9phane=20Marchesin?= Date: Wed, 4 Sep 2013 12:03:10 -0700 Subject: [PATCH] i915g: Add more optimizations This patch adds liveness analysis to i915g and a couple optimizations which benefit from it. One interesting optimization turns (fake) indirect texture accesses into direct texture accesses (the i915 supports a maximum of 4 indirect texture accesses). Among other things this fixes a bunch of piglit tests. --- src/gallium/drivers/i915/i915_fpc.h | 4 +- src/gallium/drivers/i915/i915_fpc_optimize.c | 408 +++++++++++++++--- src/gallium/drivers/i915/i915_fpc_translate.c | 17 +- 3 files changed, 371 insertions(+), 58 deletions(-) diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h index e915822f4a9..ff740d41617 100644 --- a/src/gallium/drivers/i915/i915_fpc.h +++ b/src/gallium/drivers/i915/i915_fpc.h @@ -322,6 +322,8 @@ struct i915_token_list extern struct i915_token_list* i915_optimize(const struct tgsi_token *tokens); -extern void i915_optimize_free(struct i915_token_list* tokens); +extern void i915_optimize_free(struct i915_token_list *tokens); + +extern uint i915_num_coords(uint tex); #endif diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c index ff27d5066b6..e0134a7c4ee 100644 --- a/src/gallium/drivers/i915/i915_fpc_optimize.c +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -35,8 +35,15 @@ #include "util/u_string.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_exec.h" -static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_full_dst_register* d1) +struct i915_optimize_context +{ + int first_write[TGSI_EXEC_NUM_TEMPS]; + int last_read[TGSI_EXEC_NUM_TEMPS]; +}; + +static boolean same_src_dst_reg(struct i915_full_src_register *s1, struct i915_full_dst_register *d1) { return (s1->Register.File == d1->Register.File && s1->Register.Indirect == d1->Register.Indirect && @@ -44,7 +51,7 @@ static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_f s1->Register.Index == d1->Register.Index); } -static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2) +static boolean same_dst_reg(struct i915_full_dst_register *d1, struct i915_full_dst_register *d2) { return (d1->Register.File == d2->Register.File && d1->Register.Indirect == d2->Register.Indirect && @@ -52,7 +59,7 @@ static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_ d1->Register.Index == d2->Register.Index); } -static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2) +static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_src_register *d2) { return (d1->Register.File == d2->Register.File && d1->Register.Indirect == d2->Register.Indirect && @@ -62,16 +69,99 @@ static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_ d1->Register.Negate == d2->Register.Negate); } -static boolean has_destination(unsigned opcode) +const static struct { + boolean is_texture; + boolean commutes; + unsigned neutral_element; + unsigned num_dst; + unsigned num_src; +} op_table [TGSI_OPCODE_LAST] = { + [ TGSI_OPCODE_ABS ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_ADD ] = { false, true, TGSI_SWIZZLE_ZERO, 1, 2 }, + [ TGSI_OPCODE_CEIL ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_CMP ] = { false, false, 0, 1, 2 }, + [ TGSI_OPCODE_COS ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_DDX ] = { false, false, 0, 1, 0 }, + [ TGSI_OPCODE_DDY ] = { false, false, 0, 1, 0 }, + [ TGSI_OPCODE_DP2 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, + [ TGSI_OPCODE_DP3 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, + [ TGSI_OPCODE_DP4 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, + [ TGSI_OPCODE_DPH ] = { false, false, 0, 1, 2 }, + [ TGSI_OPCODE_DST ] = { false, false, 0, 1, 2 }, + [ TGSI_OPCODE_END ] = { false, false, 0, 0, 0 }, + [ TGSI_OPCODE_EX2 ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_FLR ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_FRC ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_KILL_IF ] = { false, false, 0, 0, 1 }, + [ TGSI_OPCODE_KILL ] = { false, false, 0, 0, 0 }, + [ TGSI_OPCODE_LG2 ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_LIT ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_LRP ] = { false, false, 0, 1, 3 }, + [ TGSI_OPCODE_MAX ] = { false, false, 0, 1, 2 }, + [ TGSI_OPCODE_MAD ] = { false, false, 0, 1, 3 }, + [ TGSI_OPCODE_MIN ] = { false, false, 0, 1, 2 }, + [ TGSI_OPCODE_MOV ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_MUL ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, + [ TGSI_OPCODE_NOP ] = { false, false, 0, 0, 0 }, + [ TGSI_OPCODE_POW ] = { false, false, 0, 1, 2 }, + [ TGSI_OPCODE_RCP ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_RET ] = { false, false, 0, 0, 0 }, + [ TGSI_OPCODE_RSQ ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_SCS ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_SEQ ] = { false, false, 0, 1, 2 }, + [ TGSI_OPCODE_SGE ] = { false, false, 0, 1, 2 }, + [ TGSI_OPCODE_SGT ] = { false, false, 0, 1, 2 }, + [ TGSI_OPCODE_SIN ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_SLE ] = { false, false, 0, 1, 2 }, + [ TGSI_OPCODE_SLT ] = { false, false, 0, 1, 2 }, + [ TGSI_OPCODE_SNE ] = { false, false, 0, 1, 2 }, + [ TGSI_OPCODE_SSG ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_SUB ] = { false, false, 0, 1, 2 }, + [ TGSI_OPCODE_TEX ] = { true, false, 0, 1, 2 }, + [ TGSI_OPCODE_TRUNC ] = { false, false, 0, 1, 1 }, + [ TGSI_OPCODE_TXB ] = { true, false, 0, 1, 2 }, + [ TGSI_OPCODE_TXP ] = { true, false, 0, 1, 2 }, + [ TGSI_OPCODE_XPD ] = { false, false, 0, 1, 2 }, +}; + +static boolean op_has_dst(unsigned opcode) +{ + return (op_table[opcode].num_dst > 0); +} + +static int op_num_dst(unsigned opcode) +{ + return op_table[opcode].num_dst; +} + +static int op_num_src(unsigned opcode) +{ + return op_table[opcode].num_src; +} + +static boolean op_commutes(unsigned opcode) +{ + return op_table[opcode].commutes; +} + +static unsigned mask_for_unswizzled(int num_components) { - return (opcode != TGSI_OPCODE_NOP && - opcode != TGSI_OPCODE_KILL_IF && - opcode != TGSI_OPCODE_KILL && - opcode != TGSI_OPCODE_END && - opcode != TGSI_OPCODE_RET); + unsigned mask = 0; + switch(num_components) + { + case 4: + mask |= TGSI_WRITEMASK_W; + case 3: + mask |= TGSI_WRITEMASK_Z; + case 2: + mask |= TGSI_WRITEMASK_Y; + case 1: + mask |= TGSI_WRITEMASK_X; + } + return mask; } -static boolean is_unswizzled(struct i915_full_src_register* r, +static boolean is_unswizzled(struct i915_full_src_register *r, unsigned write_mask) { if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X) @@ -85,42 +175,26 @@ static boolean is_unswizzled(struct i915_full_src_register* r, return TRUE; } -static boolean op_commutes(unsigned opcode) +static boolean op_is_texture(unsigned opcode) { - switch(opcode) - { - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_MUL: - case TGSI_OPCODE_DP2: - case TGSI_OPCODE_DP3: - case TGSI_OPCODE_DP4: - return TRUE; - } - return FALSE; + return op_table[opcode].is_texture; } static unsigned op_neutral_element(unsigned opcode) { - switch(opcode) - { - case TGSI_OPCODE_ADD: - return TGSI_SWIZZLE_ZERO; - case TGSI_OPCODE_MUL: - case TGSI_OPCODE_DP2: - case TGSI_OPCODE_DP3: - case TGSI_OPCODE_DP4: - return TGSI_SWIZZLE_ONE; + unsigned ne = op_table[opcode].neutral_element; + if (!ne) { + debug_printf("No neutral element for opcode %d\n",opcode); + ne = TGSI_SWIZZLE_ZERO; } - - debug_printf("Unknown opcode %d\n",opcode); - return TGSI_SWIZZLE_ZERO; + return ne; } /* * Sets the swizzle to the neutral element for the operation for the bits * of writemask which are set, swizzle to identity otherwise. */ -static void set_neutral_element_swizzle(struct i915_full_src_register* r, +static void set_neutral_element_swizzle(struct i915_full_src_register *r, unsigned write_mask, unsigned neutral) { @@ -145,7 +219,7 @@ static void set_neutral_element_swizzle(struct i915_full_src_register* r, r->Register.SwizzleW = TGSI_SWIZZLE_W; } -static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i) +static void copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i) { o->File = i->File; o->Indirect = i->Indirect; @@ -159,7 +233,7 @@ static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_regi o->Negate = i->Negate; } -static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i) +static void copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i) { o->File = i->File; o->WriteMask = i->WriteMask; @@ -168,7 +242,7 @@ static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_regi o->Index = i->Index; } -static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i) +static void copy_instruction(struct i915_full_instruction *o, const struct tgsi_full_instruction *i) { memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction)); memcpy(&o->Texture, &i->Texture, sizeof(o->Texture)); @@ -180,7 +254,7 @@ static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_ copy_src_reg(&o->Src[2].Register, &i->Src[2].Register); } -static void copy_token(union i915_full_token* o, union tgsi_full_token* i) +static void copy_token(union i915_full_token *o, union tgsi_full_token *i) { if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) memcpy(o, i, sizeof(*o)); @@ -189,6 +263,223 @@ static void copy_token(union i915_full_token* o, union tgsi_full_token* i) } +static void liveness_mark_written(struct i915_optimize_context *ctx, + struct i915_full_dst_register *dst_reg, + int pos) +{ + int dst_reg_index; + if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) { + dst_reg_index = dst_reg->Register.Index; + assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS); + /* dead -> live transition */ + if (ctx->first_write[dst_reg_index] != -1) + ctx->first_write[dst_reg_index] = pos; + } +} + +static void liveness_mark_read(struct i915_optimize_context *ctx, + struct i915_full_src_register *src_reg, + int pos) +{ + int src_reg_index; + if (src_reg->Register.File == TGSI_FILE_TEMPORARY) { + src_reg_index = src_reg->Register.Index; + assert(src_reg_index < TGSI_EXEC_NUM_TEMPS); + /* live -> dead transition */ + if (ctx->last_read[src_reg_index] != -1) + ctx->last_read[src_reg_index] = pos; + } +} + +static void liveness_analysis(struct i915_optimize_context *ctx, + struct i915_token_list *tokens) +{ + struct i915_full_dst_register *dst_reg; + struct i915_full_src_register *src_reg; + union i915_full_token *current; + unsigned opcode; + int num_dst, num_src; + int i = 0; + + for(i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) + { + ctx->first_write[i] = -1; + ctx->last_read[i] = -1; + } + + for(i = 0; i < tokens->NumTokens; i++) + { + current = &tokens->Tokens[i]; + + if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) + continue; + + opcode = current->FullInstruction.Instruction.Opcode; + num_dst = op_num_dst(opcode); + + switch(num_dst) + { + case 1: + dst_reg = ¤t->FullInstruction.Dst[0]; + liveness_mark_written(ctx, dst_reg, i); + case 0: + break; + default: + debug_printf("Op %d has %d dst regs\n", opcode, num_dst); + break; + } + } + + for(i = tokens->NumTokens - 1; i >= 0; i--) + { + current = &tokens->Tokens[i]; + + if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) + continue; + + opcode = current->FullInstruction.Instruction.Opcode; + num_src = op_num_src(opcode); + + switch(num_src) + { + case 3: + src_reg = ¤t->FullInstruction.Src[2]; + liveness_mark_read(ctx, src_reg, i); + case 2: + src_reg = ¤t->FullInstruction.Src[1]; + liveness_mark_read(ctx, src_reg, i); + case 1: + src_reg = ¤t->FullInstruction.Src[0]; + liveness_mark_read(ctx, src_reg, i); + case 0: + break; + default: + debug_printf("Op %d has %d src regs\n", opcode, num_src); + break; + } + } +} + +static int unused_from(struct i915_optimize_context *ctx, struct i915_full_dst_register *dst_reg, int from) +{ + int dst_reg_index = dst_reg->Register.Index; + assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS); + return (from >= ctx->last_read[dst_reg_index]); +} + +/* Returns a mask with the components used for a texture access instruction */ +static unsigned i915_tex_mask(union i915_full_token *instr) +{ + unsigned mask; + + /* Get the number of coords */ + mask = mask_for_unswizzled(i915_num_coords(instr->FullInstruction.Texture.Texture)); + + /* Add the W component if projective */ + if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP) + mask |= TGSI_WRITEMASK_W; + + return mask; +} + +static boolean target_is_texture2d(uint tex) +{ + switch (tex) { + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + return true; + default: + return false; + } +} + + +/* + * Optimize away useless indirect texture reads: + * MOV TEMP[0].xy, IN[0].xyyy + * TEX TEMP[1], TEMP[0], SAMP[0], 2D + * into: + * TEX TEMP[1], IN[0], SAMP[0], 2D + * + * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/.. + */ +static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx, + struct i915_token_list *tokens, + int index) +{ + union i915_full_token *current = &tokens->Tokens[index - 1]; + union i915_full_token *next = &tokens->Tokens[index]; + + if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && + op_is_texture(next->FullInstruction.Instruction.Opcode) && + target_is_texture2d(next->FullInstruction.Texture.Texture) && + same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) && + is_unswizzled(¤t->FullInstruction.Src[0], i915_tex_mask(next)) && + unused_from(ctx, ¤t->FullInstruction.Dst[0], index)) + { + memcpy(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0], sizeof(struct i915_src_register)); + current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; + } +} + +/* + * Optimize away things like: + * MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0]) + * MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0]) + * into: + * NOP + * MOV OUT[0].xyw, TEMP[1].xyww + */ +static void i915_fpc_optimize_mov_after_mov(union i915_full_token *current, union i915_full_token *next) +{ + struct i915_full_src_register *src_reg1, *src_reg2; + struct i915_full_dst_register *dst_reg1, *dst_reg2; + unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w; + + if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && + next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && + current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && + same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) && + same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) && + !same_src_dst_reg(¤t->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) ) + { + src_reg1 = ¤t->FullInstruction.Src[0]; + dst_reg1 = ¤t->FullInstruction.Dst[0]; + src_reg2 = &next->FullInstruction.Src[0]; + dst_reg2 = &next->FullInstruction.Dst[0]; + + /* Start with swizzles from the first mov */ + swizzle_x = src_reg1->Register.SwizzleX; + swizzle_y = src_reg1->Register.SwizzleY; + swizzle_z = src_reg1->Register.SwizzleZ; + swizzle_w = src_reg1->Register.SwizzleW; + + /* Pile the second mov on top */ + if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X) + swizzle_x = src_reg2->Register.SwizzleX; + if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y) + swizzle_y = src_reg2->Register.SwizzleY; + if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z) + swizzle_z = src_reg2->Register.SwizzleZ; + if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W) + swizzle_w = src_reg2->Register.SwizzleW; + + dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask; + src_reg2->Register.SwizzleX = swizzle_x; + src_reg2->Register.SwizzleY = swizzle_y; + src_reg2->Register.SwizzleZ = swizzle_z; + src_reg2->Register.SwizzleW = swizzle_w; + + current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; + + return; + } +} + /* * Optimize away things like: * MUL OUT[0].xyz, TEMP[1], TEMP[2] @@ -197,7 +488,7 @@ static void copy_token(union i915_full_token* o, union tgsi_full_token* i) * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] * This is useful for optimizing texenv. */ -static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next) +static void i915_fpc_optimize_mov_after_alu(union i915_full_token *current, union i915_full_token *next) { if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && @@ -254,13 +545,13 @@ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, unio * into: * NOP */ -static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current) +static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current) { union i915_full_token current; copy_token(¤t , tgsi_current); if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && - has_destination(current.FullInstruction.Instruction.Opcode) && + op_has_dst(current.FullInstruction.Instruction.Opcode) && current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE && current.FullInstruction.Src[0].Register.Absolute == 0 && current.FullInstruction.Src[0].Register.Negate == 0 && @@ -279,16 +570,23 @@ static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current) * into: * *** OUT[0], TEMP[1], TEMP[2] */ -static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token* current, union i915_full_token* next) +static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx, + struct i915_token_list *tokens, + int index) { + union i915_full_token *current = &tokens->Tokens[index - 1]; + union i915_full_token *next = &tokens->Tokens[index]; + + // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && - has_destination(current->FullInstruction.Instruction.Opcode) && + op_has_dst(current->FullInstruction.Instruction.Opcode) && next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE && next->FullInstruction.Src[0].Register.Absolute == 0 && next->FullInstruction.Src[0].Register.Negate == 0 && - next->FullInstruction.Dst[0].Register.File == TGSI_FILE_OUTPUT && + unused_from(ctx, ¤t->FullInstruction.Dst[0], index) && + current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZW && is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) && current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask && same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) ) @@ -304,8 +602,11 @@ struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) { struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list)); struct tgsi_parse_context parse; + struct i915_optimize_context *ctx; int i = 0; + ctx = malloc(sizeof(*ctx)); + out_tokens->NumTokens = 0; /* Count the tokens */ @@ -330,18 +631,27 @@ struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) copy_token(&out_tokens->Tokens[i] , &parse.FullToken); - if (i > 0) { - i915_fpc_optimize_useless_mov_after_inst(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); - i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); - } i++; } tgsi_parse_free (&parse); + liveness_analysis(ctx, out_tokens); + + i = 1; + while( i < out_tokens->NumTokens) { + i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i); + i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); + i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); + i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i); + i++; + } + + free(ctx); + return out_tokens; } -void i915_optimize_free(struct i915_token_list* tokens) +void i915_optimize_free(struct i915_token_list *tokens) { free(tokens->Tokens); free(tokens); diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 765a1012c90..74f9943ed3e 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -31,6 +31,7 @@ #include "i915_reg.h" #include "i915_context.h" #include "i915_fpc.h" +#include "i915_debug_private.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" @@ -179,7 +180,7 @@ static uint get_mapping(struct i915_fragment_shader* fs, int unit) static uint src_vector(struct i915_fp_compile *p, const struct i915_full_src_register *source, - struct i915_fragment_shader* fs) + struct i915_fragment_shader *fs) { uint index = source->Register.Index; uint src = 0, sem_name, sem_ind; @@ -381,8 +382,8 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex) /** * Return the number of coords needed to access a given TGSI_TEXTURE_* */ -static uint -texture_num_coords(struct i915_fp_compile *p, uint tex) +uint +i915_num_coords(uint tex) { switch (tex) { case TGSI_TEXTURE_SHADOW1D: @@ -400,7 +401,7 @@ texture_num_coords(struct i915_fp_compile *p, uint tex) return 3; default: - i915_program_error(p, "Num coords"); + debug_printf("Unknown texture target for num coords"); return 2; } } @@ -427,7 +428,7 @@ emit_tex(struct i915_fp_compile *p, sampler, coord, opcode, - texture_num_coords(p, texture) ); + i915_num_coords(texture) ); } @@ -440,7 +441,7 @@ static void emit_simple_arith(struct i915_fp_compile *p, const struct i915_full_instruction *inst, uint opcode, uint numArgs, - struct i915_fragment_shader* fs) + struct i915_fragment_shader *fs) { uint arg1, arg2, arg3; @@ -465,7 +466,7 @@ static void emit_simple_arith_swap2(struct i915_fp_compile *p, const struct i915_full_instruction *inst, uint opcode, uint numArgs, - struct i915_fragment_shader* fs) + struct i915_fragment_shader *fs) { struct i915_full_instruction inst2; @@ -1108,7 +1109,7 @@ i915_translate_instruction(struct i915_fp_compile *p, static void i915_translate_token(struct i915_fp_compile *p, - const union i915_full_token* token, + const union i915_full_token *token, struct i915_fragment_shader *fs) { struct i915_fragment_shader *ifs = p->shader; -- 2.30.2