From 2bc5e0e97ba7b6c32f6ff90cb90448173d74b89b Mon Sep 17 00:00:00 2001 From: =?utf8?q?St=C3=A9phane=20Marchesin?= Date: Mon, 4 Jul 2011 20:21:37 -0700 Subject: [PATCH] i915g: introduce the tiny shader optimizer. --- src/gallium/drivers/i915/Makefile | 1 + src/gallium/drivers/i915/SConscript | 1 + src/gallium/drivers/i915/i915_fpc.h | 88 ++++++++ src/gallium/drivers/i915/i915_fpc_emit.c | 1 - src/gallium/drivers/i915/i915_fpc_optimize.c | 182 ++++++++++++++++ src/gallium/drivers/i915/i915_fpc_translate.c | 195 +++++++++--------- 6 files changed, 372 insertions(+), 96 deletions(-) create mode 100644 src/gallium/drivers/i915/i915_fpc_optimize.c diff --git a/src/gallium/drivers/i915/Makefile b/src/gallium/drivers/i915/Makefile index 778124728bb..36197fbc93b 100644 --- a/src/gallium/drivers/i915/Makefile +++ b/src/gallium/drivers/i915/Makefile @@ -27,6 +27,7 @@ C_SOURCES = \ i915_resource_buffer.c \ i915_fpc_emit.c \ i915_fpc_translate.c \ + i915_fpc_optimize.c \ i915_surface.c include ../../Makefile.template diff --git a/src/gallium/drivers/i915/SConscript b/src/gallium/drivers/i915/SConscript index 98370601b7f..76f597001fe 100644 --- a/src/gallium/drivers/i915/SConscript +++ b/src/gallium/drivers/i915/SConscript @@ -14,6 +14,7 @@ i915 = env.ConvenienceLibrary( 'i915_flush.c', 'i915_fpc_emit.c', 'i915_fpc_translate.c', + 'i915_fpc_optimize.c', 'i915_prim_emit.c', 'i915_prim_vbuf.c', 'i915_query.c', diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h index 509395cf1f5..b760bc461a1 100644 --- a/src/gallium/drivers/i915/i915_fpc.h +++ b/src/gallium/drivers/i915/i915_fpc.h @@ -33,7 +33,9 @@ #include "i915_context.h" #include "i915_reg.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" #define I915_PROGRAM_SIZE 192 @@ -207,4 +209,90 @@ extern void i915_program_error(struct i915_fp_compile *p, const char *msg, ...); +/*====================================================================== + * i915_fpc_optimize.c + */ + + +struct i915_src_register +{ + unsigned File : 4; /* TGSI_FILE_ */ + unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned SwizzleX : 3; /* TGSI_SWIZZLE_ */ + unsigned SwizzleY : 3; /* TGSI_SWIZZLE_ */ + unsigned SwizzleZ : 3; /* TGSI_SWIZZLE_ */ + unsigned SwizzleW : 3; /* TGSI_SWIZZLE_ */ + unsigned Absolute : 1; /* BOOL */ + unsigned Negate : 1; /* BOOL */ +}; + +/* Additional swizzle supported in i915 */ +#define TGSI_SWIZZLE_ZERO 4 +#define TGSI_SWIZZLE_ONE 5 + +struct i915_dst_register +{ + unsigned File : 4; /* TGSI_FILE_ */ + unsigned WriteMask : 4; /* TGSI_WRITEMASK_ */ + unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned Padding : 6; +}; + + +struct i915_full_dst_register +{ + struct i915_dst_register Register; +/* + struct tgsi_src_register Indirect; + struct tgsi_dimension Dimension; + struct tgsi_src_register DimIndirect; +*/ +}; + +struct i915_full_src_register +{ + struct i915_src_register Register; +/* + struct tgsi_src_register Indirect; + struct tgsi_dimension Dimension; + struct tgsi_src_register DimIndirect; +*/ +}; + +struct i915_full_instruction +{ + struct tgsi_instruction Instruction; +/* + struct tgsi_instruction_predicate Predicate; + struct tgsi_instruction_label Label; +*/ + struct tgsi_instruction_texture Texture; + struct i915_full_dst_register Dst[1]; + struct i915_full_src_register Src[3]; +}; + + +union i915_full_token +{ + struct tgsi_token Token; + struct tgsi_full_declaration FullDeclaration; + struct tgsi_full_immediate FullImmediate; + struct i915_full_instruction FullInstruction; + struct tgsi_full_property FullProperty; +}; + +struct i915_token_list +{ + union i915_full_token* Tokens; + unsigned NumTokens; +}; + +extern struct i915_token_list* i915_optimize(const struct tgsi_token *tokens); + +extern void i915_optimize_free(struct i915_token_list* tokens); + #endif diff --git a/src/gallium/drivers/i915/i915_fpc_emit.c b/src/gallium/drivers/i915/i915_fpc_emit.c index d28595e0fd3..c4a42df7882 100644 --- a/src/gallium/drivers/i915/i915_fpc_emit.c +++ b/src/gallium/drivers/i915/i915_fpc_emit.c @@ -369,7 +369,6 @@ i915_emit_const4f(struct i915_fp_compile * p, // XXX emit swizzle here for 0, 1, -1 and any combination thereof // we can use swizzle + neg for that - printf("const %f %f %f %f\n",c0,c1,c2,c3); for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { if (ifs->constant_flags[reg] == 0xf && ifs->constants[reg][0] == c0 && diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c new file mode 100644 index 00000000000..5c60d952de2 --- /dev/null +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -0,0 +1,182 @@ +/************************************************************************** + * + * Copyright 2011 The Chromium OS authors. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" + +static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2) +{ + return (d1->Register.File == d2->Register.File && + d1->Register.Indirect == d2->Register.Indirect && + d1->Register.Dimension == d2->Register.Dimension && + d1->Register.Index == d2->Register.Index); +} + +static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2) +{ + return (d1->Register.File == d2->Register.File && + d1->Register.Indirect == d2->Register.Indirect && + d1->Register.Dimension == d2->Register.Dimension && + d1->Register.Index == d2->Register.Index && + d1->Register.Absolute == d2->Register.Absolute && + d1->Register.Negate == d2->Register.Negate); +} + + +/* + * Optimize away things like: + * MUL OUT[0].xyz, TEMP[1], TEMP[2] + * MOV OUT[0].w, TEMP[2] + * into: + * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] + * This is useful for optimizing texenv. + */ +static void i915_fpc_optimize_mov_after_mul(union i915_full_token* current, union i915_full_token* next) +{ + if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL && + next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && + current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ && + next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W && + same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) && + same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[1]) ) + { + next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; + current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_ONE; + return; + } + + if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL && + next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && + current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ && + next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W && + same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) && + same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) ) + { + next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; + current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_ONE; + return; + } +} + +static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i) +{ + o->File = i->File; + o->Indirect = i->Indirect; + o->Dimension = i->Dimension; + o->Index = i->Index; + o->SwizzleX = i->SwizzleX; + o->SwizzleY = i->SwizzleY; + o->SwizzleZ = i->SwizzleZ; + o->SwizzleW = i->SwizzleW; + o->Absolute = i->Absolute; + o->Negate = i->Negate; +} + +static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i) +{ + o->File = i->File; + o->WriteMask = i->WriteMask; + o->Indirect = i->Indirect; + o->Dimension = i->Dimension; + o->Index = i->Index; +} + +static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i) +{ + memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction)); + memcpy(&o->Texture, &i->Texture, sizeof(o->Texture)); + + copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register); + + copy_src_reg(&o->Src[0].Register, &i->Src[0].Register); + copy_src_reg(&o->Src[1].Register, &i->Src[1].Register); + copy_src_reg(&o->Src[2].Register, &i->Src[2].Register); +} + +static void copy_token(union i915_full_token* o, union tgsi_full_token* i) +{ + if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) + memcpy(o, i, sizeof(*o)); + else + copy_instruction(&o->FullInstruction, &i->FullInstruction); + +} + +struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) +{ + struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list)); + out_tokens->NumTokens = 0; + struct tgsi_parse_context parse; + int i = 0; + + /* Count the tokens */ + tgsi_parse_init( &parse, tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + out_tokens->NumTokens++; + } + tgsi_parse_free (&parse); + + /* Allocate our tokens */ + out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens); + + tgsi_parse_init( &parse, tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + copy_token(&out_tokens->Tokens[i] , &parse.FullToken); + + if (i > 0) + i915_fpc_optimize_mov_after_mul(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); + + i++; + } + tgsi_parse_free (&parse); + + return out_tokens; +} + +void i915_optimize_free(struct i915_token_list* tokens) +{ + free(tokens->Tokens); + free(tokens); +} + + diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index df606e27e26..e19d9be04de 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -172,7 +172,7 @@ static uint get_mapping(struct i915_fragment_shader* fs, int unit) */ static uint src_vector(struct i915_fp_compile *p, - const struct tgsi_full_src_register *source, + const struct i915_full_src_register *source, struct i915_fragment_shader* fs) { uint index = source->Register.Index; @@ -287,7 +287,7 @@ src_vector(struct i915_fp_compile *p, */ static uint get_result_vector(struct i915_fp_compile *p, - const struct tgsi_full_dst_register *dest) + const struct i915_full_dst_register *dest) { switch (dest->Register.File) { case TGSI_FILE_OUTPUT: @@ -316,7 +316,7 @@ get_result_vector(struct i915_fp_compile *p, * Compute flags for saturation and writemask. */ static uint -get_result_flags(const struct tgsi_full_instruction *inst) +get_result_flags(const struct i915_full_instruction *inst) { const uint writeMask = inst->Dst[0].Register.WriteMask; @@ -378,7 +378,7 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex) */ static void emit_tex(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, uint opcode, struct i915_fragment_shader* fs) { @@ -404,7 +404,7 @@ emit_tex(struct i915_fp_compile *p, */ static void emit_simple_arith(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, uint opcode, uint numArgs, struct i915_fragment_shader* fs) { @@ -429,11 +429,11 @@ emit_simple_arith(struct i915_fp_compile *p, /** As above, but swap the first two src regs */ static void emit_simple_arith_swap2(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, uint opcode, uint numArgs, struct i915_fragment_shader* fs) { - struct tgsi_full_instruction inst2; + struct i915_full_instruction inst2; assert(numArgs == 2); @@ -457,7 +457,7 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, */ static void i915_translate_instruction(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, + const struct i915_full_instruction *inst, struct i915_fragment_shader *fs) { uint writemask; @@ -728,6 +728,9 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith(p, inst, A0_MUL, 2, fs); break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_POW: src0 = src_vector(p, &inst->Src[0], fs); src1 = src_vector(p, &inst->Src[1], fs); @@ -1044,107 +1047,107 @@ i915_translate_instruction(struct i915_fp_compile *p, } -/** - * Translate TGSI fragment shader into i915 hardware instructions. - * \param p the translation state - * \param tokens the TGSI token array - */ -static void -i915_translate_instructions(struct i915_fp_compile *p, - const struct tgsi_token *tokens, - struct i915_fragment_shader *fs) +static void i915_translate_token(struct i915_fp_compile *p, + const union i915_full_token* token, + struct i915_fragment_shader *fs) { struct i915_fragment_shader *ifs = p->shader; - struct tgsi_parse_context parse; - - tgsi_parse_init( &parse, tokens ); - - while( !tgsi_parse_end_of_tokens( &parse ) ) { - - tgsi_parse_token( &parse ); + switch( token->Token.Type ) { + case TGSI_TOKEN_TYPE_PROPERTY: + /* + * We only support one cbuf, but we still need to ignore the property + * correctly so we don't hit the assert at the end of the switch case. + */ + assert(token->FullProperty.Property.PropertyName == + TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); + break; - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_PROPERTY: - /* - * We only support one cbuf, but we still need to ignore the property - * correctly so we don't hit the assert at the end of the switch case. - */ - assert(parse.FullToken.FullProperty.Property.PropertyName == - TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); - break; - case TGSI_TOKEN_TYPE_DECLARATION: - if (parse.FullToken.FullDeclaration.Declaration.File - == TGSI_FILE_CONSTANT) { - uint i; - for (i = parse.FullToken.FullDeclaration.Range.First; - i <= parse.FullToken.FullDeclaration.Range.Last; - i++) { - assert(ifs->constant_flags[i] == 0x0); - ifs->constant_flags[i] = I915_CONSTFLAG_USER; - ifs->num_constants = MAX2(ifs->num_constants, i + 1); - } + case TGSI_TOKEN_TYPE_DECLARATION: + if (token->FullDeclaration.Declaration.File + == TGSI_FILE_CONSTANT) { + uint i; + for (i = token->FullDeclaration.Range.First; + i <= token->FullDeclaration.Range.Last; + i++) { + assert(ifs->constant_flags[i] == 0x0); + ifs->constant_flags[i] = I915_CONSTFLAG_USER; + ifs->num_constants = MAX2(ifs->num_constants, i + 1); } - else if (parse.FullToken.FullDeclaration.Declaration.File - == TGSI_FILE_TEMPORARY) { - uint i; - for (i = parse.FullToken.FullDeclaration.Range.First; - i <= parse.FullToken.FullDeclaration.Range.Last; - i++) { - if (i >= I915_MAX_TEMPORARY) - debug_printf("Too many temps (%d)\n",i); - else - /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ - p->temp_flag |= (1 << i); /* mark temp as used */ - } + } + else if (token->FullDeclaration.Declaration.File + == TGSI_FILE_TEMPORARY) { + uint i; + for (i = token->FullDeclaration.Range.First; + i <= token->FullDeclaration.Range.Last; + i++) { + if (i >= I915_MAX_TEMPORARY) + debug_printf("Too many temps (%d)\n",i); + else + /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ + p->temp_flag |= (1 << i); /* mark temp as used */ } - break; + } + break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - const struct tgsi_full_immediate *imm - = &parse.FullToken.FullImmediate; - const uint pos = p->num_immediates++; - uint j; - assert( imm->Immediate.NrTokens <= 4 + 1 ); - for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { - p->immediates[pos][j] = imm->u[j].Float; - } + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm + = &token->FullImmediate; + const uint pos = p->num_immediates++; + uint j; + assert( imm->Immediate.NrTokens <= 4 + 1 ); + for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { + p->immediates[pos][j] = imm->u[j].Float; } - break; + } + break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (p->first_instruction) { - /* resolve location of immediates */ - uint i, j; - for (i = 0; i < p->num_immediates; i++) { - /* find constant slot for this immediate */ - for (j = 0; j < I915_MAX_CONSTANT; j++) { - if (ifs->constant_flags[j] == 0x0) { - memcpy(ifs->constants[j], - p->immediates[i], - 4 * sizeof(float)); - /*printf("immediate %d maps to const %d\n", i, j);*/ - ifs->constant_flags[j] = 0xf; /* all four comps used */ - p->immediates_map[i] = j; - ifs->num_constants = MAX2(ifs->num_constants, j + 1); - break; - } + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (p->first_instruction) { + /* resolve location of immediates */ + uint i, j; + for (i = 0; i < p->num_immediates; i++) { + /* find constant slot for this immediate */ + for (j = 0; j < I915_MAX_CONSTANT; j++) { + if (ifs->constant_flags[j] == 0x0) { + memcpy(ifs->constants[j], + p->immediates[i], + 4 * sizeof(float)); + /*printf("immediate %d maps to const %d\n", i, j);*/ + ifs->constant_flags[j] = 0xf; /* all four comps used */ + p->immediates_map[i] = j; + ifs->num_constants = MAX2(ifs->num_constants, j + 1); + break; } } - - p->first_instruction = FALSE; } - i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs); - break; - - default: - assert( 0 ); + p->first_instruction = FALSE; } - } /* while */ + i915_translate_instruction(p, &token->FullInstruction, fs); + break; + + default: + assert( 0 ); + } - tgsi_parse_free (&parse); +} + +/** + * Translate TGSI fragment shader into i915 hardware instructions. + * \param p the translation state + * \param tokens the TGSI token array + */ +static void +i915_translate_instructions(struct i915_fp_compile *p, + const struct i915_token_list *tokens, + struct i915_fragment_shader *fs) +{ + int i; + for(i = 0; iNumTokens; i++) { + i915_translate_token(p, &tokens->Tokens[i], fs); + } } @@ -1303,8 +1306,10 @@ i915_translate_fragment_program( struct i915_context *i915, p = i915_init_compile(i915, fs); - i915_translate_instructions(p, tokens, fs); + struct i915_token_list* i_tokens = i915_optimize(tokens); + i915_translate_instructions(p, i_tokens, fs); i915_fixup_depth_write(p); i915_fini_compile(i915, p); + i915_optimize_free(i_tokens); } -- 2.30.2