i915g: introduce the tiny shader optimizer.
[mesa.git] / src / gallium / drivers / i915 / i915_fpc_optimize.c
1 /**************************************************************************
2 *
3 * Copyright 2011 The Chromium OS authors.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "i915_reg.h"
29 #include "i915_context.h"
30 #include "i915_fpc.h"
31
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_string.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
38
39 static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
40 {
41 return (d1->Register.File == d2->Register.File &&
42 d1->Register.Indirect == d2->Register.Indirect &&
43 d1->Register.Dimension == d2->Register.Dimension &&
44 d1->Register.Index == d2->Register.Index);
45 }
46
47 static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
48 {
49 return (d1->Register.File == d2->Register.File &&
50 d1->Register.Indirect == d2->Register.Indirect &&
51 d1->Register.Dimension == d2->Register.Dimension &&
52 d1->Register.Index == d2->Register.Index &&
53 d1->Register.Absolute == d2->Register.Absolute &&
54 d1->Register.Negate == d2->Register.Negate);
55 }
56
57
58 /*
59 * Optimize away things like:
60 * MUL OUT[0].xyz, TEMP[1], TEMP[2]
61 * MOV OUT[0].w, TEMP[2]
62 * into:
63 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
64 * This is useful for optimizing texenv.
65 */
66 static void i915_fpc_optimize_mov_after_mul(union i915_full_token* current, union i915_full_token* next)
67 {
68 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
69 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
70 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL &&
71 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
72 current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ &&
73 next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W &&
74 same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) &&
75 same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) )
76 {
77 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
78 current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
79 current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_ONE;
80 return;
81 }
82
83 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
84 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
85 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL &&
86 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
87 current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ &&
88 next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W &&
89 same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) &&
90 same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) )
91 {
92 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
93 current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
94 current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_ONE;
95 return;
96 }
97 }
98
99 static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
100 {
101 o->File = i->File;
102 o->Indirect = i->Indirect;
103 o->Dimension = i->Dimension;
104 o->Index = i->Index;
105 o->SwizzleX = i->SwizzleX;
106 o->SwizzleY = i->SwizzleY;
107 o->SwizzleZ = i->SwizzleZ;
108 o->SwizzleW = i->SwizzleW;
109 o->Absolute = i->Absolute;
110 o->Negate = i->Negate;
111 }
112
113 static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
114 {
115 o->File = i->File;
116 o->WriteMask = i->WriteMask;
117 o->Indirect = i->Indirect;
118 o->Dimension = i->Dimension;
119 o->Index = i->Index;
120 }
121
122 static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
123 {
124 memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
125 memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
126
127 copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
128
129 copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
130 copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
131 copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
132 }
133
134 static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
135 {
136 if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
137 memcpy(o, i, sizeof(*o));
138 else
139 copy_instruction(&o->FullInstruction, &i->FullInstruction);
140
141 }
142
143 struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
144 {
145 struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
146 out_tokens->NumTokens = 0;
147 struct tgsi_parse_context parse;
148 int i = 0;
149
150 /* Count the tokens */
151 tgsi_parse_init( &parse, tokens );
152 while( !tgsi_parse_end_of_tokens( &parse ) ) {
153 tgsi_parse_token( &parse );
154 out_tokens->NumTokens++;
155 }
156 tgsi_parse_free (&parse);
157
158 /* Allocate our tokens */
159 out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
160
161 tgsi_parse_init( &parse, tokens );
162 while( !tgsi_parse_end_of_tokens( &parse ) ) {
163 tgsi_parse_token( &parse );
164 copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
165
166 if (i > 0)
167 i915_fpc_optimize_mov_after_mul(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
168
169 i++;
170 }
171 tgsi_parse_free (&parse);
172
173 return out_tokens;
174 }
175
176 void i915_optimize_free(struct i915_token_list* tokens)
177 {
178 free(tokens->Tokens);
179 free(tokens);
180 }
181
182