/**************************************************************************
 *
 * Copyright 2011 The Chromium OS authors.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
29 #include "i915_context.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_string.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "tgsi/tgsi_exec.h"
40 struct i915_optimize_context
42 int first_write
[TGSI_EXEC_NUM_TEMPS
];
43 int last_read
[TGSI_EXEC_NUM_TEMPS
];
46 static boolean
same_src_dst_reg(struct i915_full_src_register
*s1
, struct i915_full_dst_register
*d1
)
48 return (s1
->Register
.File
== d1
->Register
.File
&&
49 s1
->Register
.Indirect
== d1
->Register
.Indirect
&&
50 s1
->Register
.Dimension
== d1
->Register
.Dimension
&&
51 s1
->Register
.Index
== d1
->Register
.Index
);
54 static boolean
same_dst_reg(struct i915_full_dst_register
*d1
, struct i915_full_dst_register
*d2
)
56 return (d1
->Register
.File
== d2
->Register
.File
&&
57 d1
->Register
.Indirect
== d2
->Register
.Indirect
&&
58 d1
->Register
.Dimension
== d2
->Register
.Dimension
&&
59 d1
->Register
.Index
== d2
->Register
.Index
);
62 static boolean
same_src_reg(struct i915_full_src_register
*d1
, struct i915_full_src_register
*d2
)
64 return (d1
->Register
.File
== d2
->Register
.File
&&
65 d1
->Register
.Indirect
== d2
->Register
.Indirect
&&
66 d1
->Register
.Dimension
== d2
->Register
.Dimension
&&
67 d1
->Register
.Index
== d2
->Register
.Index
&&
68 d1
->Register
.Absolute
== d2
->Register
.Absolute
&&
69 d1
->Register
.Negate
== d2
->Register
.Negate
);
75 unsigned neutral_element
;
78 } op_table
[TGSI_OPCODE_LAST
] = {
79 [ TGSI_OPCODE_ABS
] = { false, false, 0, 1, 1 },
80 [ TGSI_OPCODE_ADD
] = { false, true, TGSI_SWIZZLE_ZERO
, 1, 2 },
81 [ TGSI_OPCODE_CEIL
] = { false, false, 0, 1, 1 },
82 [ TGSI_OPCODE_CMP
] = { false, false, 0, 1, 2 },
83 [ TGSI_OPCODE_COS
] = { false, false, 0, 1, 1 },
84 [ TGSI_OPCODE_DDX
] = { false, false, 0, 1, 0 },
85 [ TGSI_OPCODE_DDY
] = { false, false, 0, 1, 0 },
86 [ TGSI_OPCODE_DP2
] = { false, true, TGSI_SWIZZLE_ONE
, 1, 2 },
87 [ TGSI_OPCODE_DP3
] = { false, true, TGSI_SWIZZLE_ONE
, 1, 2 },
88 [ TGSI_OPCODE_DP4
] = { false, true, TGSI_SWIZZLE_ONE
, 1, 2 },
89 [ TGSI_OPCODE_DPH
] = { false, false, 0, 1, 2 },
90 [ TGSI_OPCODE_DST
] = { false, false, 0, 1, 2 },
91 [ TGSI_OPCODE_END
] = { false, false, 0, 0, 0 },
92 [ TGSI_OPCODE_EX2
] = { false, false, 0, 1, 1 },
93 [ TGSI_OPCODE_FLR
] = { false, false, 0, 1, 1 },
94 [ TGSI_OPCODE_FRC
] = { false, false, 0, 1, 1 },
95 [ TGSI_OPCODE_KILL_IF
] = { false, false, 0, 0, 1 },
96 [ TGSI_OPCODE_KILL
] = { false, false, 0, 0, 0 },
97 [ TGSI_OPCODE_LG2
] = { false, false, 0, 1, 1 },
98 [ TGSI_OPCODE_LIT
] = { false, false, 0, 1, 1 },
99 [ TGSI_OPCODE_LRP
] = { false, false, 0, 1, 3 },
100 [ TGSI_OPCODE_MAX
] = { false, false, 0, 1, 2 },
101 [ TGSI_OPCODE_MAD
] = { false, false, 0, 1, 3 },
102 [ TGSI_OPCODE_MIN
] = { false, false, 0, 1, 2 },
103 [ TGSI_OPCODE_MOV
] = { false, false, 0, 1, 1 },
104 [ TGSI_OPCODE_MUL
] = { false, true, TGSI_SWIZZLE_ONE
, 1, 2 },
105 [ TGSI_OPCODE_NOP
] = { false, false, 0, 0, 0 },
106 [ TGSI_OPCODE_POW
] = { false, false, 0, 1, 2 },
107 [ TGSI_OPCODE_RCP
] = { false, false, 0, 1, 1 },
108 [ TGSI_OPCODE_RET
] = { false, false, 0, 0, 0 },
109 [ TGSI_OPCODE_RSQ
] = { false, false, 0, 1, 1 },
110 [ TGSI_OPCODE_SCS
] = { false, false, 0, 1, 1 },
111 [ TGSI_OPCODE_SEQ
] = { false, false, 0, 1, 2 },
112 [ TGSI_OPCODE_SGE
] = { false, false, 0, 1, 2 },
113 [ TGSI_OPCODE_SGT
] = { false, false, 0, 1, 2 },
114 [ TGSI_OPCODE_SIN
] = { false, false, 0, 1, 1 },
115 [ TGSI_OPCODE_SLE
] = { false, false, 0, 1, 2 },
116 [ TGSI_OPCODE_SLT
] = { false, false, 0, 1, 2 },
117 [ TGSI_OPCODE_SNE
] = { false, false, 0, 1, 2 },
118 [ TGSI_OPCODE_SSG
] = { false, false, 0, 1, 1 },
119 [ TGSI_OPCODE_SUB
] = { false, false, 0, 1, 2 },
120 [ TGSI_OPCODE_TEX
] = { true, false, 0, 1, 2 },
121 [ TGSI_OPCODE_TRUNC
] = { false, false, 0, 1, 1 },
122 [ TGSI_OPCODE_TXB
] = { true, false, 0, 1, 2 },
123 [ TGSI_OPCODE_TXP
] = { true, false, 0, 1, 2 },
124 [ TGSI_OPCODE_XPD
] = { false, false, 0, 1, 2 },
127 static boolean
op_has_dst(unsigned opcode
)
129 return (op_table
[opcode
].num_dst
> 0);
132 static int op_num_dst(unsigned opcode
)
134 return op_table
[opcode
].num_dst
;
137 static int op_num_src(unsigned opcode
)
139 return op_table
[opcode
].num_src
;
142 static boolean
op_commutes(unsigned opcode
)
144 return op_table
[opcode
].commutes
;
147 static unsigned mask_for_unswizzled(int num_components
)
150 switch(num_components
)
153 mask
|= TGSI_WRITEMASK_W
;
155 mask
|= TGSI_WRITEMASK_Z
;
157 mask
|= TGSI_WRITEMASK_Y
;
159 mask
|= TGSI_WRITEMASK_X
;
164 static boolean
is_unswizzled(struct i915_full_src_register
*r
,
167 if ( write_mask
& TGSI_WRITEMASK_X
&& r
->Register
.SwizzleX
!= TGSI_SWIZZLE_X
)
169 if ( write_mask
& TGSI_WRITEMASK_Y
&& r
->Register
.SwizzleY
!= TGSI_SWIZZLE_Y
)
171 if ( write_mask
& TGSI_WRITEMASK_Z
&& r
->Register
.SwizzleZ
!= TGSI_SWIZZLE_Z
)
173 if ( write_mask
& TGSI_WRITEMASK_W
&& r
->Register
.SwizzleW
!= TGSI_SWIZZLE_W
)
178 static boolean
op_is_texture(unsigned opcode
)
180 return op_table
[opcode
].is_texture
;
183 static unsigned op_neutral_element(unsigned opcode
)
185 unsigned ne
= op_table
[opcode
].neutral_element
;
187 debug_printf("No neutral element for opcode %d\n",opcode
);
188 ne
= TGSI_SWIZZLE_ZERO
;
194 * Sets the swizzle to the neutral element for the operation for the bits
195 * of writemask which are set, swizzle to identity otherwise.
197 static void set_neutral_element_swizzle(struct i915_full_src_register
*r
,
201 if ( write_mask
& TGSI_WRITEMASK_X
)
202 r
->Register
.SwizzleX
= neutral
;
204 r
->Register
.SwizzleX
= TGSI_SWIZZLE_X
;
206 if ( write_mask
& TGSI_WRITEMASK_Y
)
207 r
->Register
.SwizzleY
= neutral
;
209 r
->Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
211 if ( write_mask
& TGSI_WRITEMASK_Z
)
212 r
->Register
.SwizzleZ
= neutral
;
214 r
->Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
216 if ( write_mask
& TGSI_WRITEMASK_W
)
217 r
->Register
.SwizzleW
= neutral
;
219 r
->Register
.SwizzleW
= TGSI_SWIZZLE_W
;
222 static void copy_src_reg(struct i915_src_register
*o
, const struct tgsi_src_register
*i
)
225 o
->Indirect
= i
->Indirect
;
226 o
->Dimension
= i
->Dimension
;
228 o
->SwizzleX
= i
->SwizzleX
;
229 o
->SwizzleY
= i
->SwizzleY
;
230 o
->SwizzleZ
= i
->SwizzleZ
;
231 o
->SwizzleW
= i
->SwizzleW
;
232 o
->Absolute
= i
->Absolute
;
233 o
->Negate
= i
->Negate
;
236 static void copy_dst_reg(struct i915_dst_register
*o
, const struct tgsi_dst_register
*i
)
239 o
->WriteMask
= i
->WriteMask
;
240 o
->Indirect
= i
->Indirect
;
241 o
->Dimension
= i
->Dimension
;
245 static void copy_instruction(struct i915_full_instruction
*o
, const struct tgsi_full_instruction
*i
)
247 memcpy(&o
->Instruction
, &i
->Instruction
, sizeof(o
->Instruction
));
248 memcpy(&o
->Texture
, &i
->Texture
, sizeof(o
->Texture
));
250 copy_dst_reg(&o
->Dst
[0].Register
, &i
->Dst
[0].Register
);
252 copy_src_reg(&o
->Src
[0].Register
, &i
->Src
[0].Register
);
253 copy_src_reg(&o
->Src
[1].Register
, &i
->Src
[1].Register
);
254 copy_src_reg(&o
->Src
[2].Register
, &i
->Src
[2].Register
);
257 static void copy_token(union i915_full_token
*o
, union tgsi_full_token
*i
)
259 if (i
->Token
.Type
!= TGSI_TOKEN_TYPE_INSTRUCTION
)
260 memcpy(o
, i
, sizeof(*o
));
262 copy_instruction(&o
->FullInstruction
, &i
->FullInstruction
);
266 static void liveness_mark_written(struct i915_optimize_context
*ctx
,
267 struct i915_full_dst_register
*dst_reg
,
271 if (dst_reg
->Register
.File
== TGSI_FILE_TEMPORARY
) {
272 dst_reg_index
= dst_reg
->Register
.Index
;
273 assert(dst_reg_index
< TGSI_EXEC_NUM_TEMPS
);
274 /* dead -> live transition */
275 if (ctx
->first_write
[dst_reg_index
] != -1)
276 ctx
->first_write
[dst_reg_index
] = pos
;
280 static void liveness_mark_read(struct i915_optimize_context
*ctx
,
281 struct i915_full_src_register
*src_reg
,
285 if (src_reg
->Register
.File
== TGSI_FILE_TEMPORARY
) {
286 src_reg_index
= src_reg
->Register
.Index
;
287 assert(src_reg_index
< TGSI_EXEC_NUM_TEMPS
);
288 /* live -> dead transition */
289 if (ctx
->last_read
[src_reg_index
] != -1)
290 ctx
->last_read
[src_reg_index
] = pos
;
294 static void liveness_analysis(struct i915_optimize_context
*ctx
,
295 struct i915_token_list
*tokens
)
297 struct i915_full_dst_register
*dst_reg
;
298 struct i915_full_src_register
*src_reg
;
299 union i915_full_token
*current
;
301 int num_dst
, num_src
;
304 for(i
= 0; i
< TGSI_EXEC_NUM_TEMPS
; i
++)
306 ctx
->first_write
[i
] = -1;
307 ctx
->last_read
[i
] = -1;
310 for(i
= 0; i
< tokens
->NumTokens
; i
++)
312 current
= &tokens
->Tokens
[i
];
314 if (current
->Token
.Type
!= TGSI_TOKEN_TYPE_INSTRUCTION
)
317 opcode
= current
->FullInstruction
.Instruction
.Opcode
;
318 num_dst
= op_num_dst(opcode
);
323 dst_reg
= ¤t
->FullInstruction
.Dst
[0];
324 liveness_mark_written(ctx
, dst_reg
, i
);
328 debug_printf("Op %d has %d dst regs\n", opcode
, num_dst
);
333 for(i
= tokens
->NumTokens
- 1; i
>= 0; i
--)
335 current
= &tokens
->Tokens
[i
];
337 if (current
->Token
.Type
!= TGSI_TOKEN_TYPE_INSTRUCTION
)
340 opcode
= current
->FullInstruction
.Instruction
.Opcode
;
341 num_src
= op_num_src(opcode
);
346 src_reg
= ¤t
->FullInstruction
.Src
[2];
347 liveness_mark_read(ctx
, src_reg
, i
);
349 src_reg
= ¤t
->FullInstruction
.Src
[1];
350 liveness_mark_read(ctx
, src_reg
, i
);
352 src_reg
= ¤t
->FullInstruction
.Src
[0];
353 liveness_mark_read(ctx
, src_reg
, i
);
357 debug_printf("Op %d has %d src regs\n", opcode
, num_src
);
363 static int unused_from(struct i915_optimize_context
*ctx
, struct i915_full_dst_register
*dst_reg
, int from
)
365 int dst_reg_index
= dst_reg
->Register
.Index
;
366 assert(dst_reg_index
< TGSI_EXEC_NUM_TEMPS
);
367 return (from
>= ctx
->last_read
[dst_reg_index
]);
370 /* Returns a mask with the components used for a texture access instruction */
371 static unsigned i915_tex_mask(union i915_full_token
*instr
)
375 /* Get the number of coords */
376 mask
= mask_for_unswizzled(i915_num_coords(instr
->FullInstruction
.Texture
.Texture
));
378 /* Add the W component if projective */
379 if (instr
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_TXP
)
380 mask
|= TGSI_WRITEMASK_W
;
385 static boolean
target_is_texture2d(uint tex
)
388 case TGSI_TEXTURE_2D
:
389 case TGSI_TEXTURE_RECT
:
398 * Optimize away useless indirect texture reads:
399 * MOV TEMP[0].xy, IN[0].xyyy
400 * TEX TEMP[1], TEMP[0], SAMP[0], 2D
402 * TEX TEMP[1], IN[0], SAMP[0], 2D
404 * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
406 static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context
*ctx
,
407 struct i915_token_list
*tokens
,
410 union i915_full_token
*current
= &tokens
->Tokens
[index
- 1];
411 union i915_full_token
*next
= &tokens
->Tokens
[index
];
413 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
414 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
415 current
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
416 op_is_texture(next
->FullInstruction
.Instruction
.Opcode
) &&
417 target_is_texture2d(next
->FullInstruction
.Texture
.Texture
) &&
418 same_src_dst_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Dst
[0]) &&
419 is_unswizzled(¤t
->FullInstruction
.Src
[0], i915_tex_mask(next
)) &&
420 unused_from(ctx
, ¤t
->FullInstruction
.Dst
[0], index
))
422 memcpy(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Src
[0], sizeof(struct i915_src_register
));
423 current
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
428 * Optimize away things like:
429 * MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
430 * MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
433 * MOV OUT[0].xyw, TEMP[1].xyww
435 static void i915_fpc_optimize_mov_after_mov(union i915_full_token
*current
, union i915_full_token
*next
)
437 struct i915_full_src_register
*src_reg1
, *src_reg2
;
438 struct i915_full_dst_register
*dst_reg1
, *dst_reg2
;
439 unsigned swizzle_x
, swizzle_y
, swizzle_z
, swizzle_w
;
441 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
442 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
443 current
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
444 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
445 current
->FullInstruction
.Instruction
.Saturate
== next
->FullInstruction
.Instruction
.Saturate
&&
446 same_dst_reg(&next
->FullInstruction
.Dst
[0], ¤t
->FullInstruction
.Dst
[0]) &&
447 same_src_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Src
[0]) &&
448 !same_src_dst_reg(¤t
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Dst
[0]) )
450 src_reg1
= ¤t
->FullInstruction
.Src
[0];
451 dst_reg1
= ¤t
->FullInstruction
.Dst
[0];
452 src_reg2
= &next
->FullInstruction
.Src
[0];
453 dst_reg2
= &next
->FullInstruction
.Dst
[0];
455 /* Start with swizzles from the first mov */
456 swizzle_x
= src_reg1
->Register
.SwizzleX
;
457 swizzle_y
= src_reg1
->Register
.SwizzleY
;
458 swizzle_z
= src_reg1
->Register
.SwizzleZ
;
459 swizzle_w
= src_reg1
->Register
.SwizzleW
;
461 /* Pile the second mov on top */
462 if (dst_reg2
->Register
.WriteMask
& TGSI_WRITEMASK_X
)
463 swizzle_x
= src_reg2
->Register
.SwizzleX
;
464 if (dst_reg2
->Register
.WriteMask
& TGSI_WRITEMASK_Y
)
465 swizzle_y
= src_reg2
->Register
.SwizzleY
;
466 if (dst_reg2
->Register
.WriteMask
& TGSI_WRITEMASK_Z
)
467 swizzle_z
= src_reg2
->Register
.SwizzleZ
;
468 if (dst_reg2
->Register
.WriteMask
& TGSI_WRITEMASK_W
)
469 swizzle_w
= src_reg2
->Register
.SwizzleW
;
471 dst_reg2
->Register
.WriteMask
|= dst_reg1
->Register
.WriteMask
;
472 src_reg2
->Register
.SwizzleX
= swizzle_x
;
473 src_reg2
->Register
.SwizzleY
= swizzle_y
;
474 src_reg2
->Register
.SwizzleZ
= swizzle_z
;
475 src_reg2
->Register
.SwizzleW
= swizzle_w
;
477 current
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
484 * Optimize away things like:
485 * MUL OUT[0].xyz, TEMP[1], TEMP[2]
486 * MOV OUT[0].w, TEMP[2]
488 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
489 * This is useful for optimizing texenv.
491 static void i915_fpc_optimize_mov_after_alu(union i915_full_token
*current
, union i915_full_token
*next
)
493 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
494 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
495 op_commutes(current
->FullInstruction
.Instruction
.Opcode
) &&
496 current
->FullInstruction
.Instruction
.Saturate
== next
->FullInstruction
.Instruction
.Saturate
&&
497 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
498 same_dst_reg(&next
->FullInstruction
.Dst
[0], ¤t
->FullInstruction
.Dst
[0]) &&
499 same_src_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Src
[1]) &&
500 !same_src_dst_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Dst
[0]) &&
501 is_unswizzled(¤t
->FullInstruction
.Src
[0], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
502 is_unswizzled(¤t
->FullInstruction
.Src
[1], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
503 is_unswizzled(&next
->FullInstruction
.Src
[0], next
->FullInstruction
.Dst
[0].Register
.WriteMask
) )
505 next
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
507 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[1], 0, 0);
508 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[0],
509 next
->FullInstruction
.Dst
[0].Register
.WriteMask
,
510 op_neutral_element(current
->FullInstruction
.Instruction
.Opcode
));
512 current
->FullInstruction
.Dst
[0].Register
.WriteMask
= current
->FullInstruction
.Dst
[0].Register
.WriteMask
|
513 next
->FullInstruction
.Dst
[0].Register
.WriteMask
;
517 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
518 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
519 op_commutes(current
->FullInstruction
.Instruction
.Opcode
) &&
520 current
->FullInstruction
.Instruction
.Saturate
== next
->FullInstruction
.Instruction
.Saturate
&&
521 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
522 same_dst_reg(&next
->FullInstruction
.Dst
[0], ¤t
->FullInstruction
.Dst
[0]) &&
523 same_src_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Src
[0]) &&
524 !same_src_dst_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Dst
[0]) &&
525 is_unswizzled(¤t
->FullInstruction
.Src
[0], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
526 is_unswizzled(¤t
->FullInstruction
.Src
[1], current
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
527 is_unswizzled(&next
->FullInstruction
.Src
[0], next
->FullInstruction
.Dst
[0].Register
.WriteMask
) )
529 next
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
531 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[0], 0, 0);
532 set_neutral_element_swizzle(¤t
->FullInstruction
.Src
[1],
533 next
->FullInstruction
.Dst
[0].Register
.WriteMask
,
534 op_neutral_element(current
->FullInstruction
.Instruction
.Opcode
));
536 current
->FullInstruction
.Dst
[0].Register
.WriteMask
= current
->FullInstruction
.Dst
[0].Register
.WriteMask
|
537 next
->FullInstruction
.Dst
[0].Register
.WriteMask
;
543 * Optimize away things like:
544 * MOV TEMP[0].xyz TEMP[0].xyzx
548 static boolean
i915_fpc_useless_mov(union tgsi_full_token
*tgsi_current
)
550 union i915_full_token current
;
551 copy_token(¤t
, tgsi_current
);
552 if ( current
.Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
553 current
.FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
554 op_has_dst(current
.FullInstruction
.Instruction
.Opcode
) &&
555 !current
.FullInstruction
.Instruction
.Saturate
&&
556 current
.FullInstruction
.Src
[0].Register
.Absolute
== 0 &&
557 current
.FullInstruction
.Src
[0].Register
.Negate
== 0 &&
558 is_unswizzled(¤t
.FullInstruction
.Src
[0], current
.FullInstruction
.Dst
[0].Register
.WriteMask
) &&
559 same_src_dst_reg(¤t
.FullInstruction
.Src
[0], ¤t
.FullInstruction
.Dst
[0]) )
567 * Optimize away things like:
568 * *** TEMP[0], TEMP[1], TEMP[2]
571 * *** OUT[0], TEMP[1], TEMP[2]
573 static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context
*ctx
,
574 struct i915_token_list
*tokens
,
577 union i915_full_token
*current
= &tokens
->Tokens
[index
- 1];
578 union i915_full_token
*next
= &tokens
->Tokens
[index
];
580 // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
581 if ( current
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
582 next
->Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
&&
583 next
->FullInstruction
.Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
584 op_has_dst(current
->FullInstruction
.Instruction
.Opcode
) &&
585 !next
->FullInstruction
.Instruction
.Saturate
&&
586 next
->FullInstruction
.Src
[0].Register
.Absolute
== 0 &&
587 next
->FullInstruction
.Src
[0].Register
.Negate
== 0 &&
588 unused_from(ctx
, ¤t
->FullInstruction
.Dst
[0], index
) &&
589 current
->FullInstruction
.Dst
[0].Register
.WriteMask
== TGSI_WRITEMASK_XYZW
&&
590 is_unswizzled(&next
->FullInstruction
.Src
[0], next
->FullInstruction
.Dst
[0].Register
.WriteMask
) &&
591 current
->FullInstruction
.Dst
[0].Register
.WriteMask
== next
->FullInstruction
.Dst
[0].Register
.WriteMask
&&
592 same_src_dst_reg(&next
->FullInstruction
.Src
[0], ¤t
->FullInstruction
.Dst
[0]) )
594 next
->FullInstruction
.Instruction
.Opcode
= TGSI_OPCODE_NOP
;
596 current
->FullInstruction
.Dst
[0] = next
->FullInstruction
.Dst
[0];
601 struct i915_token_list
* i915_optimize(const struct tgsi_token
*tokens
)
603 struct i915_token_list
*out_tokens
= MALLOC(sizeof(struct i915_token_list
));
604 struct tgsi_parse_context parse
;
605 struct i915_optimize_context
*ctx
;
608 ctx
= malloc(sizeof(*ctx
));
610 out_tokens
->NumTokens
= 0;
612 /* Count the tokens */
613 tgsi_parse_init( &parse
, tokens
);
614 while( !tgsi_parse_end_of_tokens( &parse
) ) {
615 tgsi_parse_token( &parse
);
616 out_tokens
->NumTokens
++;
618 tgsi_parse_free (&parse
);
620 /* Allocate our tokens */
621 out_tokens
->Tokens
= MALLOC(sizeof(union i915_full_token
) * out_tokens
->NumTokens
);
623 tgsi_parse_init( &parse
, tokens
);
624 while( !tgsi_parse_end_of_tokens( &parse
) ) {
625 tgsi_parse_token( &parse
);
627 if (i915_fpc_useless_mov(&parse
.FullToken
)) {
628 out_tokens
->NumTokens
--;
632 copy_token(&out_tokens
->Tokens
[i
] , &parse
.FullToken
);
636 tgsi_parse_free (&parse
);
638 liveness_analysis(ctx
, out_tokens
);
641 while( i
< out_tokens
->NumTokens
) {
642 i915_fpc_optimize_useless_mov_after_inst(ctx
, out_tokens
, i
);
643 i915_fpc_optimize_mov_after_alu(&out_tokens
->Tokens
[i
-1], &out_tokens
->Tokens
[i
]);
644 i915_fpc_optimize_mov_after_mov(&out_tokens
->Tokens
[i
-1], &out_tokens
->Tokens
[i
]);
645 i915_fpc_optimize_mov_before_tex(ctx
, out_tokens
, i
);
654 void i915_optimize_free(struct i915_token_list
*tokens
)
656 free(tokens
->Tokens
);