/*
 * Copyright (C) 2009 Nicolai Haehnle.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
28 #include "radeon_dataflow.h"
30 #include "radeon_compiler.h"
33 struct updatemask_state
{
34 unsigned char Output
[RC_REGISTER_MAX_INDEX
];
35 unsigned char Temporary
[RC_REGISTER_MAX_INDEX
];
36 unsigned char Address
;
37 unsigned char Special
[RC_NUM_SPECIAL_REGISTERS
];
/**
 * Result of the analysis for one instruction; the array of these is
 * indexed by the instruction's IP.
 */
struct instruction_state {
	/* Destination channels whose values were found to be used. */
	unsigned char WriteMask:4;
	/* Set when the instruction's ALU-result write is needed. */
	unsigned char WriteALUResult:1;
	/* Per source operand: source channels already accounted for. */
	unsigned char SrcReg[3];
};
/**
 * Bookkeeping for one loop on the loop stack.
 *
 * push_break() appends a copy of the current update-mask state to Breaks.
 */
struct loopinfo {
	/* Growable array of update-mask snapshots, one per recorded break. */
	struct updatemask_state * Breaks;
	/* Number of valid entries in Breaks. */
	unsigned int BreakCount;
	/* Passed to memory_pool_array_reserve(); presumably the allocated
	 * capacity of Breaks - TODO confirm against the macro. */
	unsigned int BreaksReserved;
};
53 unsigned int HaveElse
:1;
55 struct updatemask_state StoreEndif
;
56 struct updatemask_state StoreElse
;
59 struct deadcode_state
{
60 struct radeon_compiler
* C
;
61 struct instruction_state
* Instructions
;
63 struct updatemask_state R
;
65 struct branchinfo
* BranchStack
;
66 unsigned int BranchStackSize
;
67 unsigned int BranchStackReserved
;
69 struct loopinfo
* LoopStack
;
70 unsigned int LoopStackSize
;
71 unsigned int LoopStackReserved
;
75 static void or_updatemasks(
76 struct updatemask_state
* dst
,
77 struct updatemask_state
* a
,
78 struct updatemask_state
* b
)
80 for(unsigned int i
= 0; i
< RC_REGISTER_MAX_INDEX
; ++i
) {
81 dst
->Output
[i
] = a
->Output
[i
] | b
->Output
[i
];
82 dst
->Temporary
[i
] = a
->Temporary
[i
] | b
->Temporary
[i
];
85 for(unsigned int i
= 0; i
< RC_NUM_SPECIAL_REGISTERS
; ++i
)
86 dst
->Special
[i
] = a
->Special
[i
] | b
->Special
[i
];
88 dst
->Address
= a
->Address
| b
->Address
;
91 static void push_break(struct deadcode_state
*s
)
93 struct loopinfo
* loop
= &s
->LoopStack
[s
->LoopStackSize
- 1];
94 memory_pool_array_reserve(&s
->C
->Pool
, struct updatemask_state
,
95 loop
->Breaks
, loop
->BreakCount
, loop
->BreaksReserved
, 1);
97 memcpy(&loop
->Breaks
[loop
->BreakCount
++], &s
->R
, sizeof(s
->R
));
100 static void push_loop(struct deadcode_state
* s
)
102 memory_pool_array_reserve(&s
->C
->Pool
, struct loopinfo
, s
->LoopStack
,
103 s
->LoopStackSize
, s
->LoopStackReserved
, 1);
104 memset(&s
->LoopStack
[s
->LoopStackSize
++], 0, sizeof(struct loopinfo
));
107 static void push_branch(struct deadcode_state
* s
)
109 struct branchinfo
* branch
;
111 memory_pool_array_reserve(&s
->C
->Pool
, struct branchinfo
, s
->BranchStack
,
112 s
->BranchStackSize
, s
->BranchStackReserved
, 1);
114 branch
= &s
->BranchStack
[s
->BranchStackSize
++];
115 branch
->HaveElse
= 0;
116 memcpy(&branch
->StoreEndif
, &s
->R
, sizeof(s
->R
));
119 static unsigned char * get_used_ptr(struct deadcode_state
*s
, rc_register_file file
, unsigned int index
)
121 if (file
== RC_FILE_OUTPUT
|| file
== RC_FILE_TEMPORARY
) {
122 if (index
>= RC_REGISTER_MAX_INDEX
) {
123 rc_error(s
->C
, "%s: index %i is out of bounds for file %i\n", __FUNCTION__
, index
, file
);
127 if (file
== RC_FILE_OUTPUT
)
128 return &s
->R
.Output
[index
];
130 return &s
->R
.Temporary
[index
];
131 } else if (file
== RC_FILE_ADDRESS
) {
132 return &s
->R
.Address
;
133 } else if (file
== RC_FILE_SPECIAL
) {
134 if (index
>= RC_NUM_SPECIAL_REGISTERS
) {
135 rc_error(s
->C
, "%s: special file index %i out of bounds\n", __FUNCTION__
, index
);
139 return &s
->R
.Special
[index
];
145 static void mark_used(struct deadcode_state
* s
, rc_register_file file
, unsigned int index
, unsigned int mask
)
147 unsigned char * pused
= get_used_ptr(s
, file
, index
);
152 static void update_instruction(struct deadcode_state
* s
, struct rc_instruction
* inst
)
154 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
155 struct instruction_state
* insts
= &s
->Instructions
[inst
->IP
];
156 unsigned int usedmask
= 0;
157 unsigned int srcmasks
[3];
159 if (opcode
->HasDstReg
) {
160 unsigned char * pused
= get_used_ptr(s
, inst
->U
.I
.DstReg
.File
, inst
->U
.I
.DstReg
.Index
);
162 usedmask
= *pused
& inst
->U
.I
.DstReg
.WriteMask
;
163 if (!inst
->U
.I
.DstReg
.RelAddr
)
167 if (inst
->U
.I
.DstReg
.RelAddr
)
168 mark_used(s
, RC_FILE_ADDRESS
, 0, RC_MASK_X
);
171 insts
->WriteMask
|= usedmask
;
173 if (inst
->U
.I
.WriteALUResult
) {
174 unsigned char * pused
= get_used_ptr(s
, RC_FILE_SPECIAL
, RC_SPECIAL_ALU_RESULT
);
175 if (pused
&& *pused
) {
176 if (inst
->U
.I
.WriteALUResult
== RC_ALURESULT_X
)
177 usedmask
|= RC_MASK_X
;
178 else if (inst
->U
.I
.WriteALUResult
== RC_ALURESULT_W
)
179 usedmask
|= RC_MASK_W
;
182 insts
->WriteALUResult
= 1;
186 rc_compute_sources_for_writemask(inst
, usedmask
, srcmasks
);
188 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
189 unsigned int refmask
= 0;
190 unsigned int newsrcmask
= srcmasks
[src
] & ~insts
->SrcReg
[src
];
191 insts
->SrcReg
[src
] |= newsrcmask
;
193 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
194 if (GET_BIT(newsrcmask
, chan
))
195 refmask
|= 1 << GET_SWZ(inst
->U
.I
.SrcReg
[src
].Swizzle
, chan
);
198 /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
199 refmask
&= RC_MASK_XYZW
;
204 mark_used(s
, inst
->U
.I
.SrcReg
[src
].File
, inst
->U
.I
.SrcReg
[src
].Index
, refmask
);
206 if (inst
->U
.I
.SrcReg
[src
].RelAddr
)
207 mark_used(s
, RC_FILE_ADDRESS
, 0, RC_MASK_X
);
211 static void mark_output_use(void * data
, unsigned int index
, unsigned int mask
)
213 struct deadcode_state
* s
= data
;
215 mark_used(s
, RC_FILE_OUTPUT
, index
, mask
);
218 void rc_dataflow_deadcode(struct radeon_compiler
* c
, void *user
)
220 struct deadcode_state s
;
221 unsigned int nr_instructions
;
222 unsigned has_temp_reladdr_src
= 0;
223 rc_dataflow_mark_outputs_fn dce
= (rc_dataflow_mark_outputs_fn
)user
;
226 /* Give up if there is relative addressing of destination operands. */
227 for(struct rc_instruction
* inst
= c
->Program
.Instructions
.Next
;
228 inst
!= &c
->Program
.Instructions
;
230 const struct rc_opcode_info
*opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
231 if (opcode
->HasDstReg
&&
232 inst
->U
.I
.DstReg
.WriteMask
&&
233 inst
->U
.I
.DstReg
.RelAddr
) {
238 memset(&s
, 0, sizeof(s
));
241 nr_instructions
= rc_recompute_ips(c
);
242 s
.Instructions
= memory_pool_malloc(&c
->Pool
, sizeof(struct instruction_state
)*nr_instructions
);
243 memset(s
.Instructions
, 0, sizeof(struct instruction_state
)*nr_instructions
);
245 dce(c
, &s
, &mark_output_use
);
247 for(struct rc_instruction
* inst
= c
->Program
.Instructions
.Prev
;
248 inst
!= &c
->Program
.Instructions
;
250 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
252 switch(opcode
->Opcode
){
253 /* Mark all sources in the loop body as used before doing
254 * normal deadcode analysis. This is probably not optimal.
256 case RC_OPCODE_ENDLOOP
:
259 struct rc_instruction
*ptr
;
260 for(ptr
= inst
->Prev
; endloops
> 0; ptr
= ptr
->Prev
){
261 opcode
= rc_get_opcode_info(ptr
->U
.I
.Opcode
);
262 if(ptr
->U
.I
.Opcode
== RC_OPCODE_BGNLOOP
){
266 if(ptr
->U
.I
.Opcode
== RC_OPCODE_ENDLOOP
){
270 if(opcode
->HasDstReg
){
272 unsigned int srcmasks
[3];
273 rc_compute_sources_for_writemask(ptr
,
274 ptr
->U
.I
.DstReg
.WriteMask
, srcmasks
);
275 for(src
=0; src
< opcode
->NumSrcRegs
; src
++){
277 ptr
->U
.I
.SrcReg
[src
].File
,
278 ptr
->U
.I
.SrcReg
[src
].Index
,
289 case RC_OPCODE_BGNLOOP
:
292 struct loopinfo
* loop
= &s
.LoopStack
[s
.LoopStackSize
-1];
293 for(i
= 0; i
< loop
->BreakCount
; i
++) {
294 or_updatemasks(&s
.R
, &s
.R
, &loop
->Breaks
[i
]);
300 case RC_OPCODE_ENDIF
:
304 if (opcode
->IsFlowControl
&& s
.BranchStackSize
) {
305 struct branchinfo
* branch
= &s
.BranchStack
[s
.BranchStackSize
-1];
306 if (opcode
->Opcode
== RC_OPCODE_IF
) {
309 branch
->HaveElse
? &branch
->StoreElse
: &branch
->StoreEndif
);
312 } else if (opcode
->Opcode
== RC_OPCODE_ELSE
) {
313 if (branch
->HaveElse
) {
314 rc_error(c
, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__
);
316 memcpy(&branch
->StoreElse
, &s
.R
, sizeof(s
.R
));
317 memcpy(&s
.R
, &branch
->StoreEndif
, sizeof(s
.R
));
318 branch
->HaveElse
= 1;
321 rc_error(c
, "%s: Unhandled control flow instruction %s\n", __FUNCTION__
, opcode
->Name
);
325 if (!has_temp_reladdr_src
) {
326 for (unsigned i
= 0; i
< opcode
->NumSrcRegs
; i
++) {
327 if (inst
->U
.I
.SrcReg
[i
].File
== RC_FILE_TEMPORARY
&&
328 inst
->U
.I
.SrcReg
[i
].RelAddr
) {
329 /* If there is a register read from a temporary file with relative addressing,
330 * mark all preceding written registers as used. */
331 for (struct rc_instruction
*ptr
= inst
->Prev
;
332 ptr
!= &c
->Program
.Instructions
;
334 opcode
= rc_get_opcode_info(ptr
->U
.I
.Opcode
);
335 if (opcode
->HasDstReg
&&
336 ptr
->U
.I
.DstReg
.File
== RC_FILE_TEMPORARY
&&
337 ptr
->U
.I
.DstReg
.WriteMask
) {
339 ptr
->U
.I
.DstReg
.File
,
340 ptr
->U
.I
.DstReg
.Index
,
341 ptr
->U
.I
.DstReg
.WriteMask
);
345 has_temp_reladdr_src
= 1;
352 update_instruction(&s
, inst
);
356 for(struct rc_instruction
* inst
= c
->Program
.Instructions
.Next
;
357 inst
!= &c
->Program
.Instructions
;
358 inst
= inst
->Next
, ++ip
) {
359 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
361 unsigned int srcmasks
[3];
362 unsigned int usemask
;
364 if (!opcode
->HasDstReg
) {
367 inst
->U
.I
.DstReg
.WriteMask
= s
.Instructions
[ip
].WriteMask
;
368 if (s
.Instructions
[ip
].WriteMask
)
371 if (s
.Instructions
[ip
].WriteALUResult
)
374 inst
->U
.I
.WriteALUResult
= RC_ALURESULT_NONE
;
378 struct rc_instruction
* todelete
= inst
;
380 rc_remove_instruction(todelete
);
384 usemask
= s
.Instructions
[ip
].WriteMask
;
386 if (inst
->U
.I
.WriteALUResult
== RC_ALURESULT_X
)
387 usemask
|= RC_MASK_X
;
388 else if (inst
->U
.I
.WriteALUResult
== RC_ALURESULT_W
)
389 usemask
|= RC_MASK_W
;
391 rc_compute_sources_for_writemask(inst
, usemask
, srcmasks
);
393 for(unsigned int src
= 0; src
< 3; ++src
) {
394 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
395 if (!GET_BIT(srcmasks
[src
], chan
))
396 SET_SWZ(inst
->U
.I
.SrcReg
[src
].Swizzle
, chan
, RC_SWIZZLE_UNUSED
);
401 rc_calculate_inputs_outputs(c
);