Merge branch 'master' into pipe-video
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_dataflow_deadcode.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_dataflow.h"
29
30 #include "radeon_compiler.h"
31
32
33 struct updatemask_state {
34 unsigned char Output[RC_REGISTER_MAX_INDEX];
35 unsigned char Temporary[RC_REGISTER_MAX_INDEX];
36 unsigned char Address;
37 unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
38 };
39
/**
 * Per-instruction result of the analysis; one entry per instruction,
 * indexed by the instruction's IP.
 */
struct instruction_state {
	/* Destination channels whose values are needed (4-bit channel mask). */
	unsigned char WriteMask:4;

	/* Set when the instruction's ALU result write is needed. */
	unsigned char WriteALUResult:1;

	/* Per source operand: channel slots already accounted for. */
	unsigned char SrcReg[3];
};
45
/**
 * Bookkeeping for one loop on the loop stack: a growable array of state
 * snapshots, one recorded per BRK instruction (see push_break()).
 */
struct loopinfo {
	/* Snapshot array; grown with memory_pool_array_reserve(). */
	struct updatemask_state *Breaks;

	/* Number of valid entries in Breaks. */
	unsigned int BreakCount;

	/* Number of entries Breaks has room for. */
	unsigned int BreaksReserved;
};
51
52 struct branchinfo {
53 unsigned int HaveElse:1;
54
55 struct updatemask_state StoreEndif;
56 struct updatemask_state StoreElse;
57 };
58
59 struct deadcode_state {
60 struct radeon_compiler * C;
61 struct instruction_state * Instructions;
62
63 struct updatemask_state R;
64
65 struct branchinfo * BranchStack;
66 unsigned int BranchStackSize;
67 unsigned int BranchStackReserved;
68
69 struct loopinfo * LoopStack;
70 unsigned int LoopStackSize;
71 unsigned int LoopStackReserved;
72 };
73
74
75 static void or_updatemasks(
76 struct updatemask_state * dst,
77 struct updatemask_state * a,
78 struct updatemask_state * b)
79 {
80 for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
81 dst->Output[i] = a->Output[i] | b->Output[i];
82 dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
83 }
84
85 for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
86 dst->Special[i] = a->Special[i] | b->Special[i];
87
88 dst->Address = a->Address | b->Address;
89 }
90
91 static void push_break(struct deadcode_state *s)
92 {
93 struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];
94 memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
95 loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);
96
97 memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
98 }
99
100 static void push_loop(struct deadcode_state * s)
101 {
102 memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
103 s->LoopStackSize, s->LoopStackReserved, 1);
104 memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
105 }
106
107 static void push_branch(struct deadcode_state * s)
108 {
109 struct branchinfo * branch;
110
111 memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
112 s->BranchStackSize, s->BranchStackReserved, 1);
113
114 branch = &s->BranchStack[s->BranchStackSize++];
115 branch->HaveElse = 0;
116 memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
117 }
118
119 static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
120 {
121 if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
122 if (index >= RC_REGISTER_MAX_INDEX) {
123 rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
124 return 0;
125 }
126
127 if (file == RC_FILE_OUTPUT)
128 return &s->R.Output[index];
129 else
130 return &s->R.Temporary[index];
131 } else if (file == RC_FILE_ADDRESS) {
132 return &s->R.Address;
133 } else if (file == RC_FILE_SPECIAL) {
134 if (index >= RC_NUM_SPECIAL_REGISTERS) {
135 rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
136 return 0;
137 }
138
139 return &s->R.Special[index];
140 }
141
142 return 0;
143 }
144
145 static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
146 {
147 unsigned char * pused = get_used_ptr(s, file, index);
148 if (pused)
149 *pused |= mask;
150 }
151
152 static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
153 {
154 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
155 struct instruction_state * insts = &s->Instructions[inst->IP];
156 unsigned int usedmask = 0;
157 unsigned int srcmasks[3];
158
159 if (opcode->HasDstReg) {
160 unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
161 if (pused) {
162 usedmask = *pused & inst->U.I.DstReg.WriteMask;
163 if (!inst->U.I.DstReg.RelAddr)
164 *pused &= ~usedmask;
165 }
166
167 if (inst->U.I.DstReg.RelAddr)
168 mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
169 }
170
171 insts->WriteMask |= usedmask;
172
173 if (inst->U.I.WriteALUResult) {
174 unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
175 if (pused && *pused) {
176 if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
177 usedmask |= RC_MASK_X;
178 else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
179 usedmask |= RC_MASK_W;
180
181 *pused = 0;
182 insts->WriteALUResult = 1;
183 }
184 }
185
186 rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
187
188 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
189 unsigned int refmask = 0;
190 unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
191 insts->SrcReg[src] |= newsrcmask;
192
193 for(unsigned int chan = 0; chan < 4; ++chan) {
194 if (GET_BIT(newsrcmask, chan))
195 refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
196 }
197
198 /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
199 refmask &= RC_MASK_XYZW;
200
201 if (!refmask)
202 continue;
203
204 mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
205
206 if (inst->U.I.SrcReg[src].RelAddr)
207 mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
208 }
209 }
210
211 static void mark_output_use(void * data, unsigned int index, unsigned int mask)
212 {
213 struct deadcode_state * s = data;
214
215 mark_used(s, RC_FILE_OUTPUT, index, mask);
216 }
217
218 void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
219 {
220 struct deadcode_state s;
221 unsigned int nr_instructions;
222 unsigned has_temp_reladdr_src = 0;
223 rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
224 unsigned int ip;
225
226 /* Give up if there is relative addressing of destination operands. */
227 for(struct rc_instruction * inst = c->Program.Instructions.Next;
228 inst != &c->Program.Instructions;
229 inst = inst->Next) {
230 const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
231 if (opcode->HasDstReg &&
232 inst->U.I.DstReg.WriteMask &&
233 inst->U.I.DstReg.RelAddr) {
234 return;
235 }
236 }
237
238 memset(&s, 0, sizeof(s));
239 s.C = c;
240
241 nr_instructions = rc_recompute_ips(c);
242 s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
243 memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
244
245 dce(c, &s, &mark_output_use);
246
247 for(struct rc_instruction * inst = c->Program.Instructions.Prev;
248 inst != &c->Program.Instructions;
249 inst = inst->Prev) {
250 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
251
252 switch(opcode->Opcode){
253 /* Mark all sources in the loop body as used before doing
254 * normal deadcode analysis. This is probably not optimal.
255 */
256 case RC_OPCODE_ENDLOOP:
257 {
258 int endloops = 1;
259 struct rc_instruction *ptr;
260 for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
261 opcode = rc_get_opcode_info(ptr->U.I.Opcode);
262 if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
263 endloops--;
264 continue;
265 }
266 if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
267 endloops++;
268 continue;
269 }
270 if(opcode->HasDstReg){
271 int src = 0;
272 unsigned int srcmasks[3];
273 rc_compute_sources_for_writemask(ptr,
274 ptr->U.I.DstReg.WriteMask, srcmasks);
275 for(src=0; src < opcode->NumSrcRegs; src++){
276 mark_used(&s,
277 ptr->U.I.SrcReg[src].File,
278 ptr->U.I.SrcReg[src].Index,
279 srcmasks[src]);
280 }
281 }
282 }
283 push_loop(&s);
284 break;
285 }
286 case RC_OPCODE_BRK:
287 push_break(&s);
288 break;
289 case RC_OPCODE_BGNLOOP:
290 {
291 unsigned int i;
292 struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
293 for(i = 0; i < loop->BreakCount; i++) {
294 or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
295 }
296 break;
297 }
298 case RC_OPCODE_CONT:
299 break;
300 case RC_OPCODE_ENDIF:
301 push_branch(&s);
302 break;
303 default:
304 if (opcode->IsFlowControl && s.BranchStackSize) {
305 struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
306 if (opcode->Opcode == RC_OPCODE_IF) {
307 or_updatemasks(&s.R,
308 &s.R,
309 branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
310
311 s.BranchStackSize--;
312 } else if (opcode->Opcode == RC_OPCODE_ELSE) {
313 if (branch->HaveElse) {
314 rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
315 } else {
316 memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
317 memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
318 branch->HaveElse = 1;
319 }
320 } else {
321 rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
322 }
323 }
324
325 if (!has_temp_reladdr_src) {
326 for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
327 if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
328 inst->U.I.SrcReg[i].RelAddr) {
329 /* If there is a register read from a temporary file with relative addressing,
330 * mark all preceding written registers as used. */
331 for (struct rc_instruction *ptr = inst->Prev;
332 ptr != &c->Program.Instructions;
333 ptr = ptr->Prev) {
334 opcode = rc_get_opcode_info(ptr->U.I.Opcode);
335 if (opcode->HasDstReg &&
336 ptr->U.I.DstReg.File == RC_FILE_TEMPORARY &&
337 ptr->U.I.DstReg.WriteMask) {
338 mark_used(&s,
339 ptr->U.I.DstReg.File,
340 ptr->U.I.DstReg.Index,
341 ptr->U.I.DstReg.WriteMask);
342 }
343 }
344
345 has_temp_reladdr_src = 1;
346 break;
347 }
348 }
349 }
350 }
351
352 update_instruction(&s, inst);
353 }
354
355 ip = 0;
356 for(struct rc_instruction * inst = c->Program.Instructions.Next;
357 inst != &c->Program.Instructions;
358 inst = inst->Next, ++ip) {
359 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
360 int dead = 1;
361 unsigned int srcmasks[3];
362 unsigned int usemask;
363
364 if (!opcode->HasDstReg) {
365 dead = 0;
366 } else {
367 inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
368 if (s.Instructions[ip].WriteMask)
369 dead = 0;
370
371 if (s.Instructions[ip].WriteALUResult)
372 dead = 0;
373 else
374 inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
375 }
376
377 if (dead) {
378 struct rc_instruction * todelete = inst;
379 inst = inst->Prev;
380 rc_remove_instruction(todelete);
381 continue;
382 }
383
384 usemask = s.Instructions[ip].WriteMask;
385
386 if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
387 usemask |= RC_MASK_X;
388 else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
389 usemask |= RC_MASK_W;
390
391 rc_compute_sources_for_writemask(inst, usemask, srcmasks);
392
393 for(unsigned int src = 0; src < 3; ++src) {
394 for(unsigned int chan = 0; chan < 4; ++chan) {
395 if (!GET_BIT(srcmasks[src], chan))
396 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
397 }
398 }
399 }
400
401 rc_calculate_inputs_outputs(c);
402 }