41d175a22f2974c8698eaf95f357a46d61a44130
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_dataflow_annotate.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_dataflow.h"
29
30 #include "radeon_compiler.h"
31
32
33 struct dataflow_state {
34 struct radeon_compiler * C;
35 unsigned int DCE:1;
36 unsigned int UpdateRunning:1;
37
38 struct rc_dataflow_vector * Input[RC_REGISTER_MAX_INDEX];
39 struct rc_dataflow_vector * Output[RC_REGISTER_MAX_INDEX];
40 struct rc_dataflow_vector * Temporary[RC_REGISTER_MAX_INDEX];
41 struct rc_dataflow_vector * Address;
42
43 struct rc_dataflow_vector ** UpdateStack;
44 unsigned int UpdateStackSize;
45 unsigned int UpdateStackReserved;
46 };
47
/* Declared ahead of its definition because mark_ref_use calls it. */
static void mark_vector_use(
	struct dataflow_state *s,
	struct rc_dataflow_vector *vector,
	unsigned int mask);
49
50 static struct rc_dataflow_vector * get_register_contents(struct dataflow_state * s,
51 rc_register_file file, unsigned int index)
52 {
53 if (file == RC_FILE_INPUT || file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
54 if (index >= RC_REGISTER_MAX_INDEX)
55 return 0; /* cannot happen, but be defensive */
56
57 if (file == RC_FILE_TEMPORARY)
58 return s->Temporary[index];
59 if (file == RC_FILE_INPUT)
60 return s->Input[index];
61 if (file == RC_FILE_OUTPUT)
62 return s->Output[index];
63 }
64
65 if (file == RC_FILE_ADDRESS)
66 return s->Address;
67
68 return 0; /* can happen, constant register file */
69 }
70
71 static void mark_ref_use(struct dataflow_state * s, struct rc_dataflow_ref * ref, unsigned int mask)
72 {
73 if (!(mask & ~ref->UseMask))
74 return;
75
76 ref->UseMask |= mask;
77 mark_vector_use(s, ref->Vector, ref->UseMask);
78 }
79
80 static void mark_source_use(struct dataflow_state * s, struct rc_instruction * inst,
81 unsigned int src, unsigned int srcmask)
82 {
83 unsigned int refmask = 0;
84
85 for(unsigned int i = 0; i < 4; ++i) {
86 if (GET_BIT(srcmask, i))
87 refmask |= 1 << GET_SWZ(inst->I.SrcReg[src].Swizzle, i);
88 }
89
90 /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
91 refmask &= RC_MASK_XYZW;
92
93 if (!refmask)
94 return; /* can happen if the swizzle contains constant components */
95
96 if (inst->Dataflow.SrcReg[src])
97 mark_ref_use(s, inst->Dataflow.SrcReg[src], refmask);
98
99 if (inst->Dataflow.SrcRegAddress[src])
100 mark_ref_use(s, inst->Dataflow.SrcRegAddress[src], RC_MASK_X);
101 }
102
103 static void compute_sources_for_writemask(
104 struct rc_instruction * inst,
105 unsigned int writemask,
106 unsigned int *srcmasks)
107 {
108 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
109
110 srcmasks[0] = 0;
111 srcmasks[1] = 0;
112 srcmasks[2] = 0;
113
114 if (inst->I.Opcode == RC_OPCODE_KIL)
115 srcmasks[0] |= RC_MASK_XYZW;
116
117 if (!writemask)
118 return;
119
120 if (opcode->IsComponentwise) {
121 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
122 srcmasks[src] |= writemask;
123 } else if (opcode->IsStandardScalar) {
124 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
125 srcmasks[src] |= RC_MASK_X;
126 } else {
127 switch(inst->I.Opcode) {
128 case RC_OPCODE_ARL:
129 srcmasks[0] |= RC_MASK_X;
130 break;
131 case RC_OPCODE_DP3:
132 srcmasks[0] |= RC_MASK_XYZ;
133 srcmasks[1] |= RC_MASK_XYZ;
134 break;
135 case RC_OPCODE_DP4:
136 srcmasks[0] |= RC_MASK_XYZW;
137 srcmasks[1] |= RC_MASK_XYZW;
138 break;
139 case RC_OPCODE_TEX:
140 case RC_OPCODE_TXB:
141 case RC_OPCODE_TXP:
142 srcmasks[0] |= RC_MASK_XYZW;
143 break;
144 case RC_OPCODE_DST:
145 srcmasks[0] |= 0x6;
146 srcmasks[1] |= 0xa;
147 break;
148 case RC_OPCODE_EXP:
149 case RC_OPCODE_LOG:
150 srcmasks[0] |= RC_MASK_XY;
151 break;
152 case RC_OPCODE_LIT:
153 srcmasks[0] |= 0xb;
154 break;
155 default:
156 break;
157 }
158 }
159 }
160
/**
 * Mark, on all three source operand slots of inst, the channels required to
 * compute the destination channels given in writemask.
 */
static void mark_instruction_source_use(struct dataflow_state *s,
		struct rc_instruction *inst, unsigned int writemask)
{
	unsigned int needed[3];
	unsigned int src = 0;

	compute_sources_for_writemask(inst, writemask, needed);

	while (src < 3) {
		mark_source_use(s, inst, src, needed[src]);
		++src;
	}
}
171
172 static void run_update(struct dataflow_state * s)
173 {
174 s->UpdateRunning = 1;
175
176 while(s->UpdateStackSize) {
177 struct rc_dataflow_vector * vector = s->UpdateStack[--s->UpdateStackSize];
178 vector->PassBit = 0;
179
180 if (vector->WriteInstruction) {
181 struct rc_instruction * inst = vector->WriteInstruction;
182
183 if (inst->Dataflow.DstRegPrev) {
184 unsigned int carryover = vector->UseMask & ~inst->I.DstReg.WriteMask;
185
186 if (carryover)
187 mark_ref_use(s, inst->Dataflow.DstRegPrev, carryover);
188 }
189
190 mark_instruction_source_use(
191 s, vector->WriteInstruction,
192 vector->UseMask & inst->I.DstReg.WriteMask);
193 }
194 }
195
196 s->UpdateRunning = 0;
197 }
198
199 static void mark_vector_use(struct dataflow_state * s, struct rc_dataflow_vector * vector, unsigned int mask)
200 {
201 if (!(mask & ~vector->UseMask))
202 return; /* no new used bits */
203
204 vector->UseMask |= mask;
205 if (vector->PassBit)
206 return;
207
208 if (s->UpdateStackSize >= s->UpdateStackReserved) {
209 unsigned int new_reserve = 2 * s->UpdateStackReserved;
210 struct rc_dataflow_vector ** new_stack;
211
212 if (!new_reserve)
213 new_reserve = 16;
214
215 new_stack = memory_pool_malloc(&s->C->Pool, new_reserve * sizeof(struct rc_dataflow_vector *));
216 memcpy(new_stack, s->UpdateStack, s->UpdateStackSize * sizeof(struct rc_dataflow_vector *));
217
218 s->UpdateStack = new_stack;
219 s->UpdateStackReserved = new_reserve;
220 }
221
222 s->UpdateStack[s->UpdateStackSize++] = vector;
223 vector->PassBit = 1;
224
225 if (!s->UpdateRunning)
226 run_update(s);
227 }
228
229 static void annotate_instruction(struct dataflow_state * s, struct rc_instruction * inst)
230 {
231 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
232 unsigned int src;
233
234 for(src = 0; src < opcode->NumSrcRegs; ++src) {
235 struct rc_dataflow_vector * vector = get_register_contents(s, inst->I.SrcReg[src].File, inst->I.SrcReg[src].Index);
236 if (vector) {
237 inst->Dataflow.SrcReg[src] = rc_dataflow_create_ref(s->C, vector, inst);
238 }
239 if (inst->I.SrcReg[src].RelAddr) {
240 struct rc_dataflow_vector * addr = get_register_contents(s, RC_FILE_ADDRESS, 0);
241 if (addr)
242 inst->Dataflow.SrcRegAddress[src] = rc_dataflow_create_ref(s->C, addr, inst);
243 }
244 }
245
246 mark_instruction_source_use(s, inst, 0); /* for KIL */
247
248 if (opcode->HasDstReg) {
249 struct rc_dataflow_vector * oldvec = get_register_contents(s, inst->I.DstReg.File, inst->I.DstReg.Index);
250 struct rc_dataflow_vector * newvec = rc_dataflow_create_vector(s->C, inst->I.DstReg.File, inst->I.DstReg.Index, inst);
251
252 newvec->ValidMask = inst->I.DstReg.WriteMask;
253
254 if (oldvec) {
255 unsigned int carryover = oldvec->ValidMask & ~inst->I.DstReg.WriteMask;
256
257 if (oldvec->ValidMask)
258 inst->Dataflow.DstRegAliased = 1;
259
260 if (carryover) {
261 inst->Dataflow.DstRegPrev = rc_dataflow_create_ref(s->C, oldvec, inst);
262 newvec->ValidMask |= carryover;
263
264 if (!s->DCE)
265 mark_ref_use(s, inst->Dataflow.DstRegPrev, carryover);
266 }
267 }
268
269 inst->Dataflow.DstReg = newvec;
270
271 if (newvec->File == RC_FILE_TEMPORARY)
272 s->Temporary[newvec->Index] = newvec;
273 else if (newvec->File == RC_FILE_OUTPUT)
274 s->Output[newvec->Index] = newvec;
275 else
276 s->Address = newvec;
277
278 if (!s->DCE)
279 mark_vector_use(s, newvec, inst->I.DstReg.WriteMask);
280 }
281 }
282
283 static void init_inputs(struct dataflow_state * s)
284 {
285 unsigned int index;
286
287 for(index = 0; index < 32; ++index) {
288 if (s->C->Program.InputsRead & (1 << index)) {
289 s->Input[index] = rc_dataflow_create_vector(s->C, RC_FILE_INPUT, index, 0);
290 s->Input[index]->ValidMask = RC_MASK_XYZW;
291 }
292 }
293 }
294
295 static void mark_output_use(void * data, unsigned int index, unsigned int mask)
296 {
297 struct dataflow_state * s = data;
298 struct rc_dataflow_vector * vec = s->Output[index];
299
300 if (vec)
301 mark_vector_use(s, vec, mask);
302 }
303
304 void rc_dataflow_annotate(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata)
305 {
306 struct dataflow_state s;
307 struct rc_instruction * inst;
308
309 memset(&s, 0, sizeof(s));
310 s.C = c;
311 s.DCE = dce ? 1 : 0;
312
313 init_inputs(&s);
314
315 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
316 annotate_instruction(&s, inst);
317 }
318
319 if (s.DCE) {
320 dce(userdata, &s, &mark_output_use);
321
322 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
323 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
324
325 if (opcode->HasDstReg) {
326 unsigned int redundant_writes = inst->I.DstReg.WriteMask & ~inst->Dataflow.DstReg->UseMask;
327
328 inst->I.DstReg.WriteMask &= ~redundant_writes;
329
330 if (!inst->I.DstReg.WriteMask) {
331 struct rc_instruction * todelete = inst;
332 inst = inst->Prev;
333 rc_remove_instruction(todelete);
334 continue;
335 }
336 }
337
338 unsigned int srcmasks[3];
339 compute_sources_for_writemask(inst, inst->I.DstReg.WriteMask, srcmasks);
340
341 for(unsigned int src = 0; src < 3; ++src) {
342 for(unsigned int chan = 0; chan < 4; ++chan) {
343 if (!GET_BIT(srcmasks[src], chan))
344 SET_SWZ(inst->I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
345 }
346
347 if (inst->Dataflow.SrcReg[src]) {
348 if (!inst->Dataflow.SrcReg[src]->UseMask) {
349 rc_dataflow_remove_ref(inst->Dataflow.SrcReg[src]);
350 inst->Dataflow.SrcReg[src] = 0;
351 }
352 }
353
354 if (inst->Dataflow.SrcRegAddress[src]) {
355 if (!inst->Dataflow.SrcRegAddress[src]->UseMask) {
356 rc_dataflow_remove_ref(inst->Dataflow.SrcRegAddress[src]);
357 inst->Dataflow.SrcRegAddress[src] = 0;
358 }
359 }
360 }
361 }
362
363 rc_calculate_inputs_outputs(c);
364 }
365 }