Merge branch 'glsl2-head' into glsl2
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_optimize.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_dataflow.h"
29
30 #include "radeon_compiler.h"
31 #include "radeon_swizzle.h"
32
33
34 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
35 {
36 struct rc_src_register combine;
37 combine.File = inner.File;
38 combine.Index = inner.Index;
39 combine.RelAddr = inner.RelAddr;
40 if (outer.Abs) {
41 combine.Abs = 1;
42 combine.Negate = outer.Negate;
43 } else {
44 combine.Abs = inner.Abs;
45 combine.Negate = 0;
46 for(unsigned int chan = 0; chan < 4; ++chan) {
47 unsigned int swz = GET_SWZ(outer.Swizzle, chan);
48 if (swz < 4)
49 combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
50 }
51 combine.Negate ^= outer.Negate;
52 }
53 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
54 return combine;
55 }
56
/**
 * Scratch state shared between peephole() and its read/write scan
 * callbacks while one MOV is being examined.
 */
struct peephole_state {
	/** Compiler the MOV belongs to */
	struct radeon_compiler * C;

	/** The MOV instruction under consideration */
	struct rc_instruction * Mov;

	/** Set as soon as some later instruction makes removing Mov impossible */
	unsigned int Conflict:1;

	/** Set once a later write has overwritten something Mov's source depends on */
	unsigned int SourceClobbered:1;

	/** Components of Mov's destination that still hold the value Mov wrote */
	unsigned int MovMask:4;

	/** Components of Mov's destination known to hold a value *not* written by Mov */
	unsigned int DefinedMask:4;

	/** Components of Mov's source register that Mov's swizzle selects */
	unsigned int SourcedMask:4;

	/** Nesting depth of IF blocks entered after Mov; negative once Mov's own block has been left */
	int BranchDepth;
};
77
78 static void peephole_scan_read(void * data, struct rc_instruction * inst,
79 rc_register_file file, unsigned int index, unsigned int mask)
80 {
81 struct peephole_state * s = data;
82
83 if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index)
84 return;
85
86 /* These instructions cannot read from the constants file.
87 * see radeonTransformTEX()
88 */
89 if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
90 s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT &&
91 (inst->U.I.Opcode == RC_OPCODE_TEX ||
92 inst->U.I.Opcode == RC_OPCODE_TXB ||
93 inst->U.I.Opcode == RC_OPCODE_TXP ||
94 inst->U.I.Opcode == RC_OPCODE_KIL)){
95 s->Conflict = 1;
96 return;
97 }
98 if ((mask & s->MovMask) == mask) {
99 if (s->SourceClobbered) {
100 s->Conflict = 1;
101 }
102 } else if ((mask & s->DefinedMask) == mask) {
103 /* read from something entirely written by other instruction: this is okay */
104 } else {
105 /* read from component combination that is not well-defined without
106 * the MOV: cannot remove it */
107 s->Conflict = 1;
108 }
109 }
110
111 static void peephole_scan_write(void * data, struct rc_instruction * inst,
112 rc_register_file file, unsigned int index, unsigned int mask)
113 {
114 struct peephole_state * s = data;
115
116 if (s->BranchDepth < 0)
117 return;
118
119 if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
120 s->MovMask &= ~mask;
121 if (s->BranchDepth == 0)
122 s->DefinedMask |= mask;
123 else
124 s->DefinedMask &= ~mask;
125 }
126 if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
127 if (mask & s->SourcedMask)
128 s->SourceClobbered = 1;
129 } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
130 s->SourceClobbered = 1;
131 }
132 }
133
134 static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov)
135 {
136 struct peephole_state s;
137
138 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult)
139 return;
140
141 memset(&s, 0, sizeof(s));
142 s.C = c;
143 s.Mov = inst_mov;
144 s.MovMask = inst_mov->U.I.DstReg.WriteMask;
145 s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
146
147 for(unsigned int chan = 0; chan < 4; ++chan) {
148 unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
149 s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
150 }
151
152 /* 1st pass: Check whether all subsequent readers can be changed */
153 for(struct rc_instruction * inst = inst_mov->Next;
154 inst != &c->Program.Instructions;
155 inst = inst->Next) {
156 rc_for_all_reads_mask(inst, peephole_scan_read, &s);
157 rc_for_all_writes_mask(inst, peephole_scan_write, &s);
158 if (s.Conflict)
159 return;
160
161 if (s.BranchDepth >= 0) {
162 if (inst->U.I.Opcode == RC_OPCODE_IF) {
163 s.BranchDepth++;
164 } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
165 s.BranchDepth--;
166 if (s.BranchDepth < 0) {
167 s.DefinedMask &= ~s.MovMask;
168 s.MovMask = 0;
169 }
170 }
171 }
172 }
173
174 if (s.Conflict)
175 return;
176
177 /* 2nd pass: We can satisfy all readers, so switch them over all at once */
178 s.MovMask = inst_mov->U.I.DstReg.WriteMask;
179 s.BranchDepth = 0;
180
181 for(struct rc_instruction * inst = inst_mov->Next;
182 inst != &c->Program.Instructions;
183 inst = inst->Next) {
184 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
185
186 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
187 if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
188 inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
189 unsigned int refmask = 0;
190
191 for(unsigned int chan = 0; chan < 4; ++chan) {
192 unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
193 refmask |= (1 << swz) & RC_MASK_XYZW;
194 }
195
196 if ((refmask & s.MovMask) == refmask)
197 inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
198 }
199 }
200
201 if (opcode->HasDstReg) {
202 if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
203 inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
204 s.MovMask &= ~inst->U.I.DstReg.WriteMask;
205 }
206 }
207
208 if (s.BranchDepth >= 0) {
209 if (inst->U.I.Opcode == RC_OPCODE_IF) {
210 s.BranchDepth++;
211 } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
212 s.BranchDepth--;
213 if (s.BranchDepth < 0)
214 break; /* no more readers after this point */
215 }
216 }
217 }
218
219 /* Finally, remove the original MOV instruction */
220 rc_remove_instruction(inst_mov);
221 }
222
223 /**
224 * Check if a source register is actually always the same
225 * swizzle constant.
226 */
227 static int is_src_uniform_constant(struct rc_src_register src,
228 rc_swizzle * pswz, unsigned int * pnegate)
229 {
230 int have_used = 0;
231
232 if (src.File != RC_FILE_NONE) {
233 *pswz = 0;
234 return 0;
235 }
236
237 for(unsigned int chan = 0; chan < 4; ++chan) {
238 unsigned int swz = GET_SWZ(src.Swizzle, chan);
239 if (swz < 4) {
240 *pswz = 0;
241 return 0;
242 }
243 if (swz == RC_SWIZZLE_UNUSED)
244 continue;
245
246 if (!have_used) {
247 *pswz = swz;
248 *pnegate = GET_BIT(src.Negate, chan);
249 have_used = 1;
250 } else {
251 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
252 *pswz = 0;
253 return 0;
254 }
255 }
256 }
257
258 return 1;
259 }
260
261
262 static void constant_folding_mad(struct rc_instruction * inst)
263 {
264 rc_swizzle swz;
265 unsigned int negate;
266
267 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
268 if (swz == RC_SWIZZLE_ZERO) {
269 inst->U.I.Opcode = RC_OPCODE_MUL;
270 return;
271 }
272 }
273
274 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
275 if (swz == RC_SWIZZLE_ONE) {
276 inst->U.I.Opcode = RC_OPCODE_ADD;
277 if (negate)
278 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
279 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
280 return;
281 } else if (swz == RC_SWIZZLE_ZERO) {
282 inst->U.I.Opcode = RC_OPCODE_MOV;
283 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
284 return;
285 }
286 }
287
288 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
289 if (swz == RC_SWIZZLE_ONE) {
290 inst->U.I.Opcode = RC_OPCODE_ADD;
291 if (negate)
292 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
293 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
294 return;
295 } else if (swz == RC_SWIZZLE_ZERO) {
296 inst->U.I.Opcode = RC_OPCODE_MOV;
297 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
298 return;
299 }
300 }
301 }
302
303 static void constant_folding_mul(struct rc_instruction * inst)
304 {
305 rc_swizzle swz;
306 unsigned int negate;
307
308 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
309 if (swz == RC_SWIZZLE_ONE) {
310 inst->U.I.Opcode = RC_OPCODE_MOV;
311 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
312 if (negate)
313 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
314 return;
315 } else if (swz == RC_SWIZZLE_ZERO) {
316 inst->U.I.Opcode = RC_OPCODE_MOV;
317 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
318 return;
319 }
320 }
321
322 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
323 if (swz == RC_SWIZZLE_ONE) {
324 inst->U.I.Opcode = RC_OPCODE_MOV;
325 if (negate)
326 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
327 return;
328 } else if (swz == RC_SWIZZLE_ZERO) {
329 inst->U.I.Opcode = RC_OPCODE_MOV;
330 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
331 return;
332 }
333 }
334 }
335
336 static void constant_folding_add(struct rc_instruction * inst)
337 {
338 rc_swizzle swz;
339 unsigned int negate;
340
341 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
342 if (swz == RC_SWIZZLE_ZERO) {
343 inst->U.I.Opcode = RC_OPCODE_MOV;
344 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
345 return;
346 }
347 }
348
349 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
350 if (swz == RC_SWIZZLE_ZERO) {
351 inst->U.I.Opcode = RC_OPCODE_MOV;
352 return;
353 }
354 }
355 }
356
357
358 /**
359 * Replace 0.0, 1.0 and 0.5 immediate constants by their
360 * respective swizzles. Simplify instructions like ADD dst, src, 0;
361 */
362 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
363 {
364 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
365
366 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
367 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
368 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
369 inst->U.I.SrcReg[src].RelAddr ||
370 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
371 continue;
372
373 struct rc_constant * constant =
374 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
375
376 if (constant->Type != RC_CONSTANT_IMMEDIATE)
377 continue;
378
379 struct rc_src_register newsrc = inst->U.I.SrcReg[src];
380 int have_real_reference = 0;
381 for(unsigned int chan = 0; chan < 4; ++chan) {
382 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
383 if (swz >= 4)
384 continue;
385
386 unsigned int newswz;
387 float imm = constant->u.Immediate[swz];
388 float baseimm = imm;
389 if (imm < 0.0)
390 baseimm = -baseimm;
391
392 if (baseimm == 0.0) {
393 newswz = RC_SWIZZLE_ZERO;
394 } else if (baseimm == 1.0) {
395 newswz = RC_SWIZZLE_ONE;
396 } else if (baseimm == 0.5) {
397 newswz = RC_SWIZZLE_HALF;
398 } else {
399 have_real_reference = 1;
400 continue;
401 }
402
403 SET_SWZ(newsrc.Swizzle, chan, newswz);
404 if (imm < 0.0 && !newsrc.Abs)
405 newsrc.Negate ^= 1 << chan;
406 }
407
408 if (!have_real_reference) {
409 newsrc.File = RC_FILE_NONE;
410 newsrc.Index = 0;
411 }
412
413 /* don't make the swizzle worse */
414 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
415 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
416 continue;
417
418 inst->U.I.SrcReg[src] = newsrc;
419 }
420
421 /* Simplify instructions based on constants */
422 if (inst->U.I.Opcode == RC_OPCODE_MAD)
423 constant_folding_mad(inst);
424
425 /* note: MAD can simplify to MUL or ADD */
426 if (inst->U.I.Opcode == RC_OPCODE_MUL)
427 constant_folding_mul(inst);
428 else if (inst->U.I.Opcode == RC_OPCODE_ADD)
429 constant_folding_add(inst);
430 }
431
432 void rc_optimize(struct radeon_compiler * c)
433 {
434 struct rc_instruction * inst = c->Program.Instructions.Next;
435 while(inst != &c->Program.Instructions) {
436 struct rc_instruction * cur = inst;
437 inst = inst->Next;
438
439 constant_folding(c, cur);
440
441 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
442 peephole(c, cur);
443 /* cur may no longer be part of the program */
444 }
445 }
446 }