Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / r300 / radeon_nqssadce.c
1 /*
2 * Copyright (C) 2008 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * @file
30 *
31 * "Not-quite SSA" and Dead-Code Elimination.
32 *
33 * @note This code uses SWIZZLE_NIL in a source register to indicate that
34 * the corresponding component is ignored by the corresponding instruction.
35 */
36
37 #include "radeon_nqssadce.h"
38
39
40 /**
41 * Return the @ref register_state for the given register (or 0 for untracked
42 * registers, i.e. constants).
43 */
44 static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index)
45 {
46 switch(file) {
47 case PROGRAM_TEMPORARY: return &s->Temps[index];
48 case PROGRAM_OUTPUT: return &s->Outputs[index];
49 default: return 0;
50 }
51 }
52
53
54 /**
55 * Left multiplication of a register with a swizzle
56 *
57 * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
58 */
59 static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
60 {
61 struct prog_src_register tmp = srcreg;
62 int i;
63 tmp.Swizzle = 0;
64 tmp.Negate = NEGATE_NONE;
65 for(i = 0; i < 4; ++i) {
66 GLuint swz = GET_SWZ(swizzle, i);
67 if (swz < 4) {
68 tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
69 tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
70 } else {
71 tmp.Swizzle |= swz << (i*3);
72 }
73 }
74 return tmp;
75 }
76
77
78 static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
79 struct prog_instruction *inst, GLint src, GLuint sourced)
80 {
81 int i;
82 GLuint deswz_source = 0;
83
84 for(i = 0; i < 4; ++i) {
85 if (GET_BIT(sourced, i)) {
86 GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
87 deswz_source |= 1 << swz;
88 } else {
89 inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
90 inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
91 }
92 }
93
94 if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
95 struct prog_dst_register dstreg = inst->DstReg;
96 dstreg.File = PROGRAM_TEMPORARY;
97 dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
98 dstreg.WriteMask = sourced;
99
100 s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
101
102 inst = s->Program->Instructions + s->IP;
103 inst->SrcReg[src].File = PROGRAM_TEMPORARY;
104 inst->SrcReg[src].Index = dstreg.Index;
105 inst->SrcReg[src].Swizzle = 0;
106 inst->SrcReg[src].Negate = NEGATE_NONE;
107 inst->SrcReg[src].Abs = 0;
108 for(i = 0; i < 4; ++i) {
109 if (GET_BIT(sourced, i))
110 inst->SrcReg[src].Swizzle |= i << (3*i);
111 else
112 inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
113 }
114 deswz_source = sourced;
115 }
116
117 struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
118 if (regstate)
119 regstate->Sourced |= deswz_source & 0xf;
120
121 return inst;
122 }
123
124
125 static void rewrite_depth_out(struct prog_instruction *inst)
126 {
127 if (inst->DstReg.WriteMask & WRITEMASK_Z) {
128 inst->DstReg.WriteMask = WRITEMASK_W;
129 } else {
130 inst->DstReg.WriteMask = 0;
131 return;
132 }
133
134 switch (inst->Opcode) {
135 case OPCODE_FRC:
136 case OPCODE_MOV:
137 inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
138 break;
139 case OPCODE_ADD:
140 case OPCODE_MAX:
141 case OPCODE_MIN:
142 case OPCODE_MUL:
143 inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
144 inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
145 break;
146 case OPCODE_CMP:
147 case OPCODE_MAD:
148 inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
149 inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
150 inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
151 break;
152 default:
153 // Scalar instructions needn't be reswizzled
154 break;
155 }
156 }
157
158 static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
159 {
160 int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
161 int i;
162 for(i = 0; i < nsrc; ++i)
163 if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex)
164 inst->SrcReg[i].Index = newindex;
165 }
166
167 static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
168 {
169 GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
170 int ip;
171 for(ip = 0; ip < s->IP; ++ip) {
172 struct prog_instruction* inst = s->Program->Instructions + ip;
173 if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
174 inst->DstReg.Index = newindex;
175 unalias_srcregs(inst, oldindex, newindex);
176 }
177 unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex);
178 }
179
180
181 /**
182 * Handle one instruction.
183 */
184 static void process_instruction(struct nqssadce_state* s)
185 {
186 struct prog_instruction *inst = s->Program->Instructions + s->IP;
187
188 if (inst->Opcode == OPCODE_END)
189 return;
190
191 if (inst->Opcode != OPCODE_KIL) {
192 if (s->Descr->RewriteDepthOut) {
193 if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPTH)
194 rewrite_depth_out(inst);
195 }
196
197 struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
198 if (!regstate) {
199 _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
200 inst->DstReg.File, inst->DstReg.Index);
201 return;
202 }
203
204 inst->DstReg.WriteMask &= regstate->Sourced;
205 regstate->Sourced &= ~inst->DstReg.WriteMask;
206
207 if (inst->DstReg.WriteMask == 0) {
208 _mesa_delete_instructions(s->Program, s->IP, 1);
209 return;
210 }
211
212 if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced)
213 unalias_temporary(s, inst->DstReg.Index);
214 }
215
216 /* Attention: Due to swizzle emulation code, the following
217 * might change the instruction stream under us, so we have
218 * to be careful with the inst pointer. */
219 switch (inst->Opcode) {
220 case OPCODE_DDX:
221 case OPCODE_DDY:
222 case OPCODE_FRC:
223 case OPCODE_MOV:
224 inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
225 break;
226 case OPCODE_ADD:
227 case OPCODE_MAX:
228 case OPCODE_MIN:
229 case OPCODE_MUL:
230 inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
231 inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
232 break;
233 case OPCODE_CMP:
234 case OPCODE_MAD:
235 inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
236 inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
237 inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
238 break;
239 case OPCODE_COS:
240 case OPCODE_EX2:
241 case OPCODE_LG2:
242 case OPCODE_RCP:
243 case OPCODE_RSQ:
244 case OPCODE_SIN:
245 inst = track_used_srcreg(s, inst, 0, 0x1);
246 break;
247 case OPCODE_DP3:
248 inst = track_used_srcreg(s, inst, 0, 0x7);
249 inst = track_used_srcreg(s, inst, 1, 0x7);
250 break;
251 case OPCODE_DP4:
252 inst = track_used_srcreg(s, inst, 0, 0xf);
253 inst = track_used_srcreg(s, inst, 1, 0xf);
254 break;
255 case OPCODE_KIL:
256 case OPCODE_TEX:
257 case OPCODE_TXB:
258 case OPCODE_TXP:
259 inst = track_used_srcreg(s, inst, 0, 0xf);
260 break;
261 default:
262 _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
263 return;
264 }
265 }
266
267
268 void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
269 {
270 struct nqssadce_state s;
271
272 _mesa_bzero(&s, sizeof(s));
273 s.Ctx = ctx;
274 s.Program = p;
275 s.Descr = descr;
276 s.Descr->Init(&s);
277 s.IP = p->NumInstructions;
278
279 while(s.IP > 0) {
280 s.IP--;
281 process_instruction(&s);
282 }
283 }