Merge commit 'origin/gallium-0.1'
[mesa.git] / src / mesa / drivers / dri / r300 / radeon_nqssadce.c
1 /*
2 * Copyright (C) 2008 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * @file
30 *
31 * "Not-quite SSA" and Dead-Code Elimination.
32 *
33 * @note This code uses SWIZZLE_NIL in a source register to indicate that
34 * the corresponding component is ignored by the corresponding instruction.
35 */
36
37 #include "radeon_nqssadce.h"
38
39
40 /**
41 * Return the @ref register_state for the given register (or 0 for untracked
42 * registers, i.e. constants).
43 */
44 static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index)
45 {
46 switch(file) {
47 case PROGRAM_TEMPORARY: return &s->Temps[index];
48 case PROGRAM_OUTPUT: return &s->Outputs[index];
49 default: return 0;
50 }
51 }
52
53
54 /**
55 * Left multiplication of a register with a swizzle
56 *
57 * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
58 */
59 static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
60 {
61 struct prog_src_register tmp = srcreg;
62 int i;
63 tmp.Swizzle = 0;
64 tmp.NegateBase = 0;
65 for(i = 0; i < 4; ++i) {
66 GLuint swz = GET_SWZ(swizzle, i);
67 if (swz < 4) {
68 tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
69 tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i;
70 } else {
71 tmp.Swizzle |= swz << (i*3);
72 }
73 }
74 return tmp;
75 }
76
77
78 static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
79 struct prog_instruction *inst, GLint src, GLuint sourced)
80 {
81 int i;
82 GLuint deswz_source = 0;
83
84 for(i = 0; i < 4; ++i) {
85 if (GET_BIT(sourced, i)) {
86 GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
87 deswz_source |= 1 << swz;
88 } else {
89 inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
90 inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
91 }
92 }
93
94 if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
95 struct prog_dst_register dstreg = inst->DstReg;
96 dstreg.File = PROGRAM_TEMPORARY;
97 dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
98 dstreg.WriteMask = sourced;
99
100 s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
101
102 inst = s->Program->Instructions + s->IP;
103 inst->SrcReg[src].File = PROGRAM_TEMPORARY;
104 inst->SrcReg[src].Index = dstreg.Index;
105 inst->SrcReg[src].Swizzle = 0;
106 inst->SrcReg[src].NegateBase = 0;
107 inst->SrcReg[src].Abs = 0;
108 inst->SrcReg[src].NegateAbs = 0;
109 for(i = 0; i < 4; ++i) {
110 if (GET_BIT(sourced, i))
111 inst->SrcReg[src].Swizzle |= i << (3*i);
112 else
113 inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
114 }
115 deswz_source = sourced;
116 }
117
118 struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
119 if (regstate)
120 regstate->Sourced |= deswz_source & 0xf;
121
122 return inst;
123 }
124
125
126 static void rewrite_depth_out(struct prog_instruction *inst)
127 {
128 if (inst->DstReg.WriteMask & WRITEMASK_Z) {
129 inst->DstReg.WriteMask = WRITEMASK_W;
130 } else {
131 inst->DstReg.WriteMask = 0;
132 return;
133 }
134
135 switch (inst->Opcode) {
136 case OPCODE_FRC:
137 case OPCODE_MOV:
138 inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
139 break;
140 case OPCODE_ADD:
141 case OPCODE_MAX:
142 case OPCODE_MIN:
143 case OPCODE_MUL:
144 inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
145 inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
146 break;
147 case OPCODE_CMP:
148 case OPCODE_MAD:
149 inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
150 inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
151 inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
152 break;
153 default:
154 // Scalar instructions needn't be reswizzled
155 break;
156 }
157 }
158
159 static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
160 {
161 int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
162 int i;
163 for(i = 0; i < nsrc; ++i)
164 if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex)
165 inst->SrcReg[i].Index = newindex;
166 }
167
168 static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
169 {
170 GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
171 int ip;
172 for(ip = 0; ip < s->IP; ++ip) {
173 struct prog_instruction* inst = s->Program->Instructions + ip;
174 if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
175 inst->DstReg.Index = newindex;
176 unalias_srcregs(inst, oldindex, newindex);
177 }
178 unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex);
179 }
180
181
182 /**
183 * Handle one instruction.
184 */
185 static void process_instruction(struct nqssadce_state* s)
186 {
187 struct prog_instruction *inst = s->Program->Instructions + s->IP;
188
189 if (inst->Opcode == OPCODE_END)
190 return;
191
192 if (inst->Opcode != OPCODE_KIL) {
193 if (s->Descr->RewriteDepthOut) {
194 if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPTH)
195 rewrite_depth_out(inst);
196 }
197
198 struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
199 if (!regstate) {
200 _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
201 inst->DstReg.File, inst->DstReg.Index);
202 return;
203 }
204
205 inst->DstReg.WriteMask &= regstate->Sourced;
206 regstate->Sourced &= ~inst->DstReg.WriteMask;
207
208 if (inst->DstReg.WriteMask == 0) {
209 _mesa_delete_instructions(s->Program, s->IP, 1);
210 return;
211 }
212
213 if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced)
214 unalias_temporary(s, inst->DstReg.Index);
215 }
216
217 /* Attention: Due to swizzle emulation code, the following
218 * might change the instruction stream under us, so we have
219 * to be careful with the inst pointer. */
220 switch (inst->Opcode) {
221 case OPCODE_DDX:
222 case OPCODE_DDY:
223 case OPCODE_FRC:
224 case OPCODE_MOV:
225 inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
226 break;
227 case OPCODE_ADD:
228 case OPCODE_MAX:
229 case OPCODE_MIN:
230 case OPCODE_MUL:
231 inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
232 inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
233 break;
234 case OPCODE_CMP:
235 case OPCODE_MAD:
236 inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
237 inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
238 inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
239 break;
240 case OPCODE_COS:
241 case OPCODE_EX2:
242 case OPCODE_LG2:
243 case OPCODE_RCP:
244 case OPCODE_RSQ:
245 case OPCODE_SIN:
246 inst = track_used_srcreg(s, inst, 0, 0x1);
247 break;
248 case OPCODE_DP3:
249 inst = track_used_srcreg(s, inst, 0, 0x7);
250 inst = track_used_srcreg(s, inst, 1, 0x7);
251 break;
252 case OPCODE_DP4:
253 inst = track_used_srcreg(s, inst, 0, 0xf);
254 inst = track_used_srcreg(s, inst, 1, 0xf);
255 break;
256 case OPCODE_KIL:
257 case OPCODE_TEX:
258 case OPCODE_TXB:
259 case OPCODE_TXP:
260 inst = track_used_srcreg(s, inst, 0, 0xf);
261 break;
262 default:
263 _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
264 return;
265 }
266 }
267
268
269 void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
270 {
271 struct nqssadce_state s;
272
273 _mesa_bzero(&s, sizeof(s));
274 s.Ctx = ctx;
275 s.Program = p;
276 s.Descr = descr;
277 s.Descr->Init(&s);
278 s.IP = p->NumInstructions;
279
280 while(s.IP > 0) {
281 s.IP--;
282 process_instruction(&s);
283 }
284 }