r300: Remove clause stuff for now in favour of a cloned generic gl_program
[mesa.git] / src / mesa / drivers / dri / r300 / radeon_program_alu.c
1 /*
2 * Copyright (C) 2008 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * @file
30 *
31 * Shareable transformations that transform "special" ALU instructions
32 * into ALU instructions that are supported by hardware.
33 *
34 */
35
36 #include "radeon_program_alu.h"
37
38
39 static struct prog_instruction *emit1(struct gl_program* p,
40 gl_inst_opcode Opcode, struct prog_dst_register DstReg,
41 struct prog_src_register SrcReg)
42 {
43 struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
44
45 fpi->Opcode = Opcode;
46 fpi->DstReg = DstReg;
47 fpi->SrcReg[0] = SrcReg;
48 return fpi;
49 }
50
51 static struct prog_instruction *emit2(struct gl_program* p,
52 gl_inst_opcode Opcode, struct prog_dst_register DstReg,
53 struct prog_src_register SrcReg0, struct prog_src_register SrcReg1)
54 {
55 struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
56
57 fpi->Opcode = Opcode;
58 fpi->DstReg = DstReg;
59 fpi->SrcReg[0] = SrcReg0;
60 fpi->SrcReg[1] = SrcReg1;
61 return fpi;
62 }
63
64 static struct prog_instruction *emit3(struct gl_program* p,
65 gl_inst_opcode Opcode, struct prog_dst_register DstReg,
66 struct prog_src_register SrcReg0, struct prog_src_register SrcReg1,
67 struct prog_src_register SrcReg2)
68 {
69 struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
70
71 fpi->Opcode = Opcode;
72 fpi->DstReg = DstReg;
73 fpi->SrcReg[0] = SrcReg0;
74 fpi->SrcReg[1] = SrcReg1;
75 fpi->SrcReg[2] = SrcReg2;
76 return fpi;
77 }
78
79 static void set_swizzle(struct prog_src_register *SrcReg, int coordinate, int swz)
80 {
81 SrcReg->Swizzle &= ~(7 << (3*coordinate));
82 SrcReg->Swizzle |= swz << (3*coordinate);
83 }
84
85 static void set_negate_base(struct prog_src_register *SrcReg, int coordinate, int negate)
86 {
87 SrcReg->NegateBase &= ~(1 << coordinate);
88 SrcReg->NegateBase |= (negate << coordinate);
89 }
90
91 static struct prog_dst_register dstreg(int file, int index)
92 {
93 struct prog_dst_register dst;
94 dst.File = file;
95 dst.Index = index;
96 dst.WriteMask = WRITEMASK_XYZW;
97 dst.CondMask = COND_TR;
98 dst.CondSwizzle = SWIZZLE_NOOP;
99 dst.CondSrc = 0;
100 dst.pad = 0;
101 return dst;
102 }
103
104 static const struct prog_src_register builtin_zero = {
105 .File = PROGRAM_BUILTIN,
106 .Index = 0,
107 .Swizzle = SWIZZLE_0000
108 };
109 static const struct prog_src_register builtin_one = {
110 .File = PROGRAM_BUILTIN,
111 .Index = 0,
112 .Swizzle = SWIZZLE_1111
113 };
114 static const struct prog_src_register srcreg_undefined = {
115 .File = PROGRAM_UNDEFINED,
116 .Index = 0,
117 .Swizzle = SWIZZLE_NOOP
118 };
119
120 static struct prog_src_register srcreg(int file, int index)
121 {
122 struct prog_src_register src = srcreg_undefined;
123 src.File = file;
124 src.Index = index;
125 return src;
126 }
127
128 static struct prog_src_register negate(struct prog_src_register reg)
129 {
130 struct prog_src_register newreg = reg;
131 newreg.NegateAbs = !newreg.NegateAbs;
132 return newreg;
133 }
134
135 static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w)
136 {
137 struct prog_src_register swizzled = reg;
138 swizzled.Swizzle = MAKE_SWIZZLE4(
139 GET_SWZ(reg.Swizzle, x),
140 GET_SWZ(reg.Swizzle, y),
141 GET_SWZ(reg.Swizzle, z),
142 GET_SWZ(reg.Swizzle, w));
143 return swizzled;
144 }
145
146 static struct prog_src_register scalar(struct prog_src_register reg)
147 {
148 return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
149 }
150
151 static void transform_ABS(struct gl_program* p,
152 struct prog_instruction* inst)
153 {
154 struct prog_src_register src = inst->SrcReg[0];
155 src.Abs = 1;
156 src.NegateBase = 0;
157 src.NegateAbs = 0;
158 emit1(p, OPCODE_MOV, inst->DstReg, src);
159 }
160
161 static void transform_DPH(struct gl_program* p,
162 struct prog_instruction* inst)
163 {
164 struct prog_src_register src0 = inst->SrcReg[0];
165 if (src0.NegateAbs) {
166 if (src0.Abs) {
167 int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
168 emit1(p, OPCODE_MOV, dstreg(PROGRAM_TEMPORARY, tempreg), src0);
169 src0 = srcreg(src0.File, src0.Index);
170 } else {
171 src0.NegateAbs = 0;
172 src0.NegateBase ^= NEGATE_XYZW;
173 }
174 }
175 set_swizzle(&src0, 3, SWIZZLE_ONE);
176 set_negate_base(&src0, 3, 0);
177 emit2(p, OPCODE_DP4, inst->DstReg, src0, inst->SrcReg[1]);
178 }
179
180 static void transform_FLR(struct gl_program* p,
181 struct prog_instruction* inst)
182 {
183 int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
184 emit1(p, OPCODE_FRC, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
185 emit2(p, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
186 }
187
188 static void transform_POW(struct gl_program* p,
189 struct prog_instruction* inst)
190 {
191 int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
192 struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
193 struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
194 tempdst.WriteMask = WRITEMASK_W;
195 tempsrc.Swizzle = SWIZZLE_WWWW;
196
197 emit1(p, OPCODE_LG2, tempdst, scalar(inst->SrcReg[0]));
198 emit2(p, OPCODE_MUL, tempdst, tempsrc, scalar(inst->SrcReg[1]));
199 emit1(p, OPCODE_EX2, inst->DstReg, tempsrc);
200 }
201
202 static void transform_SGE(struct gl_program* p,
203 struct prog_instruction* inst)
204 {
205 int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
206
207 emit2(p, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
208 emit3(p, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
209 }
210
211 static void transform_SLT(struct gl_program* p,
212 struct prog_instruction* inst)
213 {
214 int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
215
216 emit2(p, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
217 emit3(p, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
218 }
219
220 static void transform_SUB(struct gl_program* p,
221 struct prog_instruction* inst)
222 {
223 emit2(p, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
224 }
225
226 static void transform_SWZ(struct gl_program* p,
227 struct prog_instruction* inst)
228 {
229 emit1(p, OPCODE_MOV, inst->DstReg, inst->SrcReg[0]);
230 }
231
232 static void transform_XPD(struct gl_program* p,
233 struct prog_instruction* inst)
234 {
235 int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
236
237 emit2(p, OPCODE_MUL, dstreg(PROGRAM_TEMPORARY, tempreg),
238 swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
239 swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
240 emit3(p, OPCODE_MAD, inst->DstReg,
241 swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
242 swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
243 negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
244 }
245
246
247 /**
248 * Can be used as a transformation for @ref radeonClauseLocalTransform,
249 * no userData necessary.
250 *
251 * Eliminates the following ALU instructions:
252 * ABS, DPH, FLR, POW, SGE, SLT, SUB, SWZ, XPD
253 * using:
254 * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
255 *
256 * @note should be applicable to R300 and R500 fragment programs.
257 *
258 * @todo add LIT here as well?
259 */
260 GLboolean radeonTransformALU(
261 GLcontext* ctx,
262 struct gl_program* prog,
263 struct prog_instruction* inst,
264 void* unused)
265 {
266 switch(inst->Opcode) {
267 case OPCODE_ABS: transform_ABS(prog, inst); return GL_TRUE;
268 case OPCODE_DPH: transform_DPH(prog, inst); return GL_TRUE;
269 case OPCODE_FLR: transform_FLR(prog, inst); return GL_TRUE;
270 case OPCODE_POW: transform_POW(prog, inst); return GL_TRUE;
271 case OPCODE_SGE: transform_SGE(prog, inst); return GL_TRUE;
272 case OPCODE_SLT: transform_SLT(prog, inst); return GL_TRUE;
273 case OPCODE_SUB: transform_SUB(prog, inst); return GL_TRUE;
274 case OPCODE_SWZ: transform_SWZ(prog, inst); return GL_TRUE;
275 case OPCODE_XPD: transform_XPD(prog, inst); return GL_TRUE;
276 default:
277 return GL_FALSE;
278 }
279 }