Merge branch 'lp-offset-twoside'
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_pair_translate.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_program_pair.h"
29
30 #include "radeon_compiler.h"
31
32
33 /**
34 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
35 * and reverse the order of arguments for CMP.
36 */
37 static void final_rewrite(struct rc_sub_instruction *inst)
38 {
39 struct rc_src_register tmp;
40
41 switch(inst->Opcode) {
42 case RC_OPCODE_ADD:
43 inst->SrcReg[2] = inst->SrcReg[1];
44 inst->SrcReg[1].File = RC_FILE_NONE;
45 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
46 inst->SrcReg[1].Negate = RC_MASK_NONE;
47 inst->Opcode = RC_OPCODE_MAD;
48 break;
49 case RC_OPCODE_CMP:
50 tmp = inst->SrcReg[2];
51 inst->SrcReg[2] = inst->SrcReg[0];
52 inst->SrcReg[0] = tmp;
53 break;
54 case RC_OPCODE_MOV:
55 /* AMD say we should use CMP.
56 * However, when we transform
57 * KIL -r0;
58 * into
59 * CMP tmp, -r0, -r0, 0;
60 * KIL tmp;
61 * we get incorrect behaviour on R500 when r0 == 0.0.
62 * It appears that the R500 KIL hardware treats -0.0 as less
63 * than zero.
64 */
65 inst->SrcReg[1].File = RC_FILE_NONE;
66 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
67 inst->SrcReg[2].File = RC_FILE_NONE;
68 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
69 inst->Opcode = RC_OPCODE_MAD;
70 break;
71 case RC_OPCODE_MUL:
72 inst->SrcReg[2].File = RC_FILE_NONE;
73 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
74 inst->Opcode = RC_OPCODE_MAD;
75 break;
76 default:
77 /* nothing to do */
78 break;
79 }
80 }
81
82
83 /**
84 * Classify an instruction according to which ALUs etc. it needs
85 */
86 static void classify_instruction(struct rc_sub_instruction * inst,
87 int * needrgb, int * needalpha, int * istranscendent)
88 {
89 *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
90 *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
91 *istranscendent = 0;
92
93 if (inst->WriteALUResult == RC_ALURESULT_X)
94 *needrgb = 1;
95 else if (inst->WriteALUResult == RC_ALURESULT_W)
96 *needalpha = 1;
97
98 switch(inst->Opcode) {
99 case RC_OPCODE_ADD:
100 case RC_OPCODE_CMP:
101 case RC_OPCODE_DDX:
102 case RC_OPCODE_DDY:
103 case RC_OPCODE_FRC:
104 case RC_OPCODE_MAD:
105 case RC_OPCODE_MAX:
106 case RC_OPCODE_MIN:
107 case RC_OPCODE_MOV:
108 case RC_OPCODE_MUL:
109 break;
110 case RC_OPCODE_COS:
111 case RC_OPCODE_EX2:
112 case RC_OPCODE_LG2:
113 case RC_OPCODE_RCP:
114 case RC_OPCODE_RSQ:
115 case RC_OPCODE_SIN:
116 *istranscendent = 1;
117 *needalpha = 1;
118 break;
119 case RC_OPCODE_DP4:
120 *needalpha = 1;
121 /* fall through */
122 case RC_OPCODE_DP3:
123 *needrgb = 1;
124 break;
125 default:
126 break;
127 }
128 }
129
130 static void src_uses(struct rc_src_register src, unsigned int * rgb,
131 unsigned int * alpha)
132 {
133 int j;
134 for(j = 0; j < 4; ++j) {
135 unsigned int swz = GET_SWZ(src.Swizzle, j);
136 if (swz < 3)
137 *rgb = 1;
138 else if (swz < 4)
139 *alpha = 1;
140 }
141 }
142
143 /**
144 * Fill the given ALU instruction's opcodes and source operands into the given pair,
145 * if possible.
146 */
147 static void set_pair_instruction(struct r300_fragment_program_compiler *c,
148 struct rc_pair_instruction * pair,
149 struct rc_sub_instruction * inst)
150 {
151 int needrgb, needalpha, istranscendent;
152 const struct rc_opcode_info * opcode;
153 int i;
154
155 memset(pair, 0, sizeof(struct rc_pair_instruction));
156
157 classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
158
159 if (needrgb) {
160 if (istranscendent)
161 pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
162 else
163 pair->RGB.Opcode = inst->Opcode;
164 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
165 pair->RGB.Saturate = 1;
166 }
167 if (needalpha) {
168 pair->Alpha.Opcode = inst->Opcode;
169 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
170 pair->Alpha.Saturate = 1;
171 }
172
173 opcode = rc_get_opcode_info(inst->Opcode);
174
175 /* Presubtract handling:
176 * We need to make sure that the values used by the presubtract
177 * operation end up in src0 or src1. */
178 if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
179 /* rc_pair_alloc_source() will fill in data for
180 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
181 int j;
182 for(j = 0; j < 3; j++) {
183 int src_regs;
184 if(inst->SrcReg[j].File != RC_FILE_PRESUB)
185 continue;
186
187 src_regs = rc_presubtract_src_reg_count(
188 inst->PreSub.Opcode);
189 for(i = 0; i < src_regs; i++) {
190 unsigned int rgb = 0;
191 unsigned int alpha = 0;
192 src_uses(inst->SrcReg[j], &rgb, &alpha);
193 if(rgb) {
194 pair->RGB.Src[i].File =
195 inst->PreSub.SrcReg[i].File;
196 pair->RGB.Src[i].Index =
197 inst->PreSub.SrcReg[i].Index;
198 pair->RGB.Src[i].Used = 1;
199 }
200 if(alpha) {
201 pair->Alpha.Src[i].File =
202 inst->PreSub.SrcReg[i].File;
203 pair->Alpha.Src[i].Index =
204 inst->PreSub.SrcReg[i].Index;
205 pair->Alpha.Src[i].Used = 1;
206 }
207 }
208 }
209 }
210
211 for(i = 0; i < opcode->NumSrcRegs; ++i) {
212 int source;
213 if (needrgb && !istranscendent) {
214 unsigned int srcrgb = 0;
215 unsigned int srcalpha = 0;
216 int j;
217 /* We don't care about the alpha channel here. We only
218 * want the part of the swizzle that writes to rgb,
219 * since we are creating an rgb instruction. */
220 for(j = 0; j < 3; ++j) {
221 unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
222 if (swz < 3)
223 srcrgb = 1;
224 else if (swz < 4)
225 srcalpha = 1;
226 }
227 source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
228 inst->SrcReg[i].File, inst->SrcReg[i].Index);
229 if (source < 0) {
230 rc_error(&c->Base, "Failed to translate "
231 "rgb instruction.\n");
232 return;
233 }
234 pair->RGB.Arg[i].Source = source;
235 pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
236 pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
237 pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
238 }
239 if (needalpha) {
240 unsigned int srcrgb = 0;
241 unsigned int srcalpha = 0;
242 unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3);
243 if (swz < 3)
244 srcrgb = 1;
245 else if (swz < 4)
246 srcalpha = 1;
247 source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
248 inst->SrcReg[i].File, inst->SrcReg[i].Index);
249 if (source < 0) {
250 rc_error(&c->Base, "Failed to translate "
251 "alpha instruction.\n");
252 return;
253 }
254 pair->Alpha.Arg[i].Source = source;
255 pair->Alpha.Arg[i].Swizzle = swz;
256 pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
257 pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
258 }
259 }
260
261 /* Destination handling */
262 if (inst->DstReg.File == RC_FILE_OUTPUT) {
263 if (inst->DstReg.Index == c->OutputDepth) {
264 pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
265 } else {
266 for (i = 0; i < 4; i++) {
267 if (inst->DstReg.Index == c->OutputColor[i]) {
268 pair->RGB.Target = i;
269 pair->Alpha.Target = i;
270 pair->RGB.OutputWriteMask |=
271 inst->DstReg.WriteMask & RC_MASK_XYZ;
272 pair->Alpha.OutputWriteMask |=
273 GET_BIT(inst->DstReg.WriteMask, 3);
274 break;
275 }
276 }
277 }
278 } else {
279 if (needrgb) {
280 pair->RGB.DestIndex = inst->DstReg.Index;
281 pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
282 }
283
284 if (needalpha) {
285 pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
286 if (pair->Alpha.WriteMask) {
287 pair->Alpha.DestIndex = inst->DstReg.Index;
288 }
289 }
290 }
291
292 if (inst->WriteALUResult) {
293 pair->WriteALUResult = inst->WriteALUResult;
294 pair->ALUResultCompare = inst->ALUResultCompare;
295 }
296 }
297
298
299 static void check_opcode_support(struct r300_fragment_program_compiler *c,
300 struct rc_sub_instruction *inst)
301 {
302 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
303
304 if (opcode->HasDstReg) {
305 if (inst->DstReg.RelAddr) {
306 rc_error(&c->Base, "Fragment program does not support relative addressing "
307 "of destination operands.\n");
308 return;
309 }
310
311 if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
312 rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
313 return;
314 }
315 }
316
317 for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
318 if (inst->SrcReg[i].RelAddr) {
319 rc_error(&c->Base, "Fragment program does not support relative addressing "
320 " of source operands.\n");
321 return;
322 }
323 }
324 }
325
326
327 /**
328 * Translate all ALU instructions into corresponding pair instructions,
329 * performing no other changes.
330 */
331 void rc_pair_translate(struct radeon_compiler *cc, void *user)
332 {
333 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
334
335 for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
336 inst != &c->Base.Program.Instructions;
337 inst = inst->Next) {
338 const struct rc_opcode_info * opcode;
339 struct rc_sub_instruction copy;
340
341 if (inst->Type != RC_INSTRUCTION_NORMAL)
342 continue;
343
344 opcode = rc_get_opcode_info(inst->U.I.Opcode);
345
346 if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
347 continue;
348
349 copy = inst->U.I;
350
351 check_opcode_support(c, &copy);
352
353 final_rewrite(&copy);
354 inst->Type = RC_INSTRUCTION_PAIR;
355 set_pair_instruction(c, &inst->U.P, &copy);
356 }
357 }