r5xx: Move dumb_shader.
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
/*
 * Useful macros and values
 */

/* Print a message to stderr, prefixed with the current file and function,
 * and mark the program as failed.  Requires a variable named `fp` in the
 * expanding scope; its `error` member is set to GL_TRUE. */
#define ERROR(fmt, args...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##args); \
		fp->error = GL_TRUE; \
	} while(0)

/* Declares a local `cs` initialised from `fp->cs`; requires `fp` in scope. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle tools */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Three selectors packed 3 bits apart: first channel in bits 0-2, second in
 * bits 3-5, third in bits 6-8. */
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/* The MAKE_SWIZ_* helpers move an already-packed swizzle value to its field
 * position inside an instruction word.  The argument is parenthesized so
 * that compound expressions such as `a | b` are shifted as a whole. */

/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)

/* Writemasks */
#define R500_WRITEMASK_ARGB 0xF
98
99 /* 1/(2pi), needed for quick modulus in trig insts
100 * Thanks to glisse for pointing out how to do it! */
101 static const GLfloat RCP_2PI[] = {0.15915494309189535,
102 0.15915494309189535,
103 0.15915494309189535,
104 0.15915494309189535};
105
106 static void dump_program(struct r500_fragment_program *fp);
107
108 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
109 GLuint swiz = 0x0;
110 GLuint temp;
111 /* This could be optimized, but it should be plenty fast already. */
112 int i;
113 for (i = 0; i < 3; i++) {
114 temp = GET_SWZ(src.Swizzle, i);
115 /* Fix SWIZZLE_ONE */
116 if (temp == 5) temp++;
117 swiz += temp << i*3;
118 }
119 return swiz;
120 }
121
122 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
123 GLuint swiz = GET_SWZ(src.Swizzle, 3);
124
125 if (swiz == 5) swiz++;
126 return swiz;
127 }
128
129 static inline GLuint make_sop_swizzle(struct prog_src_register src) {
130 GLuint swiz = GET_SWZ(src.Swizzle, 0);
131
132 if (swiz == 5) swiz++;
133 return swiz;
134 }
135
136 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
137 GLuint swiz = 0x0;
138 GLuint temp = src.Swizzle;
139 int i;
140 for (i = 0; i < 4; i++) {
141 swiz += (temp & 0x3) << i*2;
142 temp >>= 3;
143 }
144 return swiz;
145 }
146
147 static int get_temp(struct r500_fragment_program *fp, int slot) {
148
149 COMPILE_STATE;
150
151 int r = cs->temp_in_use + 1 + slot;
152
153 if (r > R500_US_NUM_TEMP_REGS) {
154 ERROR("Too many temporary registers requested, can't compile!\n");
155 }
156
157 return r;
158 }
159
160 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
161 static GLuint emit_const4fv(struct r500_fragment_program *fp,
162 const GLfloat * cp)
163 {
164 GLuint reg = 0x0;
165 int index;
166
167 for (index = 0; index < fp->const_nr; ++index) {
168 if (fp->constant[index] == cp)
169 break;
170 }
171
172 if (index >= fp->const_nr) {
173 if (index >= R500_US_NUM_CONST_REGS) {
174 ERROR("Out of hw constants!\n");
175 return reg;
176 }
177
178 fp->const_nr++;
179 fp->constant[index] = cp;
180 }
181
182 reg = index | REG_CONSTANT;
183 return reg;
184 }
185
186 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
187 COMPILE_STATE;
188 GLuint reg;
189 switch (src.File) {
190 case PROGRAM_TEMPORARY:
191 reg = src.Index + fp->temp_reg_offset;
192 break;
193 case PROGRAM_INPUT:
194 reg = cs->inputs[src.Index].reg;
195 break;
196 case PROGRAM_LOCAL_PARAM:
197 reg = emit_const4fv(fp,
198 fp->mesa_program.Base.LocalParams[src.
199 Index]);
200 break;
201 case PROGRAM_ENV_PARAM:
202 reg = emit_const4fv(fp,
203 fp->ctx->FragmentProgram.Parameters[src.
204 Index]);
205 break;
206 case PROGRAM_STATE_VAR:
207 case PROGRAM_NAMED_PARAM:
208 case PROGRAM_CONSTANT:
209 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
210 ParameterValues[src.Index]);
211 break;
212 default:
213 ERROR("Can't handle src.File %x\n", src.File);
214 reg = 0x0;
215 break;
216 }
217 return reg;
218 }
219
220 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
221 GLuint reg;
222 switch (dest.File) {
223 case PROGRAM_TEMPORARY:
224 reg = dest.Index + fp->temp_reg_offset;
225 break;
226 case PROGRAM_OUTPUT:
227 /* Eventually we may need to handle multiple
228 * rendering targets... */
229 reg = dest.Index;
230 break;
231 default:
232 ERROR("Can't handle dest.File %x\n", dest.File);
233 reg = 0x0;
234 break;
235 }
236 return reg;
237 }
238
239 static void emit_tex(struct r500_fragment_program *fp,
240 struct prog_instruction *fpi, int opcode, int dest, int counter)
241 {
242 int hwsrc, hwdest;
243 GLuint mask;
244
245 mask = fpi->DstReg.WriteMask << 11;
246 hwsrc = make_src(fp, fpi->SrcReg[0]);
247
248 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
249 hwdest = get_temp(fp, 0);
250 } else {
251 hwdest = dest;
252 }
253
254 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
255 | R500_INST_TEX_SEM_WAIT;
256
257 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
258 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
259
260 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
261 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
262
263 switch (opcode) {
264 case OPCODE_KIL:
265 fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
266 break;
267 case OPCODE_TEX:
268 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
269 break;
270 case OPCODE_TXB:
271 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
272 break;
273 case OPCODE_TXP:
274 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
275 break;
276 default:
277 ERROR("emit_tex can't handle opcode %x\n", opcode);
278 }
279
280 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
281 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
282 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
283 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
284 | R500_TEX_DST_ADDR(hwdest)
285 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
286 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
287
288 fp->inst[counter].inst3 = 0x0;
289 fp->inst[counter].inst4 = 0x0;
290 fp->inst[counter].inst5 = 0x0;
291
292 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
293 counter++;
294 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
295 | R500_INST_TEX_SEM_WAIT | (mask << 4);
296 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
297 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
298 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
299 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
300 | R500_ALU_RGB_SEL_B_SRC0
301 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB)
302 | R500_ALU_RGB_OMOD_DISABLE;
303 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
304 | R500_ALPHA_ADDRD(dest)
305 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A)
306 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A)
307 | R500_ALPHA_OMOD_DISABLE;
308 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
309 | R500_ALU_RGBA_ADDRD(dest)
310 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
311 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
312 }
313 }
314
315 static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
316 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
317 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
318 /* output_mask */
319 | (fpi->DstReg.WriteMask << 15);
320 } else {
321 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
322 /* pixel_mask */
323 | (fpi->DstReg.WriteMask << 11);
324 }
325
326 fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
327 }
328
329 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) {
330 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
331 * it is technically more accurate and recommended by ATI/AMD. */
332 GLuint src_reg = make_src(fp, src);
333 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
334 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
335 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
336 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src))
337 | R500_ALU_RGB_SEL_B_SRC0
338 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src))
339 | R500_ALU_RGB_OMOD_DISABLE;
340 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
341 | R500_ALPHA_ADDRD(dest)
342 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src))
343 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src))
344 | R500_ALPHA_OMOD_DISABLE;
345 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
346 | R500_ALU_RGBA_ADDRD(dest)
347 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
348 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
349 }
350
351 static GLboolean parse_program(struct r500_fragment_program *fp)
352 {
353 struct gl_fragment_program *mp = &fp->mesa_program;
354 const struct prog_instruction *inst = mp->Base.Instructions;
355 struct prog_instruction *fpi;
356 GLuint src[3], dest, temp[2];
357 int temp_swiz, pixel_mask = 0, output_mask = 0, counter = 0;
358
359 if (!inst || inst[0].Opcode == OPCODE_END) {
360 ERROR("The program is empty!\n");
361 return GL_FALSE;
362 }
363
364 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
365
366 if (fpi->Opcode != OPCODE_KIL) {
367 dest = make_dest(fp, fpi->DstReg);
368
369 pixel_mask = fpi->DstReg.WriteMask << 11;
370 output_mask = fpi->DstReg.WriteMask << 15;
371 }
372
373 switch (fpi->Opcode) {
374 case OPCODE_ABS:
375 emit_alu(fp, counter, fpi);
376 emit_mov(fp, counter, fpi->SrcReg[0], dest);
377 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
378 | R500_ALU_RGB_MOD_B_ABS;
379 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
380 | R500_ALPHA_MOD_B_ABS;
381 break;
382 case OPCODE_ADD:
383 src[0] = make_src(fp, fpi->SrcReg[0]);
384 src[1] = make_src(fp, fpi->SrcReg[1]);
385 /* Variation on MAD: 1*src0+src1 */
386 emit_alu(fp, counter, fpi);
387 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
388 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0);
389 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
390 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0);
391 fp->inst[counter].inst3 = /* 1 */
392 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
393 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
394 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
395 | R500_ALPHA_ADDRD(dest)
396 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
397 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
398 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
399 | R500_ALU_RGBA_ADDRD(dest)
400 | R500_ALU_RGBA_SEL_C_SRC1
401 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
402 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
403 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
404 break;
405 case OPCODE_CMP:
406 /* This inst's selects need to be swapped as follows:
407 * 0 -> C ; 1 -> B ; 2 -> A */
408 src[0] = make_src(fp, fpi->SrcReg[0]);
409 src[1] = make_src(fp, fpi->SrcReg[1]);
410 src[2] = make_src(fp, fpi->SrcReg[2]);
411 emit_alu(fp, counter, fpi);
412 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
413 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
414 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
415 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
416 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
417 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
418 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
419 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
420 | R500_ALPHA_ADDRD(dest)
421 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
422 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
423 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
424 | R500_ALU_RGBA_ADDRD(dest)
425 | R500_ALU_RGBA_SEL_C_SRC2
426 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
427 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
428 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
429 break;
430 case OPCODE_COS:
431 src[0] = make_src(fp, fpi->SrcReg[0]);
432 src[1] = emit_const4fv(fp, RCP_2PI);
433 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
434 | (R500_WRITEMASK_ARGB << 11);
435 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
436 | R500_RGB_ADDR1(src[1]);
437 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
438 | R500_ALPHA_ADDR1(src[1]);
439 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
440 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
441 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
442 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
443 | R500_ALPHA_ADDRD(get_temp(fp, 0))
444 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
445 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
446 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
447 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
448 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
449 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
450 counter++;
451 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
452 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
453 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
454 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
455 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
456 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
457 | R500_ALPHA_ADDRD(get_temp(fp, 1))
458 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
459 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
460 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
461 counter++;
462 emit_alu(fp, counter, fpi);
463 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
464 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
465 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
466 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
467 | R500_ALPHA_ADDRD(dest)
468 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
469 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
470 | R500_ALU_RGBA_ADDRD(dest);
471 break;
472 case OPCODE_DP3:
473 src[0] = make_src(fp, fpi->SrcReg[0]);
474 src[1] = make_src(fp, fpi->SrcReg[1]);
475 emit_alu(fp, counter, fpi);
476 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
477 | R500_RGB_ADDR1(src[1]);
478 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
479 | R500_ALPHA_ADDR1(src[1]);
480 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
481 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
482 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
483 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
484 | R500_ALPHA_ADDRD(dest)
485 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
486 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
487 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
488 | R500_ALU_RGBA_ADDRD(dest);
489 break;
490 case OPCODE_DP4:
491 src[0] = make_src(fp, fpi->SrcReg[0]);
492 src[1] = make_src(fp, fpi->SrcReg[1]);
493 /* Based on DP3 */
494 emit_alu(fp, counter, fpi);
495 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
496 | R500_RGB_ADDR1(src[1]);
497 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
498 | R500_ALPHA_ADDR1(src[1]);
499 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
500 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
501 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
502 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
503 | R500_ALPHA_ADDRD(dest)
504 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
505 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
506 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
507 | R500_ALU_RGBA_ADDRD(dest);
508 break;
509 case OPCODE_DPH:
510 src[0] = make_src(fp, fpi->SrcReg[0]);
511 src[1] = make_src(fp, fpi->SrcReg[1]);
512 /* Based on DP3 */
513 emit_alu(fp, counter, fpi);
514 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
515 | R500_RGB_ADDR1(src[1]);
516 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
517 | R500_ALPHA_ADDR1(src[1]);
518 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
519 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
520 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
521 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
522 | R500_ALPHA_ADDRD(dest)
523 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
524 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
525 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
526 | R500_ALU_RGBA_ADDRD(dest);
527 break;
528 case OPCODE_DST:
529 src[0] = make_src(fp, fpi->SrcReg[0]);
530 src[1] = make_src(fp, fpi->SrcReg[1]);
531 /* [1, src0.y*src1.y, src0.z, src1.w]
532 * So basically MUL with lotsa swizzling. */
533 emit_alu(fp, counter, fpi);
534 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
535 | R500_RGB_ADDR1(src[1]);
536 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
537 | R500_ALPHA_ADDR1(src[1]);
538 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
539 | R500_ALU_RGB_SEL_B_SRC1;
540 /* Select [1, y, z, 1] */
541 temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE;
542 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz);
543 /* Select [1, y, 1, w] */
544 temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6);
545 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz);
546 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
547 | R500_ALPHA_ADDRD(dest)
548 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
549 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
550 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
551 | R500_ALU_RGBA_ADDRD(dest)
552 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
553 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
554 break;
555 case OPCODE_EX2:
556 src[0] = make_src(fp, fpi->SrcReg[0]);
557 emit_alu(fp, counter, fpi);
558 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
559 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
560 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
561 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
562 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
563 | R500_ALPHA_ADDRD(dest)
564 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
565 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
566 | R500_ALU_RGBA_ADDRD(dest);
567 break;
568 case OPCODE_FRC:
569 src[0] = make_src(fp, fpi->SrcReg[0]);
570 emit_alu(fp, counter, fpi);
571 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
572 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
573 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
574 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
575 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
576 | R500_ALPHA_ADDRD(dest)
577 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
578 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
579 | R500_ALU_RGBA_ADDRD(dest);
580 break;
581 case OPCODE_KIL:
582 emit_tex(fp, fpi, OPCODE_KIL, dest, counter);
583 break;
584 case OPCODE_LG2:
585 src[0] = make_src(fp, fpi->SrcReg[0]);
586 emit_alu(fp, counter, fpi);
587 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
588 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
589 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
590 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
591 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
592 | R500_ALPHA_ADDRD(dest)
593 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
594 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
595 | R500_ALU_RGBA_ADDRD(dest);
596 break;
597 case OPCODE_LRP:
598 /* src0 * src1 + INV(src0) * src2
599 * 1) MUL src0, src1, temp
600 * 2) PRE 1-src0; MAD srcp, src2, temp */
601 src[0] = make_src(fp, fpi->SrcReg[0]);
602 src[1] = make_src(fp, fpi->SrcReg[1]);
603 src[2] = make_src(fp, fpi->SrcReg[2]);
604 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
605 | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11);
606 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
607 | R500_RGB_ADDR1(src[1]);
608 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
609 | R500_ALPHA_ADDR1(src[1]);
610 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
611 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
612 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
613 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
614 | R500_ALPHA_ADDRD(get_temp(fp, 0))
615 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
616 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
617 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
618 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
619 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
620 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
621 counter++;
622 emit_alu(fp, counter, fpi);
623 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
624 | R500_RGB_ADDR1(src[2])
625 | R500_RGB_ADDR2(get_temp(fp, 0))
626 | R500_RGB_SRCP_OP_1_MINUS_RGB0;
627 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
628 | R500_ALPHA_ADDR1(src[2])
629 | R500_ALPHA_ADDR2(get_temp(fp, 0))
630 | R500_ALPHA_SRCP_OP_1_MINUS_A0;
631 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP
632 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
633 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
634 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
635 | R500_ALPHA_ADDRD(dest)
636 | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
637 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
638 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
639 | R500_ALU_RGBA_ADDRD(dest)
640 | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
641 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
642 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
643 break;
644 case OPCODE_MAD:
645 src[0] = make_src(fp, fpi->SrcReg[0]);
646 src[1] = make_src(fp, fpi->SrcReg[1]);
647 src[2] = make_src(fp, fpi->SrcReg[2]);
648 emit_alu(fp, counter, fpi);
649 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
650 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
651 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
652 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
653 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
654 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
655 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
656 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
657 | R500_ALPHA_ADDRD(dest)
658 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
659 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
660 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
661 | R500_ALU_RGBA_ADDRD(dest)
662 | R500_ALU_RGBA_SEL_C_SRC2
663 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
664 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
665 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
666 break;
667 case OPCODE_MAX:
668 src[0] = make_src(fp, fpi->SrcReg[0]);
669 src[1] = make_src(fp, fpi->SrcReg[1]);
670 emit_alu(fp, counter, fpi);
671 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
672 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
673 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
674 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
675 | R500_ALU_RGB_SEL_B_SRC1
676 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
677 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
678 | R500_ALPHA_ADDRD(dest)
679 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
680 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
681 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
682 | R500_ALU_RGBA_ADDRD(dest);
683 break;
684 case OPCODE_MIN:
685 src[0] = make_src(fp, fpi->SrcReg[0]);
686 src[1] = make_src(fp, fpi->SrcReg[1]);
687 emit_alu(fp, counter, fpi);
688 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
689 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
690 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
691 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
692 | R500_ALU_RGB_SEL_B_SRC1
693 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
694 fp->inst[counter].inst4 = R500_ALPHA_OP_MIN
695 | R500_ALPHA_ADDRD(dest)
696 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
697 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
698 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
699 | R500_ALU_RGBA_ADDRD(dest);
700 break;
701 case OPCODE_MOV:
702 emit_alu(fp, counter, fpi);
703 emit_mov(fp, counter, fpi->SrcReg[0], dest);
704 break;
705 case OPCODE_MUL:
706 src[0] = make_src(fp, fpi->SrcReg[0]);
707 src[1] = make_src(fp, fpi->SrcReg[1]);
708 /* Variation on MAD: src0*src1+0 */
709 emit_alu(fp, counter, fpi);
710 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
711 | R500_RGB_ADDR1(src[1]);
712 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
713 | R500_ALPHA_ADDR1(src[1]);
714 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
715 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
716 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
717 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
718 | R500_ALPHA_ADDRD(dest)
719 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
720 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
721 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
722 | R500_ALU_RGBA_ADDRD(dest)
723 // | R500_ALU_RGBA_SEL_C_SRC2
724 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
725 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
726 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
727 break;
728 case OPCODE_POW:
729 /* POW(a,b) = EX2(LN2(a)*b) */
730 src[0] = make_src(fp, fpi->SrcReg[0]);
731 src[1] = make_src(fp, fpi->SrcReg[1]);
732 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
733 | (R500_WRITEMASK_ARGB << 11);
734 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
735 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
736 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
737 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
738 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
739 | R500_ALPHA_ADDRD(get_temp(fp, 0))
740 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
741 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
742 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
743 counter++;
744 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
745 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0))
746 | R500_RGB_ADDR1(src[1]);
747 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0))
748 | R500_ALPHA_ADDR1(src[1]);
749 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
750 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
751 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
752 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
753 | R500_ALPHA_ADDRD(get_temp(fp, 1))
754 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
755 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
756 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
757 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1))
758 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
759 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
760 counter++;
761 emit_alu(fp, counter, fpi);
762 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
763 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
764 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
765 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
766 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
767 | R500_ALPHA_ADDRD(dest)
768 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
769 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
770 | R500_ALU_RGBA_ADDRD(dest);
771 break;
772 case OPCODE_RCP:
773 src[0] = make_src(fp, fpi->SrcReg[0]);
774 emit_alu(fp, counter, fpi);
775 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
776 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
777 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
778 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
779 fp->inst[counter].inst4 = R500_ALPHA_OP_RCP
780 | R500_ALPHA_ADDRD(dest)
781 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
782 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
783 | R500_ALU_RGBA_ADDRD(dest);
784 break;
785 case OPCODE_RSQ:
786 src[0] = make_src(fp, fpi->SrcReg[0]);
787 emit_alu(fp, counter, fpi);
788 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
789 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
790 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
791 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
792 fp->inst[counter].inst4 = R500_ALPHA_OP_RSQ
793 | R500_ALPHA_ADDRD(dest)
794 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
795 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
796 | R500_ALU_RGBA_ADDRD(dest);
797 break;
798 case OPCODE_SCS:
799 src[0] = make_src(fp, fpi->SrcReg[0]);
800 src[1] = emit_const4fv(fp, RCP_2PI);
801 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
802 | (R500_WRITEMASK_ARGB << 11);
803 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
804 | R500_RGB_ADDR1(src[1]);
805 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
806 | R500_ALPHA_ADDR1(src[1]);
807 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
808 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
809 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
810 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
811 | R500_ALPHA_ADDRD(get_temp(fp, 0))
812 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
813 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
814 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
815 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
816 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
817 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
818 counter++;
819 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
820 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
821 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
822 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
823 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
824 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
825 | R500_ALPHA_ADDRD(get_temp(fp, 1))
826 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
827 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
828 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
829 counter++;
830 /* Do a cosine, then a sine, masking out the channels we want to protect. */
831 /* Cosine only goes in R (x) channel. */
832 fpi->DstReg.WriteMask = 0x1;
833 emit_alu(fp, counter, fpi);
834 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
835 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
836 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
837 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
838 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
839 | R500_ALPHA_ADDRD(dest)
840 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
841 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
842 | R500_ALU_RGBA_ADDRD(dest);
843 counter++;
844 /* Sine only goes in G (y) channel. */
845 fpi->DstReg.WriteMask = 0x2;
846 emit_alu(fp, counter, fpi);
847 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
848 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
849 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
850 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
851 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
852 | R500_ALPHA_ADDRD(dest)
853 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
854 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
855 | R500_ALU_RGBA_ADDRD(dest);
856 break;
857 case OPCODE_SGE:
858 /* We use SRCP, so as a precaution we're
859 * going to set NOP in previous inst, if possible. */
860 /* This inst's selects need to be swapped as follows:
861 * 0 -> C ; 1 -> B ; 2 -> A */
862 src[0] = make_src(fp, fpi->SrcReg[0]);
863 src[1] = make_src(fp, fpi->SrcReg[1]);
864 emit_alu(fp, counter, fpi);
865 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
866 | R500_RGB_ADDR1(src[1])
867 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
868 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
869 | R500_ALPHA_ADDR1(src[1])
870 | R500_ALPHA_SRCP_OP_A1_MINUS_A0;
871 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
872 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
873 | R500_ALU_RGB_SEL_B_SRC1
874 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
875 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
876 | R500_ALPHA_ADDRD(dest)
877 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
878 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
879 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
880 | R500_ALU_RGBA_ADDRD(dest)
881 | R500_ALU_RGBA_SEL_C_SRCP
882 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
883 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
884 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
885 break;
886 case OPCODE_SIN:
887 src[0] = make_src(fp, fpi->SrcReg[0]);
888 src[1] = emit_const4fv(fp, RCP_2PI);
889 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
890 | (R500_WRITEMASK_ARGB << 11);
891 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
892 | R500_RGB_ADDR1(src[1]);
893 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
894 | R500_ALPHA_ADDR1(src[1]);
895 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
896 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
897 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
898 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
899 | R500_ALPHA_ADDRD(get_temp(fp, 0))
900 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
901 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
902 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
903 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
904 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
905 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
906 counter++;
907 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
908 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
909 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
910 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
911 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
912 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
913 | R500_ALPHA_ADDRD(get_temp(fp, 1))
914 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
915 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
916 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
917 counter++;
918 emit_alu(fp, counter, fpi);
919 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
920 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
921 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
922 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
923 | R500_ALPHA_ADDRD(dest)
924 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
925 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
926 | R500_ALU_RGBA_ADDRD(dest);
927 break;
928 case OPCODE_SLT:
929 /* We use SRCP, so as a precaution we're
930 * going to set NOP in previous inst, if possible. */
931 /* This inst's selects need to be swapped as follows:
932 * 0 -> C ; 1 -> B ; 2 -> A */
933 src[0] = make_src(fp, fpi->SrcReg[0]);
934 src[1] = make_src(fp, fpi->SrcReg[1]);
935 emit_alu(fp, counter, fpi);
936 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
937 | R500_RGB_ADDR1(src[1])
938 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
939 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
940 | R500_ALPHA_ADDR1(src[1])
941 | R500_ALPHA_SRCP_OP_A1_MINUS_A0;
942 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
943 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
944 | R500_ALU_RGB_SEL_B_SRC1
945 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
946 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
947 | R500_ALPHA_ADDRD(dest)
948 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
949 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
950 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
951 | R500_ALU_RGBA_ADDRD(dest)
952 | R500_ALU_RGBA_SEL_C_SRCP
953 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
954 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
955 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
956 break;
957 case OPCODE_SUB:
958 src[0] = make_src(fp, fpi->SrcReg[0]);
959 src[1] = make_src(fp, fpi->SrcReg[1]);
960 /* Variation on MAD: 1*src0-src1 */
961 emit_alu(fp, counter, fpi);
962 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
963 | R500_RGB_ADDR2(src[1]);
964 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
965 | R500_ALPHA_ADDR2(src[1]);
966 fp->inst[counter].inst3 = /* 1 */
967 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
968 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
969 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
970 | R500_ALPHA_ADDRD(dest)
971 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
972 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
973 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
974 | R500_ALU_RGBA_ADDRD(dest)
975 | R500_ALU_RGBA_SEL_C_SRC2
976 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
977 | R500_ALU_RGBA_MOD_C_NEG
978 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
979 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
980 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
981 break;
982 case OPCODE_SWZ:
983 /* TODO: Negation masks! */
984 emit_alu(fp, counter, fpi);
985 emit_mov(fp, counter, fpi->SrcReg[0], dest);
986 break;
987 case OPCODE_TEX:
988 emit_tex(fp, fpi, OPCODE_TEX, dest, counter);
989 if (fpi->DstReg.File == PROGRAM_OUTPUT)
990 counter++;
991 break;
992 case OPCODE_TXB:
993 emit_tex(fp, fpi, OPCODE_TXB, dest, counter);
994 if (fpi->DstReg.File == PROGRAM_OUTPUT)
995 counter++;
996 break;
997 case OPCODE_TXP:
998 emit_tex(fp, fpi, OPCODE_TXP, dest, counter);
999 if (fpi->DstReg.File == PROGRAM_OUTPUT)
1000 counter++;
1001 break;
1002 default:
1003 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode));
1004 break;
1005 }
1006
1007 /* Finishing touches */
1008 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
1009 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
1010 }
1011
1012 counter++;
1013
1014 if (fp->error)
1015 return GL_FALSE;
1016
1017 }
1018
1019 /* Finish him! (If it's an ALU/OUT instruction...) */
1020 if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
1021 fp->inst[counter-1].inst0 |= R500_INST_LAST;
1022 } else {
1023 /* We still need to put an output inst, right? */
1024 WARN_ONCE("Final FP instruction is not an OUT.\n");
1025 #if 0
1026
1027 #endif
1028 }
1029
1030 fp->cs->nrslots = counter;
1031
1032 fp->max_temp_idx++;
1033
1034 return GL_TRUE;
1035 }
1036
1037 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
1038 {
1039 struct r300_pfs_compile_state *cs = NULL;
1040 struct gl_fragment_program *mp = &fp->mesa_program;
1041 struct prog_instruction *fpi;
1042 GLuint InputsRead = mp->Base.InputsRead;
1043 GLuint temps_used = 0;
1044 int i, j;
1045
1046 /* New compile, reset tracking data */
1047 fp->optimization =
1048 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
1049 fp->translated = GL_FALSE;
1050 fp->error = GL_FALSE;
1051 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
1052 fp->cur_node = 0;
1053 fp->first_node_has_tex = 0;
1054 fp->const_nr = 0;
1055 /* Size of pixel stack, plus 1. */
1056 fp->max_temp_idx = 1;
1057 /* Temp register offset. */
1058 fp->temp_reg_offset = 0;
1059 fp->node[0].alu_end = -1;
1060 fp->node[0].tex_end = -1;
1061
1062 _mesa_memset(cs, 0, sizeof(*fp->cs));
1063 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
1064 for (j = 0; j < 3; j++) {
1065 cs->slot[i].vsrc[j] = SRC_CONST;
1066 cs->slot[i].ssrc[j] = SRC_CONST;
1067 }
1068 }
1069
1070 /* Work out what temps the Mesa inputs correspond to, this must match
1071 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1072 * configures itself based on the fragprog's InputsRead
1073 *
1074 * NOTE: this depends on get_hw_temp() allocating registers in order,
1075 * starting from register 0, so we're just going to do that instead.
1076 */
1077
1078 /* Texcoords come first */
1079 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
1080 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1081 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
1082 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
1083 fp->temp_reg_offset;
1084 fp->temp_reg_offset++;
1085 }
1086 }
1087 InputsRead &= ~FRAG_BITS_TEX_ANY;
1088
1089 /* fragment position treated as a texcoord */
1090 if (InputsRead & FRAG_BIT_WPOS) {
1091 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
1092 cs->inputs[FRAG_ATTRIB_WPOS].reg =
1093 fp->temp_reg_offset;
1094 fp->temp_reg_offset++;
1095 }
1096 InputsRead &= ~FRAG_BIT_WPOS;
1097
1098 /* Then primary colour */
1099 if (InputsRead & FRAG_BIT_COL0) {
1100 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
1101 cs->inputs[FRAG_ATTRIB_COL0].reg =
1102 fp->temp_reg_offset;
1103 fp->temp_reg_offset++;
1104 }
1105 InputsRead &= ~FRAG_BIT_COL0;
1106
1107 /* Secondary color */
1108 if (InputsRead & FRAG_BIT_COL1) {
1109 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
1110 cs->inputs[FRAG_ATTRIB_COL1].reg =
1111 fp->temp_reg_offset;
1112 fp->temp_reg_offset++;
1113 }
1114 InputsRead &= ~FRAG_BIT_COL1;
1115
1116 /* Anything else */
1117 if (InputsRead) {
1118 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
1119 /* force read from hwreg 0 for now */
1120 for (i = 0; i < 32; i++)
1121 if (InputsRead & (1 << i))
1122 cs->inputs[i].reg = 0;
1123 }
1124
1125 if (!mp->Base.Instructions) {
1126 ERROR("No instructions found in program, going to go die now.\n");
1127 return;
1128 }
1129
1130 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
1131 for (i = 0; i < 3; i++) {
1132 if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) {
1133 if (fpi->SrcReg[i].Index > temps_used)
1134 temps_used = fpi->SrcReg[i].Index;
1135 }
1136 }
1137 }
1138
1139 cs->temp_in_use = temps_used;
1140
1141 fp->max_temp_idx = fp->temp_reg_offset + cs->temp_in_use + 1;
1142 }
1143
1144 static void update_params(struct r500_fragment_program *fp)
1145 {
1146 struct gl_fragment_program *mp = &fp->mesa_program;
1147
1148 /* Ask Mesa nicely to fill in ParameterValues for us */
1149 if (mp->Base.Parameters)
1150 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
1151 }
1152
1153 static void dumb_shader(struct r500_fragment_program *fp)
1154 {
1155 fp->inst[0].inst0 = R500_INST_TYPE_TEX
1156 | R500_INST_TEX_SEM_WAIT
1157 | R500_INST_RGB_WMASK_R
1158 | R500_INST_RGB_WMASK_G
1159 | R500_INST_RGB_WMASK_B
1160 | R500_INST_ALPHA_WMASK
1161 | R500_INST_RGB_CLAMP
1162 | R500_INST_ALPHA_CLAMP;
1163 fp->inst[0].inst1 = R500_TEX_ID(0)
1164 | R500_TEX_INST_LD
1165 | R500_TEX_SEM_ACQUIRE
1166 | R500_TEX_IGNORE_UNCOVERED;
1167 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
1168 | R500_TEX_SRC_S_SWIZ_R
1169 | R500_TEX_SRC_T_SWIZ_G
1170 | R500_TEX_DST_ADDR(0)
1171 | R500_TEX_DST_R_SWIZ_R
1172 | R500_TEX_DST_G_SWIZ_G
1173 | R500_TEX_DST_B_SWIZ_B
1174 | R500_TEX_DST_A_SWIZ_A;
1175 fp->inst[0].inst3 = R500_DX_ADDR(0)
1176 | R500_DX_S_SWIZ_R
1177 | R500_DX_T_SWIZ_R
1178 | R500_DX_R_SWIZ_R
1179 | R500_DX_Q_SWIZ_R
1180 | R500_DY_ADDR(0)
1181 | R500_DY_S_SWIZ_R
1182 | R500_DY_T_SWIZ_R
1183 | R500_DY_R_SWIZ_R
1184 | R500_DY_Q_SWIZ_R;
1185 fp->inst[0].inst4 = 0x0;
1186 fp->inst[0].inst5 = 0x0;
1187
1188 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
1189 R500_INST_TEX_SEM_WAIT |
1190 R500_INST_LAST |
1191 R500_INST_RGB_OMASK_R |
1192 R500_INST_RGB_OMASK_G |
1193 R500_INST_RGB_OMASK_B |
1194 R500_INST_ALPHA_OMASK;
1195 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
1196 R500_RGB_ADDR1(0) |
1197 R500_RGB_ADDR1_CONST |
1198 R500_RGB_ADDR2(0) |
1199 R500_RGB_ADDR2_CONST |
1200 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
1201 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
1202 R500_ALPHA_ADDR1(0) |
1203 R500_ALPHA_ADDR1_CONST |
1204 R500_ALPHA_ADDR2(0) |
1205 R500_ALPHA_ADDR2_CONST |
1206 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
1207 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
1208 R500_ALU_RGB_R_SWIZ_A_R |
1209 R500_ALU_RGB_G_SWIZ_A_G |
1210 R500_ALU_RGB_B_SWIZ_A_B |
1211 R500_ALU_RGB_SEL_B_SRC0 |
1212 R500_ALU_RGB_R_SWIZ_B_1 |
1213 R500_ALU_RGB_B_SWIZ_B_1 |
1214 R500_ALU_RGB_G_SWIZ_B_1;
1215 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
1216 R500_ALPHA_SWIZ_A_A |
1217 R500_ALPHA_SWIZ_B_1;
1218 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
1219 R500_ALU_RGBA_R_SWIZ_0 |
1220 R500_ALU_RGBA_G_SWIZ_0 |
1221 R500_ALU_RGBA_B_SWIZ_0 |
1222 R500_ALU_RGBA_A_SWIZ_0;
1223
1224 fp->cs->nrslots = 2;
1225 fp->translated = GL_TRUE;
1226 }
1227
1228 void r500TranslateFragmentShader(r300ContextPtr r300,
1229 struct r500_fragment_program *fp)
1230 {
1231
1232 struct r300_pfs_compile_state *cs = NULL;
1233
1234 if (!fp->translated) {
1235
1236 init_program(r300, fp);
1237 cs = fp->cs;
1238
1239 if (parse_program(fp) == GL_FALSE) {
1240 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1241 dumb_shader(fp);
1242 fp->inst_offset = 0;
1243 fp->inst_end = cs->nrslots - 1;
1244 return;
1245 }
1246 fp->inst_offset = 0;
1247 fp->inst_end = cs->nrslots - 1;
1248
1249 fp->translated = GL_TRUE;
1250 if (1 || RADEON_DEBUG & DEBUG_PIXEL) {
1251 fprintf(stderr, "Mesa program:\n");
1252 fprintf(stderr, "-------------\n");
1253 _mesa_print_program(&fp->mesa_program.Base);
1254 fflush(stdout);
1255 dump_program(fp);
1256 }
1257
1258
1259 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
1260 }
1261
1262 update_params(fp);
1263
1264 }
1265
/**
 * Map a 3-bit swizzle selector to its printable name.
 *
 * \param swiz_val selector value, 0..7
 * \return a static string ("R", "G", "B", "A", "0", "1/2", "1" or "U"),
 *         or NULL when \p swiz_val is outside 0..7
 */
static char *toswiz(int swiz_val)
{
	static char *const swizzle_names[8] = {
		"R", "G", "B", "A", "0", "1/2", "1", "U"
	};

	if (swiz_val < 0 || swiz_val > 7)
		return NULL;

	return swizzle_names[swiz_val];
}
1279
/**
 * Map the 4-bit opcode field of the RGBA instruction word to its mnemonic.
 *
 * Only values 0..12 have names.  The caller in this file passes a 4-bit
 * field, so 13..15 can occur.  For those, and for any other out-of-range
 * value, a placeholder is returned rather than an uninitialized pointer,
 * because the result is passed directly to a "%s" conversion.
 *
 * \param op_val opcode field value
 * \return a static, never-NULL string: the mnemonic, or "UNKNOWN"
 */
static char *toop(int op_val)
{
	static char *const op_names[] = {
		"MAD", "DP3", "DP4", "D2A", "MIN", "MAX", "Reserved",
		"CND", "CMP", "FRC", "SOP", "MDH", "MDV"
	};
	const int count = (int)(sizeof(op_names) / sizeof(op_names[0]));

	if (op_val < 0 || op_val >= count)
		return "UNKNOWN";

	return op_names[op_val];
}
1300
/**
 * Map the 4-bit opcode field of the alpha instruction word to its mnemonic.
 *
 * \param op_val opcode field value, 0..15
 * \return a static string naming the opcode, or NULL when \p op_val is
 *         outside 0..15
 */
static char *to_alpha_op(int op_val)
{
	static char *const alpha_op_names[16] = {
		"MAD", "DP", "MIN", "MAX", "Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ", "SIN", "COS", "MDH", "MDV"
	};

	return (op_val >= 0 && op_val < 16) ? alpha_op_names[op_val] : NULL;
}
1324
/**
 * Render a 4-bit channel mask as text.
 *
 * Bit 0 is R, bit 1 is G, bit 2 is B and bit 3 is A; when A is set it is
 * written first (e.g. 9 gives "AR", 15 gives "ARGB").
 *
 * \param val mask value, 0..15
 * \return a static string, "NONE" for an empty mask, or NULL when \p val is
 *         outside 0..15
 */
static char *to_mask(int val)
{
	static char *const mask_names[16] = {
		"NONE", "R",  "G",  "RG",  "B",  "RB",  "GB",  "RGB",
		"A",    "AR", "AG", "ARG", "AB", "ARB", "AGB", "ARGB"
	};

	if (val < 0 || val >= 16)
		return NULL;

	return mask_names[val];
}
1348
/**
 * Map the 3-bit texture opcode field to its mnemonic.
 *
 * Only values 0..6 have names.  The caller in this file passes a 3-bit
 * field, so 7 can occur.  For that, and for any other out-of-range value, a
 * placeholder is returned rather than NULL, because the result is passed
 * directly to a "%s" conversion.
 *
 * \param val texture opcode field value
 * \return a static, never-NULL string: the mnemonic, or "UNKNOWN"
 */
static char *to_texop(int val)
{
	static char *const texop_names[] = {
		"NOP", "LD", "TEXKILL", "PROJ", "LODBIAS", "LOD", "DXDY"
	};
	const int count = (int)(sizeof(texop_names) / sizeof(texop_names[0]));

	if (val < 0 || val >= count)
		return "UNKNOWN";

	return texop_names[val];
}
1362
1363 static void dump_program(struct r500_fragment_program *fp)
1364 {
1365 int pc = 0;
1366 int n;
1367 uint32_t inst;
1368 uint32_t inst0;
1369 char *str = NULL;
1370
1371 for (n = 0; n < fp->inst_end+1; n++) {
1372 inst0 = inst = fp->inst[n].inst0;
1373 fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
1374 switch(inst & 0x3) {
1375 case R500_INST_TYPE_ALU: str = "ALU"; break;
1376 case R500_INST_TYPE_OUT: str = "OUT"; break;
1377 case R500_INST_TYPE_FC: str = "FC"; break;
1378 case R500_INST_TYPE_TEX: str = "TEX"; break;
1379 };
1380 fprintf(stderr,"%s %s %s %s %s ", str,
1381 inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
1382 inst & R500_INST_LAST ? "LAST" : "",
1383 inst & R500_INST_NOP ? "NOP" : "",
1384 inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
1385 fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
1386 to_mask((inst >> 15) & 0xf));
1387
1388 switch(inst0 & 0x3) {
1389 case 0:
1390 case 1:
1391 fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1);
1392 inst = fp->inst[n].inst1;
1393
1394 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1395 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1396 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1397 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1398 (inst >> 30));
1399
1400 fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2);
1401 inst = fp->inst[n].inst2;
1402 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1403 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1404 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1405 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1406 (inst >> 30));
1407 fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3);
1408 inst = fp->inst[n].inst3;
1409 fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1410 (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
1411 (inst >> 11) & 0x3,
1412 (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
1413 (inst >> 24) & 0x3);
1414
1415
1416 fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4);
1417 inst = fp->inst[n].inst4;
1418 fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d\n", to_alpha_op(inst & 0xf),
1419 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1420 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
1421 (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3);
1422
1423 fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5);
1424 inst = fp->inst[n].inst5;
1425 fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
1426 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1427 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
1428 (inst >> 23) & 0x3,
1429 (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
1430 break;
1431 case 2:
1432 break;
1433 case 3:
1434 inst = fp->inst[n].inst1;
1435 fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
1436 to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
1437 (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
1438 inst = fp->inst[n].inst2;
1439 fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
1440 inst & 127, inst & (1<<7) ? "(rel)" : "",
1441 toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
1442 toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
1443 (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
1444 toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
1445 toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
1446
1447 fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", fp->inst[n].inst3);
1448 break;
1449 }
1450 fprintf(stderr,"\n");
1451 }
1452
1453 }