r500: fix swz gets and some returns
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
/*
 * Useful macros and values
 */

/*
 * Report a compile error on stderr, prefixed with the file and function
 * name, and flag the program as failed.  Expects a variable named `fp`
 * with an `error` member to be in scope at the point of use.
 */
#define ERROR(fmt, args...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##args); \
		fp->error = GL_TRUE; \
	} while(0)

/* Declares a local `cs` aliasing fp->cs; expects `fp` to be in scope. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

/* Limits used when allocating temporaries and constants. */
#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)
75
/* Swizzle tools */

/* Selector values for the constant swizzles. */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6

/* Three-channel swizzles: one 3-bit selector per channel at bits 0, 3, 6. */
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/*
 * Field-placement helpers.  The argument is parenthesized so that an
 * expression such as `a | b` is shifted as a whole rather than having
 * only its right-hand operand shifted.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
95
96 static void dump_program(struct r500_fragment_program *fp);
97
98 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
99 GLuint swiz = 0x0;
100 GLuint temp;
101 /* This could be optimized, but it should be plenty fast already. */
102 int i;
103 for (i = 0; i < 3; i++) {
104 temp = GET_SWZ(src.Swizzle, i);
105 /* Fix SWIZZLE_ONE */
106 if (temp == 5) temp++;
107 swiz += temp << i*3;
108 }
109 return swiz;
110 }
111
112 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
113 GLuint swiz = GET_SWZ(src.Swizzle, 3);
114
115 if (swiz == 5) swiz++;
116 return swiz;
117 }
118
119 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
120 GLuint swiz = 0x0;
121 GLuint temp = src.Swizzle;
122 int i;
123 for (i = 0; i < 4; i++) {
124 swiz += (temp & 0x3) << i*2;
125 temp >>= 3;
126 }
127 return swiz;
128 }
129
130 static int get_temp(struct r500_fragment_program *fp, int slot) {
131
132 COMPILE_STATE;
133
134 int r = slot;
135
136 while (cs->inputs[r].refcount != 0) {
137 /* Crap, taken. */
138 r++;
139 }
140
141 fp->temp_reg_offset = r - slot;
142
143 if (r >= R500_US_NUM_TEMP_REGS) {
144 ERROR("Out of hardware temps!\n");
145 return 0;
146 }
147
148 if (r > fp->max_temp_idx)
149 fp->max_temp_idx = r;
150
151 return r;
152 }
153
154 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
155 static GLuint emit_const4fv(struct r500_fragment_program *fp,
156 const GLfloat * cp)
157 {
158 GLuint reg = 0x0;
159 int index;
160
161 for (index = 0; index < fp->const_nr; ++index) {
162 if (fp->constant[index] == cp)
163 break;
164 }
165
166 if (index >= fp->const_nr) {
167 if (index >= R500_US_NUM_CONST_REGS) {
168 ERROR("Out of hw constants!\n");
169 return reg;
170 }
171
172 fp->const_nr++;
173 fp->constant[index] = cp;
174 }
175
176 reg = index | REG_CONSTANT;
177 return reg;
178 }
179
180 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
181 COMPILE_STATE;
182 GLuint reg;
183 switch (src.File) {
184 case PROGRAM_TEMPORARY:
185 reg = src.Index + fp->temp_reg_offset;
186 break;
187 case PROGRAM_INPUT:
188 reg = cs->inputs[src.Index].reg;
189 break;
190 case PROGRAM_STATE_VAR:
191 case PROGRAM_NAMED_PARAM:
192 case PROGRAM_CONSTANT:
193 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
194 ParameterValues[src.Index]);
195 break;
196 default:
197 ERROR("Can't handle src.File %x\n", src.File);
198 reg = 0x0;
199 break;
200 }
201 return reg;
202 }
203
204 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
205 GLuint reg;
206 switch (dest.File) {
207 case PROGRAM_TEMPORARY:
208 reg = dest.Index + fp->temp_reg_offset;
209 break;
210 case PROGRAM_OUTPUT:
211 /* Eventually we may need to handle multiple
212 * rendering targets... */
213 reg = dest.Index;
214 break;
215 default:
216 ERROR("Can't handle dest.File %x\n", dest.File);
217 reg = 0x0;
218 break;
219 }
220 return reg;
221 }
222
223 static void emit_tex(struct r500_fragment_program *fp,
224 struct prog_instruction *fpi, int opcode, int dest, int counter)
225 {
226 int hwsrc, hwdest;
227 GLuint mask;
228
229 mask = fpi->DstReg.WriteMask << 11;
230 hwsrc = make_src(fp, fpi->SrcReg[0]);
231
232 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
233 | R500_INST_TEX_SEM_WAIT;
234
235 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
236 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
237
238 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
239 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
240
241 switch (opcode) {
242 case OPCODE_KIL:
243 fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
244 break;
245 case OPCODE_TEX:
246 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
247 break;
248 case OPCODE_TXB:
249 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
250 break;
251 case OPCODE_TXP:
252 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
253 break;
254 default:
255 ERROR("emit_tex can't handle opcode %x\n", opcode);
256 }
257
258 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
259 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
260 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
261 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
262 | R500_TEX_DST_ADDR(dest)
263 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
264 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
265
266
267
268 fp->inst[counter].inst3 = 0x0;
269 fp->inst[counter].inst4 = 0x0;
270 fp->inst[counter].inst5 = 0x0;
271 }
272
273 static void dumb_shader(struct r500_fragment_program *fp)
274 {
275 fp->inst[0].inst0 = R500_INST_TYPE_TEX
276 | R500_INST_TEX_SEM_WAIT
277 | R500_INST_RGB_WMASK_R
278 | R500_INST_RGB_WMASK_G
279 | R500_INST_RGB_WMASK_B
280 | R500_INST_ALPHA_WMASK
281 | R500_INST_RGB_CLAMP
282 | R500_INST_ALPHA_CLAMP;
283 fp->inst[0].inst1 = R500_TEX_ID(0)
284 | R500_TEX_INST_LD
285 | R500_TEX_SEM_ACQUIRE
286 | R500_TEX_IGNORE_UNCOVERED;
287 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
288 | R500_TEX_SRC_S_SWIZ_R
289 | R500_TEX_SRC_T_SWIZ_G
290 | R500_TEX_DST_ADDR(0)
291 | R500_TEX_DST_R_SWIZ_R
292 | R500_TEX_DST_G_SWIZ_G
293 | R500_TEX_DST_B_SWIZ_B
294 | R500_TEX_DST_A_SWIZ_A;
295 fp->inst[0].inst3 = R500_DX_ADDR(0)
296 | R500_DX_S_SWIZ_R
297 | R500_DX_T_SWIZ_R
298 | R500_DX_R_SWIZ_R
299 | R500_DX_Q_SWIZ_R
300 | R500_DY_ADDR(0)
301 | R500_DY_S_SWIZ_R
302 | R500_DY_T_SWIZ_R
303 | R500_DY_R_SWIZ_R
304 | R500_DY_Q_SWIZ_R;
305 fp->inst[0].inst4 = 0x0;
306 fp->inst[0].inst5 = 0x0;
307
308 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
309 R500_INST_TEX_SEM_WAIT |
310 R500_INST_LAST |
311 R500_INST_RGB_OMASK_R |
312 R500_INST_RGB_OMASK_G |
313 R500_INST_RGB_OMASK_B |
314 R500_INST_ALPHA_OMASK;
315 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
316 R500_RGB_ADDR1(0) |
317 R500_RGB_ADDR1_CONST |
318 R500_RGB_ADDR2(0) |
319 R500_RGB_ADDR2_CONST |
320 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
321 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
322 R500_ALPHA_ADDR1(0) |
323 R500_ALPHA_ADDR1_CONST |
324 R500_ALPHA_ADDR2(0) |
325 R500_ALPHA_ADDR2_CONST |
326 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
327 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
328 R500_ALU_RGB_R_SWIZ_A_R |
329 R500_ALU_RGB_G_SWIZ_A_G |
330 R500_ALU_RGB_B_SWIZ_A_B |
331 R500_ALU_RGB_SEL_B_SRC0 |
332 R500_ALU_RGB_R_SWIZ_B_1 |
333 R500_ALU_RGB_B_SWIZ_B_1 |
334 R500_ALU_RGB_G_SWIZ_B_1;
335 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
336 R500_ALPHA_SWIZ_A_A |
337 R500_ALPHA_SWIZ_B_1;
338 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
339 R500_ALU_RGBA_R_SWIZ_0 |
340 R500_ALU_RGBA_G_SWIZ_0 |
341 R500_ALU_RGBA_B_SWIZ_0 |
342 R500_ALU_RGBA_A_SWIZ_0;
343
344 fp->cs->nrslots = 2;
345 fp->translated = GL_TRUE;
346 }
347
348 static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
349 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
350 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
351 /* output_mask */
352 | (fpi->DstReg.WriteMask << 15);
353 } else {
354 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
355 /* pixel_mask */
356 | (fpi->DstReg.WriteMask << 11);
357 }
358
359 fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
360 }
361
362 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) {
363 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
364 * it is technically more accurate and recommended by ATI/AMD. */
365 GLuint src_reg = make_src(fp, src);
366 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
367 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
368 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
369 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src))
370 | R500_ALU_RGB_SEL_B_SRC0
371 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src))
372 | R500_ALU_RGB_OMOD_DISABLE;
373 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
374 | R500_ALPHA_ADDRD(dest)
375 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src))
376 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src))
377 | R500_ALPHA_OMOD_DISABLE;
378 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
379 | R500_ALU_RGBA_ADDRD(dest)
380 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
381 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
382 }
383
384 static GLboolean parse_program(struct r500_fragment_program *fp)
385 {
386 struct gl_fragment_program *mp = &fp->mesa_program;
387 const struct prog_instruction *inst = mp->Base.Instructions;
388 struct prog_instruction *fpi;
389 GLuint src[3], dest, temp[2];
390 int flags, pixel_mask = 0, output_mask = 0, counter = 0;
391
392 if (!inst || inst[0].Opcode == OPCODE_END) {
393 ERROR("The program is empty!\n");
394 return GL_FALSE;
395 }
396
397 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
398
399 if (fpi->Opcode != OPCODE_KIL) {
400 dest = make_dest(fp, fpi->DstReg);
401
402 pixel_mask = fpi->DstReg.WriteMask << 11;
403 output_mask = fpi->DstReg.WriteMask << 15;
404 }
405
406 switch (fpi->Opcode) {
407 case OPCODE_ABS:
408 emit_alu(fp, counter, fpi);
409 emit_mov(fp, counter, fpi->SrcReg[0], dest);
410 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
411 | R500_ALU_RGB_MOD_B_ABS;
412 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
413 | R500_ALPHA_MOD_B_ABS;
414 break;
415 case OPCODE_ADD:
416 src[0] = make_src(fp, fpi->SrcReg[0]);
417 src[1] = make_src(fp, fpi->SrcReg[1]);
418 /* Variation on MAD: 1*src0+src1 */
419 emit_alu(fp, counter, fpi);
420 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
421 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0);
422 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
423 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0);
424 fp->inst[counter].inst3 = /* 1 */
425 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
426 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
427 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
428 | R500_ALPHA_ADDRD(dest)
429 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
430 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
431 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
432 | R500_ALU_RGBA_ADDRD(dest)
433 | R500_ALU_RGBA_SEL_C_SRC1
434 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
435 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
436 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
437 break;
438 case OPCODE_CMP:
439 /* This inst's selects need to be swapped as follows:
440 * 0 -> C ; 1 -> B ; 2 -> A */
441 src[0] = make_src(fp, fpi->SrcReg[0]);
442 src[1] = make_src(fp, fpi->SrcReg[1]);
443 src[2] = make_src(fp, fpi->SrcReg[2]);
444 emit_alu(fp, counter, fpi);
445 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
446 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
447 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
448 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
449 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
450 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
451 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
452 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
453 | R500_ALPHA_ADDRD(dest)
454 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
455 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
456 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
457 | R500_ALU_RGBA_ADDRD(dest)
458 | R500_ALU_RGBA_SEL_C_SRC2
459 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
460 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
461 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
462 break;
463 case OPCODE_COS:
464 src[0] = make_src(fp, fpi->SrcReg[0]);
465 emit_alu(fp, counter, fpi);
466 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
467 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
468 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
469 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
470 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
471 | R500_ALPHA_ADDRD(dest)
472 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
473 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
474 | R500_ALU_RGBA_ADDRD(dest);
475 break;
476 case OPCODE_DP3:
477 src[0] = make_src(fp, fpi->SrcReg[0]);
478 src[1] = make_src(fp, fpi->SrcReg[1]);
479 emit_alu(fp, counter, fpi);
480 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
481 | R500_RGB_ADDR1(src[1]);
482 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
483 | R500_ALPHA_ADDR1(src[1]);
484 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
485 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
486 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
487 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
488 | R500_ALPHA_ADDRD(dest)
489 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
490 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
491 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
492 | R500_ALU_RGBA_ADDRD(dest);
493 break;
494 case OPCODE_DP4:
495 src[0] = make_src(fp, fpi->SrcReg[0]);
496 src[1] = make_src(fp, fpi->SrcReg[1]);
497 /* Based on DP3 */
498 emit_alu(fp, counter, fpi);
499 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
500 | R500_RGB_ADDR1(src[1]);
501 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
502 | R500_ALPHA_ADDR1(src[1]);
503 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
504 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
505 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
506 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
507 | R500_ALPHA_ADDRD(dest)
508 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
509 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
510 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
511 | R500_ALU_RGBA_ADDRD(dest);
512 break;
513 case OPCODE_DPH:
514 src[0] = make_src(fp, fpi->SrcReg[0]);
515 src[1] = make_src(fp, fpi->SrcReg[1]);
516 /* Based on DP3 */
517 emit_alu(fp, counter, fpi);
518 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
519 | R500_RGB_ADDR1(src[1]);
520 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
521 | R500_ALPHA_ADDR1(src[1]);
522 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
523 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
524 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
525 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
526 | R500_ALPHA_ADDRD(dest)
527 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
528 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
529 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
530 | R500_ALU_RGBA_ADDRD(dest);
531 break;
532 case OPCODE_EX2:
533 src[0] = make_src(fp, fpi->SrcReg[0]);
534 emit_alu(fp, counter, fpi);
535 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
536 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
537 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
538 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
539 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
540 | R500_ALPHA_ADDRD(dest)
541 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
542 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
543 | R500_ALU_RGBA_ADDRD(dest);
544 break;
545 case OPCODE_FRC:
546 src[0] = make_src(fp, fpi->SrcReg[0]);
547 emit_alu(fp, counter, fpi);
548 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
549 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
550 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
551 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
552 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
553 | R500_ALPHA_ADDRD(dest)
554 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
555 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
556 | R500_ALU_RGBA_ADDRD(dest);
557 break;
558 case OPCODE_KIL:
559 emit_tex(fp, fpi, OPCODE_KIL, dest, counter);
560 break;
561 case OPCODE_LG2:
562 src[0] = make_src(fp, fpi->SrcReg[0]);
563 emit_alu(fp, counter, fpi);
564 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
565 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
566 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
567 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
568 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
569 | R500_ALPHA_ADDRD(dest)
570 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
571 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
572 | R500_ALU_RGBA_ADDRD(dest);
573 break;
574 case OPCODE_MAD:
575 src[0] = make_src(fp, fpi->SrcReg[0]);
576 src[1] = make_src(fp, fpi->SrcReg[1]);
577 src[2] = make_src(fp, fpi->SrcReg[2]);
578 emit_alu(fp, counter, fpi);
579 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
580 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
581 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
582 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
583 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
584 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
585 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
586 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
587 | R500_ALPHA_ADDRD(dest)
588 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
589 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
590 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
591 | R500_ALU_RGBA_ADDRD(dest)
592 | R500_ALU_RGBA_SEL_C_SRC2
593 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
594 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
595 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
596 break;
597 case OPCODE_MAX:
598 src[0] = make_src(fp, fpi->SrcReg[0]);
599 src[1] = make_src(fp, fpi->SrcReg[1]);
600 emit_alu(fp, counter, fpi);
601 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
602 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
603 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
604 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
605 | R500_ALU_RGB_SEL_B_SRC1
606 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
607 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
608 | R500_ALPHA_ADDRD(dest)
609 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
610 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
611 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
612 | R500_ALU_RGBA_ADDRD(dest);
613 break;
614 case OPCODE_MIN:
615 src[0] = make_src(fp, fpi->SrcReg[0]);
616 src[1] = make_src(fp, fpi->SrcReg[1]);
617 emit_alu(fp, counter, fpi);
618 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
619 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
620 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
621 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
622 | R500_ALU_RGB_SEL_B_SRC1
623 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
624 fp->inst[counter].inst4 = R500_ALPHA_OP_MIN
625 | R500_ALPHA_ADDRD(dest)
626 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
627 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
628 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
629 | R500_ALU_RGBA_ADDRD(dest);
630 break;
631 case OPCODE_MOV:
632 emit_alu(fp, counter, fpi);
633 emit_mov(fp, counter, fpi->SrcReg[0], dest);
634 break;
635 case OPCODE_MUL:
636 src[0] = make_src(fp, fpi->SrcReg[0]);
637 src[1] = make_src(fp, fpi->SrcReg[1]);
638 /* Variation on MAD: src0*src1+0 */
639 emit_alu(fp, counter, fpi);
640 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
641 | R500_RGB_ADDR1(src[1]);
642 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
643 | R500_ALPHA_ADDR1(src[1]);
644 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
645 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
646 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
647 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
648 | R500_ALPHA_ADDRD(dest)
649 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
650 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
651 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
652 | R500_ALU_RGBA_ADDRD(dest)
653 // | R500_ALU_RGBA_SEL_C_SRC2
654 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
655 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
656 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
657 break;
658 case OPCODE_RCP:
659 src[0] = make_src(fp, fpi->SrcReg[0]);
660 emit_alu(fp, counter, fpi);
661 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
662 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
663 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
664 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
665 fp->inst[counter].inst4 = R500_ALPHA_OP_RCP
666 | R500_ALPHA_ADDRD(dest)
667 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
668 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
669 | R500_ALU_RGBA_ADDRD(dest);
670 break;
671 case OPCODE_RSQ:
672 src[0] = make_src(fp, fpi->SrcReg[0]);
673 emit_alu(fp, counter, fpi);
674 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
675 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
676 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
677 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
678 fp->inst[counter].inst4 = R500_ALPHA_OP_RSQ
679 | R500_ALPHA_ADDRD(dest)
680 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
681 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
682 | R500_ALU_RGBA_ADDRD(dest);
683 break;
684 case OPCODE_SCS:
685 /* TODO: Make this elegant! */
686 /* Do a cosine, then a sine, masking out the channels we want to protect. */
687 src[0] = make_src(fp, fpi->SrcReg[0]);
688 /* Cosine only goes in R (x) channel. */
689 fpi->DstReg.WriteMask = 0x1;
690 emit_alu(fp, counter, fpi);
691 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
692 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
693 | R500_INST_TEX_SEM_WAIT | 0x1 << 14;
694 } else {
695 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
696 | R500_INST_TEX_SEM_WAIT | 0x1 << 11;
697 }
698 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
699 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
700 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
701 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
702 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
703 | R500_ALPHA_ADDRD(dest)
704 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
705 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
706 | R500_ALU_RGBA_ADDRD(dest);
707 counter++;
708 /* Sine only goes in G (y) channel. */
709 fpi->DstReg.WriteMask = 0x2;
710 emit_alu(fp, counter, fpi);
711 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
712 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
713 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
714 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
715 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
716 | R500_ALPHA_ADDRD(dest)
717 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
718 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
719 | R500_ALU_RGBA_ADDRD(dest);
720 counter++;
721 /* Put 0 into B,A (z,w) channels. */
722 fpi->DstReg.WriteMask = 0xC;
723 emit_alu(fp, counter, fpi);
724 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
725 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
726 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
727 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
728 | R500_ALU_RGB_SEL_B_SRC0
729 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
730 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
731 | R500_ALPHA_ADDRD(dest)
732 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
733 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
734 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
735 | R500_ALU_RGBA_ADDRD(dest)
736 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
737 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
738 break;
739 case OPCODE_SIN:
740 src[0] = make_src(fp, fpi->SrcReg[0]);
741 emit_alu(fp, counter, fpi);
742 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
743 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
744 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
745 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
746 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
747 | R500_ALPHA_ADDRD(dest)
748 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
749 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
750 | R500_ALU_RGBA_ADDRD(dest);
751 break;
752 case OPCODE_SUB:
753 src[0] = make_src(fp, fpi->SrcReg[0]);
754 src[1] = make_src(fp, fpi->SrcReg[1]);
755 /* Variation on MAD: 1*src0-src1 */
756 emit_alu(fp, counter, fpi);
757 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
758 | R500_RGB_ADDR2(src[1]);
759 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
760 | R500_ALPHA_ADDR2(src[1]);
761 fp->inst[counter].inst3 = /* 1 */
762 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
763 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
764 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
765 | R500_ALPHA_ADDRD(dest)
766 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
767 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
768 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
769 | R500_ALU_RGBA_ADDRD(dest)
770 | R500_ALU_RGBA_SEL_C_SRC2
771 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
772 | R500_ALU_RGBA_MOD_C_NEG
773 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
774 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
775 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
776 break;
777 case OPCODE_SWZ:
778 /* TODO: Negation masks! */
779 emit_alu(fp, counter, fpi);
780 emit_mov(fp, counter, fpi->SrcReg[0], dest);
781 break;
782 case OPCODE_TEX:
783 emit_tex(fp, fpi, OPCODE_TEX, dest, counter);
784 break;
785 case OPCODE_TXB:
786 emit_tex(fp, fpi, OPCODE_TXB, dest, counter);
787 break;
788 case OPCODE_TXP:
789 emit_tex(fp, fpi, OPCODE_TXP, dest, counter);
790 break;
791 default:
792 ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
793 break;
794 }
795
796 /* Finishing touches */
797 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
798 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
799 }
800
801 counter++;
802
803 if (fp->error)
804 return GL_FALSE;
805
806 }
807
808 /* Finish him! (If it's an ALU/OUT instruction...) */
809 if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
810 fp->inst[counter-1].inst0 |= R500_INST_LAST;
811 } else {
812 /* We still need to put an output inst, right? */
813 WARN_ONCE("Final FP instruction is not an OUT.\n");
814 #if 0
815 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
816 | R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
817 output_mask;
818 fp->inst[counter].inst1 = R500_RGB_ADDR0(dest);
819 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(dest);
820 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
821 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
822 | R500_ALU_RGB_SEL_B_SRC0
823 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
824 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
825 | R500_ALPHA_ADDRD(0)
826 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0
827 | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1;
828 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
829 | R500_ALU_RGBA_ADDRD(0)
830 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
831 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
832 counter++;
833 #endif
834 }
835
836 fp->cs->nrslots = counter;
837
838 fp->max_temp_idx++;
839
840 return GL_TRUE;
841 }
842
843 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
844 {
845 struct r300_pfs_compile_state *cs = NULL;
846 struct gl_fragment_program *mp = &fp->mesa_program;
847 struct prog_instruction *fpi;
848 GLuint InputsRead = mp->Base.InputsRead;
849 GLuint temps_used = 0; /* for fp->temps[] */
850 int i, j;
851
852 /* New compile, reset tracking data */
853 fp->optimization =
854 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
855 fp->translated = GL_FALSE;
856 fp->error = GL_FALSE;
857 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
858 fp->cur_node = 0;
859 fp->first_node_has_tex = 0;
860 fp->const_nr = 0;
861 /* Size of pixel stack, plus 1. */
862 fp->max_temp_idx = 1;
863 /* Temp register offset. */
864 fp->temp_reg_offset = 0;
865 fp->node[0].alu_end = -1;
866 fp->node[0].tex_end = -1;
867
868 _mesa_memset(cs, 0, sizeof(*fp->cs));
869 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
870 for (j = 0; j < 3; j++) {
871 cs->slot[i].vsrc[j] = SRC_CONST;
872 cs->slot[i].ssrc[j] = SRC_CONST;
873 }
874 }
875
876 /* Work out what temps the Mesa inputs correspond to, this must match
877 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
878 * configures itself based on the fragprog's InputsRead
879 *
880 * NOTE: this depends on get_hw_temp() allocating registers in order,
881 * starting from register 0, so we're just going to do that instead.
882 */
883
884 /* Texcoords come first */
885 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
886 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
887 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
888 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
889 fp->temp_reg_offset;
890 fp->temp_reg_offset++;
891 }
892 }
893 InputsRead &= ~FRAG_BITS_TEX_ANY;
894
895 /* fragment position treated as a texcoord */
896 if (InputsRead & FRAG_BIT_WPOS) {
897 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
898 cs->inputs[FRAG_ATTRIB_WPOS].reg =
899 fp->temp_reg_offset;
900 fp->temp_reg_offset++;
901 }
902 InputsRead &= ~FRAG_BIT_WPOS;
903
904 /* Then primary colour */
905 if (InputsRead & FRAG_BIT_COL0) {
906 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
907 cs->inputs[FRAG_ATTRIB_COL0].reg =
908 fp->temp_reg_offset;
909 fp->temp_reg_offset++;
910 }
911 InputsRead &= ~FRAG_BIT_COL0;
912
913 /* Secondary color */
914 if (InputsRead & FRAG_BIT_COL1) {
915 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
916 cs->inputs[FRAG_ATTRIB_COL1].reg =
917 fp->temp_reg_offset;
918 fp->temp_reg_offset++;
919 }
920 InputsRead &= ~FRAG_BIT_COL1;
921
922 /* Anything else */
923 if (InputsRead) {
924 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
925 /* force read from hwreg 0 for now */
926 for (i = 0; i < 32; i++)
927 if (InputsRead & (1 << i))
928 cs->inputs[i].reg = 0;
929 }
930
931 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
932 * That way, we can free up the reg when it's no longer needed
933 */
934 if (!mp->Base.Instructions) {
935 ERROR("No instructions found in program, going to go die now.\n");
936 return;
937 }
938
939 #if 0
940 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
941 int idx;
942 for (i = 0; i < 3; i++) {
943 idx = fpi->SrcReg[i].Index;
944 if (fpi->SrcReg[i].File == PROGRAM_INPUT) {
945 cs->inputs[idx].refcount++;
946 if (fp->max_temp_idx < idx)
947 fp->max_temp_idx = idx;
948 }
949 }
950 }
951 #endif
952
953 fp->max_temp_idx = fp->temp_reg_offset + 1;
954
955 cs->temp_in_use = temps_used;
956 }
957
958 static void update_params(struct r500_fragment_program *fp)
959 {
960 struct gl_fragment_program *mp = &fp->mesa_program;
961
962 /* Ask Mesa nicely to fill in ParameterValues for us */
963 if (mp->Base.Parameters)
964 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
965 }
966
967 void r500TranslateFragmentShader(r300ContextPtr r300,
968 struct r500_fragment_program *fp)
969 {
970
971 struct r300_pfs_compile_state *cs = NULL;
972
973 if (!fp->translated) {
974
975 /* I need to see what I'm working with! */
976 fprintf(stderr, "Mesa program:\n");
977 fprintf(stderr, "-------------\n");
978 _mesa_print_program(&fp->mesa_program.Base);
979 fflush(stdout);
980
981 init_program(r300, fp);
982 cs = fp->cs;
983
984 if (parse_program(fp) == GL_FALSE) {
985 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
986 dumb_shader(fp);
987 fp->inst_offset = 0;
988 fp->inst_end = cs->nrslots - 1;
989 return;
990 }
991 fp->inst_offset = 0;
992 fp->inst_end = cs->nrslots - 1;
993
994 fp->translated = GL_TRUE;
995 if (RADEON_DEBUG & DEBUG_PIXEL)
996 dump_program(fp);
997
998 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
999 }
1000
1001 update_params(fp);
1002
1003 }
1004
/**
 * Map a 3-bit swizzle selector to its printable name.
 *
 * \param swiz_val  selector value, 0..7.
 * \return static string for the selector, or NULL when out of range.
 */
static char *toswiz(int swiz_val)
{
	static char *const swiz_names[8] = {
		"R", "G", "B", "A", "0", "1/2", "1", "U"
	};

	if (swiz_val < 0 || swiz_val > 7)
		return NULL;

	return swiz_names[swiz_val];
}
1018
/**
 * Map the 4-bit opcode field of the RGBA instruction word to a printable
 * mnemonic.
 *
 * \param op_val  opcode value; 0..12 have names.
 * \return static string; "Unknown" for any value without a name, so the
 *         result is always safe to hand to a %s conversion.
 */
static char *toop(int op_val)
{
	char *str;

	switch (op_val) {
	case 0: str = "MAD"; break;
	case 1: str = "DP3"; break;
	case 2: str = "DP4"; break;
	case 3: str = "D2A"; break;
	case 4: str = "MIN"; break;
	case 5: str = "MAX"; break;
	case 6: str = "Reserved"; break;
	case 7: str = "CND"; break;
	case 8: str = "CMP"; break;
	case 9: str = "FRC"; break;
	case 10: str = "SOP"; break;
	case 11: str = "MDH"; break;
	case 12: str = "MDV"; break;
	/* Callers mask with 0xf, so 13..15 can reach here; never return an
	 * uninitialised pointer. */
	default: str = "Unknown"; break;
	}
	return str;
}
1039
/**
 * Map the 4-bit opcode field of the alpha instruction word to a printable
 * mnemonic.
 *
 * \param op_val  opcode value, 0..15.
 * \return static string for the opcode, or NULL when out of range.
 */
static char *to_alpha_op(int op_val)
{
	switch (op_val) {
	case 0:  return "MAD";
	case 1:  return "DP";
	case 2:  return "MIN";
	case 3:  return "MAX";
	case 4:  return "Reserved";
	case 5:  return "CND";
	case 6:  return "CMP";
	case 7:  return "FRC";
	case 8:  return "EX2";
	case 9:  return "LN2";
	case 10: return "RCP";
	case 11: return "RSQ";
	case 12: return "SIN";
	case 13: return "COS";
	case 14: return "MDH";
	case 15: return "MDV";
	default: return NULL;
	}
}
1063
/**
 * Render a 4-bit write mask as text: bit 0 = R, bit 1 = G, bit 2 = B,
 * bit 3 = A (alpha is printed first when set).
 *
 * \param val  mask value, 0..15.
 * \return static string for the mask, or NULL when out of range.
 */
static char *to_mask(int val)
{
	static char *const mask_names[16] = {
		"NONE", "R",  "G",  "RG",
		"B",    "RB", "GB", "RGB",
		"A",    "AR", "AG", "ARG",
		"AB",   "ARB", "AGB", "ARGB"
	};

	if (val < 0 || val > 15)
		return NULL;

	return mask_names[val];
}
1087
1088 static void dump_program(struct r500_fragment_program *fp)
1089 {
1090 int pc = 0;
1091 int n;
1092 uint32_t inst;
1093 uint32_t inst0;
1094 char *str = NULL;
1095
1096 for (n = 0; n < fp->inst_end+1; n++) {
1097 inst0 = inst = fp->inst[n].inst0;
1098 fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
1099 switch(inst & 0x3) {
1100 case R500_INST_TYPE_ALU: str = "ALU"; break;
1101 case R500_INST_TYPE_OUT: str = "OUT"; break;
1102 case R500_INST_TYPE_FC: str = "FC"; break;
1103 case R500_INST_TYPE_TEX: str = "TEX"; break;
1104 };
1105 fprintf(stderr,"%s %s %s %s %s ", str,
1106 inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
1107 inst & R500_INST_LAST ? "LAST" : "",
1108 inst & R500_INST_NOP ? "NOP" : "",
1109 inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
1110 fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
1111 to_mask((inst >> 15) & 0xf));
1112
1113 switch(inst0 & 0x3) {
1114 case 0:
1115 case 1:
1116 fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1);
1117 inst = fp->inst[n].inst1;
1118
1119 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1120 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1121 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1122 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1123 (inst >> 30));
1124
1125 fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2);
1126 inst = fp->inst[n].inst2;
1127 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1128 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1129 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1130 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1131 (inst >> 30));
1132 fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3);
1133 inst = fp->inst[n].inst3;
1134 fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1135 (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
1136 (inst >> 11) & 0x3,
1137 (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
1138 (inst >> 24) & 0x3);
1139
1140
1141 fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4);
1142 inst = fp->inst[n].inst4;
1143 fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_b_src:%d %s %d\n", to_alpha_op(inst & 0xf),
1144 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1145 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
1146 (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3);
1147
1148 fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5);
1149 inst = fp->inst[n].inst5;
1150 fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
1151 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1152 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
1153 (inst >> 23) & 0x3,
1154 (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
1155 break;
1156 case 2:
1157 break;
1158 case 3:
1159 fprintf(stderr,"1: TEX INST 0x%08x\n", fp->inst[n].inst1);
1160 fprintf(stderr,"2: TEX ADDR 0x%08x\n", fp->inst[n].inst2);
1161 fprintf(stderr,"2: TEX ADDR DXDY 0x%08x\n", fp->inst[n].inst3);
1162 break;
1163 }
1164 fprintf(stderr,"\n");
1165 }
1166
1167 }