d0675f6eb3a9d44124938cef3e211bc39e1a95a5
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
/*
 * Useful macros and values
 */

/* Print a diagnostic tagged with the current file and function to stderr and
 * mark the program as failed.  Expects a variable named `fp` with an `error`
 * member to be in scope at the point of use.  Relies on GNU named variadic
 * macro arguments. */
#define ERROR(fmt, args...) do {			\
		fprintf(stderr, "%s::%s(): " fmt "\n",	\
			__FILE__, __FUNCTION__, ##args);	\
		fp->error = GL_TRUE;			\
	} while(0)

/* Declares a local `cs` pointing at the compile state of the `fp` in scope. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle tools */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/* The shifting helpers below parenthesize their argument so that an
 * expression argument (e.g. `a | b`) is shifted as a whole. */

/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)

static void dump_program(struct r500_fragment_program *fp);
97
98 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
99 GLuint swiz = 0x0;
100 GLuint temp;
101 /* This could be optimized, but it should be plenty fast already. */
102 int i;
103 for (i = 0; i < 3; i++) {
104 temp = (src.Swizzle >> i*3) & 0x7;
105 /* Fix SWIZZLE_ONE */
106 if (temp == 5) temp++;
107 swiz += temp << i*3;
108 }
109 return swiz;
110 }
111
112 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
113 GLuint swiz = (src.Swizzle >> 12) & 0x7;
114 if (swiz == 5) swiz++;
115 return swiz;
116 }
117
118 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
119 GLuint swiz = 0x0;
120 GLuint temp = src.Swizzle;
121 int i;
122 for (i = 0; i < 4; i++) {
123 swiz += (temp & 0x3) << i*2;
124 temp >>= 3;
125 }
126 return swiz;
127 }
128
129 static int get_temp(struct r500_fragment_program *fp, int slot) {
130
131 COMPILE_STATE;
132
133 int r = slot;
134
135 while (cs->inputs[r].refcount != 0) {
136 /* Crap, taken. */
137 r++;
138 }
139
140 fp->temp_reg_offset = r - slot;
141
142 if (r >= R500_US_NUM_TEMP_REGS) {
143 ERROR("Out of hardware temps!\n");
144 return 0;
145 }
146
147 if (r > fp->max_temp_idx)
148 fp->max_temp_idx = r;
149
150 return r;
151 }
152
153 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
154 static GLuint emit_const4fv(struct r500_fragment_program *fp,
155 const GLfloat * cp)
156 {
157 GLuint reg = 0x0;
158 int index;
159
160 for (index = 0; index < fp->const_nr; ++index) {
161 if (fp->constant[index] == cp)
162 break;
163 }
164
165 if (index >= fp->const_nr) {
166 if (index >= R500_US_NUM_CONST_REGS) {
167 ERROR("Out of hw constants!\n");
168 return reg;
169 }
170
171 fp->const_nr++;
172 fp->constant[index] = cp;
173 }
174
175 reg = index | REG_CONSTANT;
176 return reg;
177 }
178
179 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
180 COMPILE_STATE;
181 GLuint reg;
182 switch (src.File) {
183 case PROGRAM_TEMPORARY:
184 reg = src.Index + fp->temp_reg_offset;
185 break;
186 case PROGRAM_INPUT:
187 reg = cs->inputs[src.Index].reg;
188 break;
189 case PROGRAM_STATE_VAR:
190 case PROGRAM_NAMED_PARAM:
191 case PROGRAM_CONSTANT:
192 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
193 ParameterValues[src.Index]);
194 break;
195 default:
196 ERROR("Can't handle src.File %x\n", src.File);
197 reg = 0x0;
198 break;
199 }
200 return reg;
201 }
202
203 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
204 GLuint reg;
205 switch (dest.File) {
206 case PROGRAM_TEMPORARY:
207 reg = dest.Index + fp->temp_reg_offset;
208 break;
209 case PROGRAM_OUTPUT:
210 /* Eventually we may need to handle multiple
211 * rendering targets... */
212 reg = dest.Index;
213 break;
214 default:
215 ERROR("Can't handle dest.File %x\n", dest.File);
216 reg = 0x0;
217 break;
218 }
219 return reg;
220 }
221
222 static void emit_tex(struct r500_fragment_program *fp,
223 struct prog_instruction *fpi, int opcode, int dest, int counter)
224 {
225 int hwsrc, hwdest;
226 GLuint mask;
227
228 mask = fpi->DstReg.WriteMask << 11;
229 hwsrc = make_src(fp, fpi->SrcReg[0]);
230
231 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
232 | R500_INST_TEX_SEM_WAIT;
233
234 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
235 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
236
237 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
238 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
239
240 switch (opcode) {
241 case OPCODE_KIL:
242 fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
243 break;
244 case OPCODE_TEX:
245 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
246 break;
247 case OPCODE_TXB:
248 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
249 break;
250 case OPCODE_TXP:
251 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
252 break;
253 default:
254 ERROR("emit_tex can't handle opcode %x\n", opcode);
255 }
256
257 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
258 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
259 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
260 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
261 | R500_TEX_DST_ADDR(dest)
262 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
263 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
264
265
266
267 fp->inst[counter].inst3 = 0x0;
268 fp->inst[counter].inst4 = 0x0;
269 fp->inst[counter].inst5 = 0x0;
270 }
271
272 static void dumb_shader(struct r500_fragment_program *fp)
273 {
274 fp->inst[0].inst0 = R500_INST_TYPE_TEX
275 | R500_INST_TEX_SEM_WAIT
276 | R500_INST_RGB_WMASK_R
277 | R500_INST_RGB_WMASK_G
278 | R500_INST_RGB_WMASK_B
279 | R500_INST_ALPHA_WMASK
280 | R500_INST_RGB_CLAMP
281 | R500_INST_ALPHA_CLAMP;
282 fp->inst[0].inst1 = R500_TEX_ID(0)
283 | R500_TEX_INST_LD
284 | R500_TEX_SEM_ACQUIRE
285 | R500_TEX_IGNORE_UNCOVERED;
286 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
287 | R500_TEX_SRC_S_SWIZ_R
288 | R500_TEX_SRC_T_SWIZ_G
289 | R500_TEX_DST_ADDR(0)
290 | R500_TEX_DST_R_SWIZ_R
291 | R500_TEX_DST_G_SWIZ_G
292 | R500_TEX_DST_B_SWIZ_B
293 | R500_TEX_DST_A_SWIZ_A;
294 fp->inst[0].inst3 = R500_DX_ADDR(0)
295 | R500_DX_S_SWIZ_R
296 | R500_DX_T_SWIZ_R
297 | R500_DX_R_SWIZ_R
298 | R500_DX_Q_SWIZ_R
299 | R500_DY_ADDR(0)
300 | R500_DY_S_SWIZ_R
301 | R500_DY_T_SWIZ_R
302 | R500_DY_R_SWIZ_R
303 | R500_DY_Q_SWIZ_R;
304 fp->inst[0].inst4 = 0x0;
305 fp->inst[0].inst5 = 0x0;
306
307 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
308 R500_INST_TEX_SEM_WAIT |
309 R500_INST_LAST |
310 R500_INST_RGB_OMASK_R |
311 R500_INST_RGB_OMASK_G |
312 R500_INST_RGB_OMASK_B |
313 R500_INST_ALPHA_OMASK;
314 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
315 R500_RGB_ADDR1(0) |
316 R500_RGB_ADDR1_CONST |
317 R500_RGB_ADDR2(0) |
318 R500_RGB_ADDR2_CONST |
319 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
320 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
321 R500_ALPHA_ADDR1(0) |
322 R500_ALPHA_ADDR1_CONST |
323 R500_ALPHA_ADDR2(0) |
324 R500_ALPHA_ADDR2_CONST |
325 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
326 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
327 R500_ALU_RGB_R_SWIZ_A_R |
328 R500_ALU_RGB_G_SWIZ_A_G |
329 R500_ALU_RGB_B_SWIZ_A_B |
330 R500_ALU_RGB_SEL_B_SRC0 |
331 R500_ALU_RGB_R_SWIZ_B_1 |
332 R500_ALU_RGB_B_SWIZ_B_1 |
333 R500_ALU_RGB_G_SWIZ_B_1;
334 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
335 R500_ALPHA_SWIZ_A_A |
336 R500_ALPHA_SWIZ_B_1;
337 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
338 R500_ALU_RGBA_R_SWIZ_0 |
339 R500_ALU_RGBA_G_SWIZ_0 |
340 R500_ALU_RGBA_B_SWIZ_0 |
341 R500_ALU_RGBA_A_SWIZ_0;
342
343 fp->cs->nrslots = 2;
344 fp->translated = GL_TRUE;
345 }
346
347 static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
348 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
349 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
350 /* output_mask */
351 | (fpi->DstReg.WriteMask << 15);
352 } else {
353 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
354 /* pixel_mask */
355 | (fpi->DstReg.WriteMask << 11);
356 }
357
358 fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
359 }
360
361 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) {
362 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
363 * it is technically more accurate and recommended by ATI/AMD. */
364 GLuint src_reg = make_src(fp, src);
365 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
366 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
367 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
368 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src))
369 | R500_ALU_RGB_SEL_B_SRC0
370 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src))
371 | R500_ALU_RGB_OMOD_DISABLE;
372 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
373 | R500_ALPHA_ADDRD(dest)
374 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src))
375 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src))
376 | R500_ALPHA_OMOD_DISABLE;
377 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
378 | R500_ALU_RGBA_ADDRD(dest)
379 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
380 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
381 }
382
383 static GLboolean parse_program(struct r500_fragment_program *fp)
384 {
385 struct gl_fragment_program *mp = &fp->mesa_program;
386 const struct prog_instruction *inst = mp->Base.Instructions;
387 struct prog_instruction *fpi;
388 GLuint src[3], dest, temp[2];
389 int flags, pixel_mask = 0, output_mask = 0, counter = 0;
390
391 if (!inst || inst[0].Opcode == OPCODE_END) {
392 ERROR("The program is empty!\n");
393 return GL_FALSE;
394 }
395
396 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
397
398 if (fpi->Opcode != OPCODE_KIL) {
399 dest = make_dest(fp, fpi->DstReg);
400
401 pixel_mask = fpi->DstReg.WriteMask << 11;
402 output_mask = fpi->DstReg.WriteMask << 15;
403 }
404
405 switch (fpi->Opcode) {
406 case OPCODE_ABS:
407 emit_alu(fp, counter, fpi);
408 emit_mov(fp, counter, fpi->SrcReg[0], dest);
409 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
410 | R500_ALU_RGB_MOD_B_ABS;
411 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
412 | R500_ALPHA_MOD_B_ABS;
413 break;
414 case OPCODE_ADD:
415 src[0] = make_src(fp, fpi->SrcReg[0]);
416 src[1] = make_src(fp, fpi->SrcReg[1]);
417 /* Variation on MAD: 1*src0+src1 */
418 emit_alu(fp, counter, fpi);
419 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
420 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0);
421 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
422 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0);
423 fp->inst[counter].inst3 = /* 1 */
424 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
425 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
426 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
427 | R500_ALPHA_ADDRD(dest)
428 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
429 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
430 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
431 | R500_ALU_RGBA_ADDRD(dest)
432 | R500_ALU_RGBA_SEL_C_SRC1
433 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
434 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
435 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
436 break;
437 case OPCODE_CMP:
438 /* This inst's selects need to be swapped as follows:
439 * 0 -> C ; 1 -> B ; 2 -> A */
440 src[0] = make_src(fp, fpi->SrcReg[0]);
441 src[1] = make_src(fp, fpi->SrcReg[1]);
442 src[2] = make_src(fp, fpi->SrcReg[2]);
443 emit_alu(fp, counter, fpi);
444 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
445 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
446 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
447 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
448 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
449 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
450 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
451 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
452 | R500_ALPHA_ADDRD(dest)
453 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
454 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
455 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
456 | R500_ALU_RGBA_ADDRD(dest)
457 | R500_ALU_RGBA_SEL_C_SRC2
458 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
459 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
460 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
461 break;
462 case OPCODE_COS:
463 src[0] = make_src(fp, fpi->SrcReg[0]);
464 emit_alu(fp, counter, fpi);
465 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
466 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
467 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
468 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
469 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
470 | R500_ALPHA_ADDRD(dest)
471 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
472 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
473 | R500_ALU_RGBA_ADDRD(dest);
474 break;
475 case OPCODE_DP3:
476 src[0] = make_src(fp, fpi->SrcReg[0]);
477 src[1] = make_src(fp, fpi->SrcReg[1]);
478 emit_alu(fp, counter, fpi);
479 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
480 | R500_RGB_ADDR1(src[1]);
481 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
482 | R500_ALPHA_ADDR1(src[1]);
483 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
484 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
485 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
486 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
487 | R500_ALPHA_ADDRD(dest)
488 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
489 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
490 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
491 | R500_ALU_RGBA_ADDRD(dest);
492 break;
493 case OPCODE_DP4:
494 src[0] = make_src(fp, fpi->SrcReg[0]);
495 src[1] = make_src(fp, fpi->SrcReg[1]);
496 /* Based on DP3 */
497 emit_alu(fp, counter, fpi);
498 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
499 | R500_RGB_ADDR1(src[1]);
500 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
501 | R500_ALPHA_ADDR1(src[1]);
502 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
503 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
504 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
505 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
506 | R500_ALPHA_ADDRD(dest)
507 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
508 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
509 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
510 | R500_ALU_RGBA_ADDRD(dest);
511 break;
512 case OPCODE_DPH:
513 src[0] = make_src(fp, fpi->SrcReg[0]);
514 src[1] = make_src(fp, fpi->SrcReg[1]);
515 /* Based on DP3 */
516 emit_alu(fp, counter, fpi);
517 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
518 | R500_RGB_ADDR1(src[1]);
519 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
520 | R500_ALPHA_ADDR1(src[1]);
521 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
522 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
523 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
524 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
525 | R500_ALPHA_ADDRD(dest)
526 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
527 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
528 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
529 | R500_ALU_RGBA_ADDRD(dest);
530 break;
531 case OPCODE_EX2:
532 src[0] = make_src(fp, fpi->SrcReg[0]);
533 emit_alu(fp, counter, fpi);
534 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
535 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
536 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
537 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
538 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
539 | R500_ALPHA_ADDRD(dest)
540 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
541 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
542 | R500_ALU_RGBA_ADDRD(dest);
543 break;
544 case OPCODE_FRC:
545 src[0] = make_src(fp, fpi->SrcReg[0]);
546 emit_alu(fp, counter, fpi);
547 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
548 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
549 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
550 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
551 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
552 | R500_ALPHA_ADDRD(dest)
553 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
554 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
555 | R500_ALU_RGBA_ADDRD(dest);
556 break;
557 case OPCODE_KIL:
558 emit_tex(fp, fpi, OPCODE_KIL, dest, counter);
559 break;
560 case OPCODE_LG2:
561 src[0] = make_src(fp, fpi->SrcReg[0]);
562 emit_alu(fp, counter, fpi);
563 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
564 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
565 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
566 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
567 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
568 | R500_ALPHA_ADDRD(dest)
569 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
570 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
571 | R500_ALU_RGBA_ADDRD(dest);
572 break;
573 case OPCODE_MAD:
574 src[0] = make_src(fp, fpi->SrcReg[0]);
575 src[1] = make_src(fp, fpi->SrcReg[1]);
576 src[2] = make_src(fp, fpi->SrcReg[2]);
577 emit_alu(fp, counter, fpi);
578 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
579 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
580 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
581 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
582 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
583 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
584 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
585 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
586 | R500_ALPHA_ADDRD(dest)
587 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
588 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
589 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
590 | R500_ALU_RGBA_ADDRD(dest)
591 | R500_ALU_RGBA_SEL_C_SRC2
592 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
593 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
594 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
595 break;
596 case OPCODE_MAX:
597 src[0] = make_src(fp, fpi->SrcReg[0]);
598 src[1] = make_src(fp, fpi->SrcReg[1]);
599 emit_alu(fp, counter, fpi);
600 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
601 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
602 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
603 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
604 | R500_ALU_RGB_SEL_B_SRC1
605 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
606 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
607 | R500_ALPHA_ADDRD(dest)
608 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
609 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
610 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
611 | R500_ALU_RGBA_ADDRD(dest);
612 break;
613 case OPCODE_MIN:
614 src[0] = make_src(fp, fpi->SrcReg[0]);
615 src[1] = make_src(fp, fpi->SrcReg[1]);
616 emit_alu(fp, counter, fpi);
617 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
618 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
619 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
620 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
621 | R500_ALU_RGB_SEL_B_SRC1
622 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
623 fp->inst[counter].inst4 = R500_ALPHA_OP_MIN
624 | R500_ALPHA_ADDRD(dest)
625 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
626 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
627 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
628 | R500_ALU_RGBA_ADDRD(dest);
629 break;
630 case OPCODE_MOV:
631 emit_alu(fp, counter, fpi);
632 emit_mov(fp, counter, fpi->SrcReg[0], dest);
633 break;
634 case OPCODE_MUL:
635 src[0] = make_src(fp, fpi->SrcReg[0]);
636 src[1] = make_src(fp, fpi->SrcReg[1]);
637 /* Variation on MAD: src0*src1+0 */
638 emit_alu(fp, counter, fpi);
639 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
640 | R500_RGB_ADDR1(src[1]);
641 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
642 | R500_ALPHA_ADDR1(src[1]);
643 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
644 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
645 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
646 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
647 | R500_ALPHA_ADDRD(dest)
648 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
649 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
650 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
651 | R500_ALU_RGBA_ADDRD(dest)
652 // | R500_ALU_RGBA_SEL_C_SRC2
653 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
654 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
655 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
656 break;
657 case OPCODE_RCP:
658 src[0] = make_src(fp, fpi->SrcReg[0]);
659 emit_alu(fp, counter, fpi);
660 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
661 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
662 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
663 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
664 fp->inst[counter].inst4 = R500_ALPHA_OP_RCP
665 | R500_ALPHA_ADDRD(dest)
666 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
667 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
668 | R500_ALU_RGBA_ADDRD(dest);
669 break;
670 case OPCODE_RSQ:
671 src[0] = make_src(fp, fpi->SrcReg[0]);
672 emit_alu(fp, counter, fpi);
673 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
674 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
675 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
676 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
677 fp->inst[counter].inst4 = R500_ALPHA_OP_RSQ
678 | R500_ALPHA_ADDRD(dest)
679 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
680 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
681 | R500_ALU_RGBA_ADDRD(dest);
682 break;
683 case OPCODE_SCS:
684 /* TODO: Make this elegant! */
685 /* Do a cosine, then a sine, masking out the channels we want to protect. */
686 src[0] = make_src(fp, fpi->SrcReg[0]);
687 /* Cosine only goes in R (x) channel. */
688 fpi->DstReg.WriteMask = 0x1;
689 emit_alu(fp, counter, fpi);
690 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
691 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
692 | R500_INST_TEX_SEM_WAIT | 0x1 << 14;
693 } else {
694 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
695 | R500_INST_TEX_SEM_WAIT | 0x1 << 11;
696 }
697 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
698 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
699 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
700 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
701 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
702 | R500_ALPHA_ADDRD(dest)
703 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
704 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
705 | R500_ALU_RGBA_ADDRD(dest);
706 counter++;
707 /* Sine only goes in G (y) channel. */
708 fpi->DstReg.WriteMask = 0x2;
709 emit_alu(fp, counter, fpi);
710 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
711 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
712 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
713 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
714 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
715 | R500_ALPHA_ADDRD(dest)
716 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
717 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
718 | R500_ALU_RGBA_ADDRD(dest);
719 counter++;
720 /* Put 0 into B,A (z,w) channels. */
721 fpi->DstReg.WriteMask = 0xC;
722 emit_alu(fp, counter, fpi);
723 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
724 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
725 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
726 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
727 | R500_ALU_RGB_SEL_B_SRC0
728 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
729 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
730 | R500_ALPHA_ADDRD(dest)
731 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
732 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
733 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
734 | R500_ALU_RGBA_ADDRD(dest)
735 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
736 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
737 break;
738 case OPCODE_SIN:
739 src[0] = make_src(fp, fpi->SrcReg[0]);
740 emit_alu(fp, counter, fpi);
741 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
742 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
743 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
744 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
745 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
746 | R500_ALPHA_ADDRD(dest)
747 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
748 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
749 | R500_ALU_RGBA_ADDRD(dest);
750 break;
751 case OPCODE_SUB:
752 src[0] = make_src(fp, fpi->SrcReg[0]);
753 src[1] = make_src(fp, fpi->SrcReg[1]);
754 /* Variation on MAD: 1*src0-src1 */
755 emit_alu(fp, counter, fpi);
756 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
757 | R500_RGB_ADDR2(src[1]);
758 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
759 | R500_ALPHA_ADDR2(src[1]);
760 fp->inst[counter].inst3 = /* 1 */
761 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
762 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
763 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
764 | R500_ALPHA_ADDRD(dest)
765 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
766 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
767 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
768 | R500_ALU_RGBA_ADDRD(dest)
769 | R500_ALU_RGBA_SEL_C_SRC2
770 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
771 | R500_ALU_RGBA_MOD_C_NEG
772 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
773 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
774 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
775 break;
776 case OPCODE_SWZ:
777 /* TODO: Negation masks! */
778 emit_alu(fp, counter, fpi);
779 emit_mov(fp, counter, fpi->SrcReg[0], dest);
780 break;
781 case OPCODE_TEX:
782 emit_tex(fp, fpi, OPCODE_TEX, dest, counter);
783 break;
784 case OPCODE_TXB:
785 emit_tex(fp, fpi, OPCODE_TXB, dest, counter);
786 break;
787 case OPCODE_TXP:
788 emit_tex(fp, fpi, OPCODE_TXP, dest, counter);
789 break;
790 default:
791 ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
792 break;
793 }
794
795 /* Finishing touches */
796 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
797 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
798 }
799
800 counter++;
801
802 if (fp->error)
803 return GL_FALSE;
804
805 }
806
807 /* Finish him! (If it's an ALU/OUT instruction...) */
808 if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
809 fp->inst[counter-1].inst0 |= R500_INST_LAST;
810 } else {
811 /* We still need to put an output inst, right? */
812 WARN_ONCE("Final FP instruction is not an OUT.\n");
813 #if 0
814 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
815 | R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
816 output_mask;
817 fp->inst[counter].inst1 = R500_RGB_ADDR0(dest);
818 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(dest);
819 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
820 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
821 | R500_ALU_RGB_SEL_B_SRC0
822 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
823 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
824 | R500_ALPHA_ADDRD(0)
825 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0
826 | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1;
827 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
828 | R500_ALU_RGBA_ADDRD(0)
829 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
830 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
831 counter++;
832 #endif
833 }
834
835 fp->cs->nrslots = counter;
836
837 fp->max_temp_idx++;
838
839 return GL_TRUE;
840 }
841
842 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
843 {
844 struct r300_pfs_compile_state *cs = NULL;
845 struct gl_fragment_program *mp = &fp->mesa_program;
846 struct prog_instruction *fpi;
847 GLuint InputsRead = mp->Base.InputsRead;
848 GLuint temps_used = 0; /* for fp->temps[] */
849 int i, j;
850
851 /* New compile, reset tracking data */
852 fp->optimization =
853 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
854 fp->translated = GL_FALSE;
855 fp->error = GL_FALSE;
856 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
857 fp->cur_node = 0;
858 fp->first_node_has_tex = 0;
859 fp->const_nr = 0;
860 /* Size of pixel stack, plus 1. */
861 fp->max_temp_idx = 1;
862 /* Temp register offset. */
863 fp->temp_reg_offset = 0;
864 fp->node[0].alu_end = -1;
865 fp->node[0].tex_end = -1;
866
867 _mesa_memset(cs, 0, sizeof(*fp->cs));
868 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
869 for (j = 0; j < 3; j++) {
870 cs->slot[i].vsrc[j] = SRC_CONST;
871 cs->slot[i].ssrc[j] = SRC_CONST;
872 }
873 }
874
875 /* Work out what temps the Mesa inputs correspond to, this must match
876 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
877 * configures itself based on the fragprog's InputsRead
878 *
879 * NOTE: this depends on get_hw_temp() allocating registers in order,
880 * starting from register 0, so we're just going to do that instead.
881 */
882
883 /* Texcoords come first */
884 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
885 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
886 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
887 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
888 fp->temp_reg_offset;
889 fp->temp_reg_offset++;
890 }
891 }
892 InputsRead &= ~FRAG_BITS_TEX_ANY;
893
894 /* fragment position treated as a texcoord */
895 if (InputsRead & FRAG_BIT_WPOS) {
896 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
897 cs->inputs[FRAG_ATTRIB_WPOS].reg =
898 fp->temp_reg_offset;
899 fp->temp_reg_offset++;
900 }
901 InputsRead &= ~FRAG_BIT_WPOS;
902
903 /* Then primary colour */
904 if (InputsRead & FRAG_BIT_COL0) {
905 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
906 cs->inputs[FRAG_ATTRIB_COL0].reg =
907 fp->temp_reg_offset;
908 fp->temp_reg_offset++;
909 }
910 InputsRead &= ~FRAG_BIT_COL0;
911
912 /* Secondary color */
913 if (InputsRead & FRAG_BIT_COL1) {
914 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
915 cs->inputs[FRAG_ATTRIB_COL1].reg =
916 fp->temp_reg_offset;
917 fp->temp_reg_offset++;
918 }
919 InputsRead &= ~FRAG_BIT_COL1;
920
921 /* Anything else */
922 if (InputsRead) {
923 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
924 /* force read from hwreg 0 for now */
925 for (i = 0; i < 32; i++)
926 if (InputsRead & (1 << i))
927 cs->inputs[i].reg = 0;
928 }
929
930 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
931 * That way, we can free up the reg when it's no longer needed
932 */
933 if (!mp->Base.Instructions) {
934 ERROR("No instructions found in program, going to go die now.\n");
935 return;
936 }
937
938 #if 0
939 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
940 int idx;
941 for (i = 0; i < 3; i++) {
942 idx = fpi->SrcReg[i].Index;
943 if (fpi->SrcReg[i].File == PROGRAM_INPUT) {
944 cs->inputs[idx].refcount++;
945 if (fp->max_temp_idx < idx)
946 fp->max_temp_idx = idx;
947 }
948 }
949 }
950 #endif
951
952 fp->max_temp_idx = fp->temp_reg_offset + 1;
953
954 cs->temp_in_use = temps_used;
955 }
956
957 static void update_params(struct r500_fragment_program *fp)
958 {
959 struct gl_fragment_program *mp = &fp->mesa_program;
960
961 /* Ask Mesa nicely to fill in ParameterValues for us */
962 if (mp->Base.Parameters)
963 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
964 }
965
966 void r500TranslateFragmentShader(r300ContextPtr r300,
967 struct r500_fragment_program *fp)
968 {
969
970 struct r300_pfs_compile_state *cs = NULL;
971
972 if (!fp->translated) {
973
974 /* I need to see what I'm working with! */
975 fprintf(stderr, "Mesa program:\n");
976 fprintf(stderr, "-------------\n");
977 _mesa_print_program(&fp->mesa_program.Base);
978 fflush(stdout);
979
980 init_program(r300, fp);
981 cs = fp->cs;
982
983 if (parse_program(fp) == GL_FALSE) {
984 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
985 dumb_shader(fp);
986 fp->inst_offset = 0;
987 fp->inst_end = cs->nrslots - 1;
988 return;
989 }
990 fp->inst_offset = 0;
991 fp->inst_end = cs->nrslots - 1;
992
993 fp->translated = GL_TRUE;
994 if (RADEON_DEBUG & DEBUG_PIXEL)
995 dump_program(fp);
996
997 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
998 }
999
1000 update_params(fp);
1001
1002 }
1003
/**
 * Map a 3-bit swizzle selector to its printable name.
 *
 * \param swiz_val selector value; 0..7 are named
 * \return a static string ("R", "G", "B", "A", "0", "1/2", "1" or "U");
 *         "?" for any value outside 0..7, so callers always get a valid
 *         string instead of falling off the end of the function.
 */
static char *toswiz(int swiz_val) {
	static char *const names[] = {
		"R", "G", "B", "A", "0", "1/2", "1", "U"
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	if (swiz_val < 0 || swiz_val >= count)
		return "?";

	return names[swiz_val];
}
1016
/**
 * Map the 4-bit opcode field of the RGBA instruction word to a mnemonic.
 *
 * \param op_val opcode value; 0..12 are named
 * \return a static mnemonic string; "Unknown" for any other value.
 *         The caller masks with 0xf, so 13..15 can really occur and must
 *         not yield an uninitialised pointer.
 */
static char *toop(int op_val)
{
	static char *const names[] = {
		"MAD", "DP3", "DP4", "D2A", "MIN", "MAX", "Reserved",
		"CND", "CMP", "FRC", "SOP", "MDH", "MDV"
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	if (op_val < 0 || op_val >= count)
		return "Unknown";

	return names[op_val];
}
1037
/**
 * Map the 4-bit opcode field of the alpha instruction word to a mnemonic.
 *
 * \param op_val opcode value; 0..15 are named
 * \return a static mnemonic string; "Unknown" for values outside 0..15
 *         rather than an uninitialised pointer.
 */
static char *to_alpha_op(int op_val)
{
	static char *const names[] = {
		"MAD", "DP", "MIN", "MAX", "Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ", "SIN", "COS", "MDH", "MDV"
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	if (op_val < 0 || op_val >= count)
		return "Unknown";

	return names[op_val];
}
1061
/**
 * Render a 4-bit channel mask as text.
 *
 * Bit 0 contributes "R", bit 1 "G", bit 2 "B" and bit 3 "A"; when alpha
 * is set it is listed first (e.g. 9 -> "AR", 15 -> "ARGB").
 *
 * \param val mask value; 0..15 are named
 * \return a static string; "Unknown" for values outside 0..15 rather
 *         than an uninitialised pointer.
 */
static char *to_mask(int val)
{
	static char *const names[] = {
		"NONE", "R", "G", "RG", "B", "RB", "GB", "RGB",
		"A", "AR", "AG", "ARG", "AB", "ARB", "AGB", "ARGB"
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	if (val < 0 || val >= count)
		return "Unknown";

	return names[val];
}
1085
1086 static void dump_program(struct r500_fragment_program *fp)
1087 {
1088 int pc = 0;
1089 int n;
1090 uint32_t inst;
1091 uint32_t inst0;
1092 char *str;
1093
1094 for (n = 0; n < fp->inst_end+1; n++) {
1095 inst0 = inst = fp->inst[n].inst0;
1096 fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
1097 switch(inst & 0x3) {
1098 case R500_INST_TYPE_ALU: str = "ALU"; break;
1099 case R500_INST_TYPE_OUT: str = "OUT"; break;
1100 case R500_INST_TYPE_FC: str = "FC"; break;
1101 case R500_INST_TYPE_TEX: str = "TEX"; break;
1102 };
1103 fprintf(stderr,"%s %s %s %s %s ", str,
1104 inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
1105 inst & R500_INST_LAST ? "LAST" : "",
1106 inst & R500_INST_NOP ? "NOP" : "",
1107 inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
1108 fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
1109 to_mask((inst >> 15) & 0xf));
1110
1111 switch(inst0 & 0x3) {
1112 case 0:
1113 case 1:
1114 fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1);
1115 inst = fp->inst[n].inst1;
1116
1117 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1118 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1119 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1120 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1121 (inst >> 30));
1122
1123 fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2);
1124 inst = fp->inst[n].inst2;
1125 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1126 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1127 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1128 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1129 (inst >> 30));
1130 fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3);
1131 inst = fp->inst[n].inst3;
1132 fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1133 (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
1134 (inst >> 11) & 0x3,
1135 (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
1136 (inst >> 24) & 0x3);
1137
1138
1139 fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4);
1140 inst = fp->inst[n].inst4;
1141 fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_b_src:%d %s %d\n", to_alpha_op(inst & 0xf),
1142 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1143 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
1144 (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3);
1145
1146 fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5);
1147 inst = fp->inst[n].inst5;
1148 fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
1149 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1150 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
1151 (inst >> 23) & 0x3,
1152 (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
1153 break;
1154 case 2:
1155 break;
1156 case 3:
1157 fprintf(stderr,"1: TEX INST 0x%08x\n", fp->inst[n].inst1);
1158 fprintf(stderr,"2: TEX ADDR 0x%08x\n", fp->inst[n].inst2);
1159 fprintf(stderr,"2: TEX ADDR DXDY 0x%08x\n", fp->inst[n].inst3);
1160 break;
1161 }
1162 fprintf(stderr,"\n");
1163 }
1164
1165
1166 }