01f6010544a60667a66cc2cf0dc9e89170be6ae3
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
57 /*
58 * Useful macros and values
59 */
/* Report a compile error on stderr, tagged with the file and function name,
 * and flag the program as failed.  Relies on a variable named `fp` (with an
 * `error` member) being in scope wherever the macro is used. */
#define ERROR(fmt, args...) do {			\
		fprintf(stderr, "%s::%s(): " fmt "\n",	\
			__FILE__, __FUNCTION__, ##args);	\
		fp->error = GL_TRUE;			\
	} while(0)

/* Declares a local `cs` that aliases fp->cs; relies on `fp` being in scope. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle tools */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/* The MAKE_SWIZ_* helpers shift an already-encoded swizzle into its field.
 * The argument is parenthesized so that an expression argument such as
 * `a | b` is shifted as a whole instead of only its last operand. */

/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
95
96 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
97 GLuint swiz = 0x0;
98 GLuint temp;
99 /* This could be optimized, but it should be plenty fast already. */
100 int i;
101 for (i = 0; i < 3; i++) {
102 temp = (src.Swizzle >> i*3) & 0x7;
103 /* Fix SWIZZLE_ONE */
104 if (temp == 5) temp++;
105 swiz += temp << i*3;
106 }
107 return swiz;
108 }
109
110 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
111 GLuint swiz = (src.Swizzle >> 12) & 0x7;
112 if (swiz == 5) swiz++;
113 return swiz;
114 }
115
116 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
117 GLuint swiz = 0x0;
118 GLuint temp = src.Swizzle;
119 int i;
120 for (i = 0; i < 4; i++) {
121 swiz += (temp & 0x3) << i*2;
122 temp >>= 3;
123 }
124 return swiz;
125 }
126
127 static int get_temp(struct r500_fragment_program *fp, int slot) {
128
129 COMPILE_STATE;
130
131 int r = slot;
132
133 while (cs->inputs[r].refcount != 0) {
134 /* Crap, taken. */
135 r++;
136 }
137
138 fp->temp_reg_offset = r - slot;
139
140 if (r >= R500_US_NUM_TEMP_REGS) {
141 ERROR("Out of hardware temps!\n");
142 return 0;
143 }
144
145 if (r > fp->max_temp_idx)
146 fp->max_temp_idx = r;
147
148 return r;
149 }
150
151 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
152 static GLuint emit_const4fv(struct r500_fragment_program *fp,
153 const GLfloat * cp)
154 {
155 GLuint reg = 0x0;
156 int index;
157
158 for (index = 0; index < fp->const_nr; ++index) {
159 if (fp->constant[index] == cp)
160 break;
161 }
162
163 if (index >= fp->const_nr) {
164 if (index >= R500_US_NUM_CONST_REGS) {
165 ERROR("Out of hw constants!\n");
166 return reg;
167 }
168
169 fp->const_nr++;
170 fp->constant[index] = cp;
171 }
172
173 reg = index | REG_CONSTANT;
174 return reg;
175 }
176
177 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
178 COMPILE_STATE;
179 GLuint reg;
180 switch (src.File) {
181 case PROGRAM_TEMPORARY:
182 reg = src.Index + fp->temp_reg_offset;
183 break;
184 case PROGRAM_INPUT:
185 reg = cs->inputs[src.Index].reg;
186 break;
187 case PROGRAM_STATE_VAR:
188 case PROGRAM_NAMED_PARAM:
189 case PROGRAM_CONSTANT:
190 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
191 ParameterValues[src.Index]);
192 break;
193 default:
194 ERROR("Can't handle src.File %x\n", src.File);
195 reg = 0x0;
196 break;
197 }
198 return reg;
199 }
200
201 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
202 GLuint reg;
203 switch (dest.File) {
204 case PROGRAM_TEMPORARY:
205 reg = dest.Index + fp->temp_reg_offset;
206 break;
207 case PROGRAM_OUTPUT:
208 /* Eventually we may need to handle multiple
209 * rendering targets... */
210 reg = dest.Index;
211 break;
212 default:
213 ERROR("Can't handle dest.File %x\n", dest.File);
214 reg = 0x0;
215 break;
216 }
217 return reg;
218 }
219
220 static void emit_tex(struct r500_fragment_program *fp,
221 struct prog_instruction *fpi, int opcode, int dest, int counter)
222 {
223 int hwsrc, hwdest;
224 GLuint mask;
225
226 mask = fpi->DstReg.WriteMask << 11;
227 hwsrc = make_src(fp, fpi->SrcReg[0]);
228
229 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
230 | R500_INST_TEX_SEM_WAIT;
231
232 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
233 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
234
235 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
236 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
237
238 switch (opcode) {
239 case OPCODE_TEX:
240 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
241 break;
242 case OPCODE_TXB:
243 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
244 break;
245 case OPCODE_TXP:
246 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
247 break;
248 default:
249 ERROR("emit_tex can't handle opcode %x\n", opcode);
250 }
251
252 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
253 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
254 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
255 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
256 | R500_TEX_DST_ADDR(dest)
257 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
258 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
259
260
261
262 fp->inst[counter].inst3 = 0x0;
263 fp->inst[counter].inst4 = 0x0;
264 fp->inst[counter].inst5 = 0x0;
265 }
266
267 static void dumb_shader(struct r500_fragment_program *fp)
268 {
269 fp->inst[0].inst0 = R500_INST_TYPE_TEX
270 | R500_INST_TEX_SEM_WAIT
271 | R500_INST_RGB_WMASK_R
272 | R500_INST_RGB_WMASK_G
273 | R500_INST_RGB_WMASK_B
274 | R500_INST_ALPHA_WMASK
275 | R500_INST_RGB_CLAMP
276 | R500_INST_ALPHA_CLAMP;
277 fp->inst[0].inst1 = R500_TEX_ID(0)
278 | R500_TEX_INST_LD
279 | R500_TEX_SEM_ACQUIRE
280 | R500_TEX_IGNORE_UNCOVERED;
281 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
282 | R500_TEX_SRC_S_SWIZ_R
283 | R500_TEX_SRC_T_SWIZ_G
284 | R500_TEX_DST_ADDR(0)
285 | R500_TEX_DST_R_SWIZ_R
286 | R500_TEX_DST_G_SWIZ_G
287 | R500_TEX_DST_B_SWIZ_B
288 | R500_TEX_DST_A_SWIZ_A;
289 fp->inst[0].inst3 = R500_DX_ADDR(0)
290 | R500_DX_S_SWIZ_R
291 | R500_DX_T_SWIZ_R
292 | R500_DX_R_SWIZ_R
293 | R500_DX_Q_SWIZ_R
294 | R500_DY_ADDR(0)
295 | R500_DY_S_SWIZ_R
296 | R500_DY_T_SWIZ_R
297 | R500_DY_R_SWIZ_R
298 | R500_DY_Q_SWIZ_R;
299 fp->inst[0].inst4 = 0x0;
300 fp->inst[0].inst5 = 0x0;
301
302 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
303 R500_INST_TEX_SEM_WAIT |
304 R500_INST_LAST |
305 R500_INST_RGB_OMASK_R |
306 R500_INST_RGB_OMASK_G |
307 R500_INST_RGB_OMASK_B |
308 R500_INST_ALPHA_OMASK;
309 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
310 R500_RGB_ADDR1(0) |
311 R500_RGB_ADDR1_CONST |
312 R500_RGB_ADDR2(0) |
313 R500_RGB_ADDR2_CONST |
314 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
315 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
316 R500_ALPHA_ADDR1(0) |
317 R500_ALPHA_ADDR1_CONST |
318 R500_ALPHA_ADDR2(0) |
319 R500_ALPHA_ADDR2_CONST |
320 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
321 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
322 R500_ALU_RGB_R_SWIZ_A_R |
323 R500_ALU_RGB_G_SWIZ_A_G |
324 R500_ALU_RGB_B_SWIZ_A_B |
325 R500_ALU_RGB_SEL_B_SRC0 |
326 R500_ALU_RGB_R_SWIZ_B_1 |
327 R500_ALU_RGB_B_SWIZ_B_1 |
328 R500_ALU_RGB_G_SWIZ_B_1;
329 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
330 R500_ALPHA_SWIZ_A_A |
331 R500_ALPHA_SWIZ_B_1;
332 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
333 R500_ALU_RGBA_R_SWIZ_0 |
334 R500_ALU_RGBA_G_SWIZ_0 |
335 R500_ALU_RGBA_B_SWIZ_0 |
336 R500_ALU_RGBA_A_SWIZ_0;
337
338 fp->cs->nrslots = 2;
339 fp->translated = GL_TRUE;
340 }
341
342 /* static void emit_alu(struct r500_fragment_program *fp) {
343 * } */
344
345 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) {
346 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
347 * it is technically more accurate and recommended by ATI/AMD. */
348 GLuint src_reg = make_src(fp, src);
349 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT;
350 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
351 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
352 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
353 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src))
354 | R500_ALU_RGB_SEL_B_SRC0
355 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src))
356 | R500_ALU_RGB_OMOD_DISABLE;
357 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
358 | R500_ALPHA_ADDRD(dest)
359 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src))
360 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src))
361 | R500_ALPHA_OMOD_DISABLE;
362 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
363 | R500_ALU_RGBA_ADDRD(dest)
364 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
365 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
366 }
367
368 static GLboolean parse_program(struct r500_fragment_program *fp)
369 {
370 struct gl_fragment_program *mp = &fp->mesa_program;
371 const struct prog_instruction *inst = mp->Base.Instructions;
372 struct prog_instruction *fpi;
373 GLuint src[3], dest, temp[2];
374 int flags, pixel_mask = 0, output_mask = 0, counter = 0, temp_pixel_mask = 0;
375
376 if (!inst || inst[0].Opcode == OPCODE_END) {
377 ERROR("The program is empty!\n");
378 return GL_FALSE;
379 }
380
381 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
382
383 if (fpi->Opcode != OPCODE_KIL) {
384 dest = make_dest(fp, fpi->DstReg);
385
386 pixel_mask = fpi->DstReg.WriteMask << 11;
387 output_mask = fpi->DstReg.WriteMask << 14;
388 }
389
390 switch (fpi->Opcode) {
391 case OPCODE_ABS:
392 emit_mov(fp, counter, fpi->SrcReg[0], dest);
393 fp->inst[counter].inst0 |= pixel_mask;
394 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
395 | R500_ALU_RGB_MOD_B_ABS;
396 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
397 | R500_ALPHA_MOD_B_ABS;
398 break;
399 case OPCODE_ADD:
400 src[0] = make_src(fp, fpi->SrcReg[0]);
401 src[1] = make_src(fp, fpi->SrcReg[1]);
402 /* Variation on MAD: 1*src0+src1 */
403 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
404 | pixel_mask;
405 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
406 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0);
407 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
408 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0);
409 fp->inst[counter].inst3 = /* 1 */
410 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
411 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
412 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
413 | R500_ALPHA_ADDRD(dest)
414 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
415 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
416 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
417 | R500_ALU_RGBA_ADDRD(dest)
418 | R500_ALU_RGBA_SEL_C_SRC1
419 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
420 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
421 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
422 break;
423 case OPCODE_CMP:
424 src[0] = make_src(fp, fpi->SrcReg[0]);
425 src[1] = make_src(fp, fpi->SrcReg[1]);
426 src[2] = make_src(fp, fpi->SrcReg[2]);
427 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
428 | pixel_mask;
429 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
430 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
431 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
432 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
433 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
434 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
435 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
436 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
437 | R500_ALPHA_ADDRD(dest)
438 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
439 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
440 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
441 | R500_ALU_RGBA_ADDRD(dest)
442 | R500_ALU_RGBA_SEL_C_SRC2
443 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
444 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
445 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
446 break;
447 case OPCODE_COS:
448 src[0] = make_src(fp, fpi->SrcReg[0]);
449 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
450 | R500_INST_TEX_SEM_WAIT | pixel_mask;
451 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
452 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
453 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
454 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
455 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
456 | R500_ALPHA_ADDRD(dest)
457 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
458 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
459 | R500_ALU_RGBA_ADDRD(dest);
460 break;
461 case OPCODE_DP3:
462 src[0] = make_src(fp, fpi->SrcReg[0]);
463 src[1] = make_src(fp, fpi->SrcReg[1]);
464 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
465 | R500_INST_TEX_SEM_WAIT | pixel_mask;
466 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
467 | R500_RGB_ADDR1(src[1]);
468 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
469 | R500_ALPHA_ADDR1(src[1]);
470 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
471 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
472 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
473 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
474 | R500_ALPHA_ADDRD(dest)
475 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
476 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
477 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
478 | R500_ALU_RGBA_ADDRD(dest);
479 break;
480 case OPCODE_DP4:
481 src[0] = make_src(fp, fpi->SrcReg[0]);
482 src[1] = make_src(fp, fpi->SrcReg[1]);
483 /* Based on DP3 */
484 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
485 | R500_INST_TEX_SEM_WAIT | pixel_mask;
486 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
487 | R500_RGB_ADDR1(src[1]);
488 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
489 | R500_ALPHA_ADDR1(src[1]);
490 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
491 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
492 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
493 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
494 | R500_ALPHA_ADDRD(dest)
495 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
496 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
497 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
498 | R500_ALU_RGBA_ADDRD(dest);
499 break;
500 case OPCODE_EX2:
501 src[0] = make_src(fp, fpi->SrcReg[0]);
502 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
503 | R500_INST_TEX_SEM_WAIT | pixel_mask;
504 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
505 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
506 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
507 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
508 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
509 | R500_ALPHA_ADDRD(dest)
510 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
511 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
512 | R500_ALU_RGBA_ADDRD(dest);
513 break;
514 case OPCODE_FRC:
515 src[0] = make_src(fp, fpi->SrcReg[0]);
516 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
517 | R500_INST_TEX_SEM_WAIT | pixel_mask;
518 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
519 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
520 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
521 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
522 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
523 | R500_ALPHA_ADDRD(dest)
524 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
525 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
526 | R500_ALU_RGBA_ADDRD(dest);
527 case OPCODE_LG2:
528 src[0] = make_src(fp, fpi->SrcReg[0]);
529 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
530 | R500_INST_TEX_SEM_WAIT | pixel_mask;
531 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
532 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
533 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
534 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
535 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
536 | R500_ALPHA_ADDRD(dest)
537 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
538 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
539 | R500_ALU_RGBA_ADDRD(dest);
540 break;
541 case OPCODE_MAD:
542 src[0] = make_src(fp, fpi->SrcReg[0]);
543 src[1] = make_src(fp, fpi->SrcReg[1]);
544 src[2] = make_src(fp, fpi->SrcReg[2]);
545 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
546 | pixel_mask;
547 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
548 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
549 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
550 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
551 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
552 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
553 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
554 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
555 | R500_ALPHA_ADDRD(dest)
556 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
557 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
558 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
559 | R500_ALU_RGBA_ADDRD(dest)
560 | R500_ALU_RGBA_SEL_C_SRC2
561 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
562 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
563 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
564 break;
565 case OPCODE_MAX:
566 src[0] = make_src(fp, fpi->SrcReg[0]);
567 src[1] = make_src(fp, fpi->SrcReg[1]);
568 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | pixel_mask;
569 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
570 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
571 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
572 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
573 | R500_ALU_RGB_SEL_B_SRC1
574 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
575 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
576 | R500_ALPHA_ADDRD(dest)
577 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
578 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
579 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
580 | R500_ALU_RGBA_ADDRD(dest);
581 break;
582 case OPCODE_MIN:
583 src[0] = make_src(fp, fpi->SrcReg[0]);
584 src[1] = make_src(fp, fpi->SrcReg[1]);
585 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | pixel_mask;
586 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
587 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
588 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
589 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
590 | R500_ALU_RGB_SEL_B_SRC1
591 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
592 fp->inst[counter].inst4 = R500_ALPHA_OP_MIN
593 | R500_ALPHA_ADDRD(dest)
594 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
595 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
596 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
597 | R500_ALU_RGBA_ADDRD(dest);
598 break;
599 case OPCODE_MOV:
600 emit_mov(fp, counter, fpi->SrcReg[0], dest);
601 fp->inst[counter].inst0 |= pixel_mask;
602 break;
603 case OPCODE_MUL:
604 src[0] = make_src(fp, fpi->SrcReg[0]);
605 src[1] = make_src(fp, fpi->SrcReg[1]);
606 /* Variation on MAD: src0*src1+0 */
607 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
608 | R500_INST_TEX_SEM_WAIT | pixel_mask;
609 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
610 | R500_RGB_ADDR1(src[1]);
611 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
612 | R500_ALPHA_ADDR1(src[1]);
613 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
614 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
615 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
616 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
617 | R500_ALPHA_ADDRD(dest)
618 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
619 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
620 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
621 | R500_ALU_RGBA_ADDRD(dest)
622 // | R500_ALU_RGBA_SEL_C_SRC2
623 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
624 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
625 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
626 break;
627 case OPCODE_RCP:
628 src[0] = make_src(fp, fpi->SrcReg[0]);
629 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
630 | R500_INST_TEX_SEM_WAIT | pixel_mask;
631 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
632 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
633 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
634 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
635 fp->inst[counter].inst4 = R500_ALPHA_OP_RCP
636 | R500_ALPHA_ADDRD(dest)
637 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
638 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
639 | R500_ALU_RGBA_ADDRD(dest);
640 break;
641 case OPCODE_RSQ:
642 src[0] = make_src(fp, fpi->SrcReg[0]);
643 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
644 | R500_INST_TEX_SEM_WAIT | pixel_mask;
645 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
646 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
647 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
648 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
649 fp->inst[counter].inst4 = R500_ALPHA_OP_RSQ
650 | R500_ALPHA_ADDRD(dest)
651 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
652 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
653 | R500_ALU_RGBA_ADDRD(dest);
654 break;
655 case OPCODE_SCS:
656 /* TODO: Make this elegant! */
657 /* Do a cosine, then a sine, masking out the channels we want to protect. */
658 src[0] = make_src(fp, fpi->SrcReg[0]);
659 /* Cosine only goes in R (x) channel. */
660 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
661 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
662 | R500_INST_TEX_SEM_WAIT | 0x1 << 14;
663 } else {
664 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
665 | R500_INST_TEX_SEM_WAIT | 0x1 << 11;
666 }
667 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
668 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
669 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
670 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
671 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
672 | R500_ALPHA_ADDRD(dest)
673 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
674 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
675 | R500_ALU_RGBA_ADDRD(dest);
676 counter++;
677 /* Sine only goes in G (y) channel. */
678 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
679 fp->inst[counter].inst0 = R500_INST_TYPE_OUT | 0x2 << 14;
680 } else {
681 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | 0x2 << 11;
682 }
683 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
684 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
685 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
686 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
687 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
688 | R500_ALPHA_ADDRD(dest)
689 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
690 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
691 | R500_ALU_RGBA_ADDRD(dest);
692 counter++;
693 /* Put 0 into B,A (z,w) channels. */
694 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
695 fp->inst[counter].inst0 = R500_INST_TYPE_OUT | 0xC << 14;
696 } else {
697 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | 0xC << 11;
698 }
699 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
700 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
701 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
702 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
703 | R500_ALU_RGB_SEL_B_SRC0
704 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
705 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
706 | R500_ALPHA_ADDRD(dest)
707 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
708 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
709 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
710 | R500_ALU_RGBA_ADDRD(dest)
711 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
712 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
713 break;
714 case OPCODE_SIN:
715 src[0] = make_src(fp, fpi->SrcReg[0]);
716 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
717 | R500_INST_TEX_SEM_WAIT | pixel_mask;
718 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
719 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
720 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
721 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
722 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
723 | R500_ALPHA_ADDRD(dest)
724 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
725 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
726 | R500_ALU_RGBA_ADDRD(dest);
727 case OPCODE_SUB:
728 src[0] = make_src(fp, fpi->SrcReg[0]);
729 src[1] = make_src(fp, fpi->SrcReg[1]);
730 /* Variation on MAD: 1*src0-src1 */
731 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
732 | pixel_mask;
733 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
734 | R500_RGB_ADDR2(src[1]);
735 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
736 | R500_ALPHA_ADDR2(src[1]);
737 fp->inst[counter].inst3 = /* 1 */
738 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
739 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
740 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
741 | R500_ALPHA_ADDRD(dest)
742 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
743 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
744 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
745 | R500_ALU_RGBA_ADDRD(dest)
746 | R500_ALU_RGBA_SEL_C_SRC2
747 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
748 | R500_ALU_RGBA_MOD_C_NEG
749 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
750 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
751 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
752 break;
753 case OPCODE_SWZ:
754 /* TODO: Negation masks! */
755 emit_mov(fp, counter, fpi->SrcReg[0], dest);
756 fp->inst[counter].inst0 |= pixel_mask;
757 break;
758 case OPCODE_TEX:
759 emit_tex(fp, fpi, OPCODE_TEX, dest, counter);
760 break;
761 case OPCODE_TXB:
762 emit_tex(fp, fpi, OPCODE_TXB, dest, counter);
763 break;
764 case OPCODE_TXP:
765 emit_tex(fp, fpi, OPCODE_TXP, dest, counter);
766 break;
767 default:
768 ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
769 break;
770 }
771
772 /* Finishing touches */
773 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
774 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
775 }
776 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
777 fp->inst[counter].inst0 |= R500_INST_TYPE_OUT | output_mask;
778 }
779
780 counter++;
781
782 if (fp->error)
783 return GL_FALSE;
784
785 }
786
787 /* Finish him! (If it's an ALU/OUT instruction...) */
788 if ((fp->inst[counter-1].inst0 & 0x3) <= 1) {
789 fp->inst[counter-1].inst0 |= R500_INST_TYPE_OUT
790 | R500_INST_TEX_SEM_WAIT | R500_INST_LAST;
791 } else {
792 /* We still need to put an output inst, right? */
793 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
794 | R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
795 output_mask;
796 fp->inst[counter].inst1 = R500_RGB_ADDR0(dest);
797 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(dest);
798 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
799 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
800 | R500_ALU_RGB_SEL_B_SRC0
801 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
802 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
803 | R500_ALPHA_ADDRD(0)
804 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0
805 | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1;
806 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
807 | R500_ALU_RGBA_ADDRD(0)
808 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
809 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
810 counter++;
811 }
812
813 fp->cs->nrslots = counter;
814
815 fp->max_temp_idx++;
816
817 return GL_TRUE;
818 }
819
820 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
821 {
822 struct r300_pfs_compile_state *cs = NULL;
823 struct gl_fragment_program *mp = &fp->mesa_program;
824 struct prog_instruction *fpi;
825 GLuint InputsRead = mp->Base.InputsRead;
826 GLuint temps_used = 0; /* for fp->temps[] */
827 int i, j;
828
829 /* New compile, reset tracking data */
830 fp->optimization =
831 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
832 fp->translated = GL_FALSE;
833 fp->error = GL_FALSE;
834 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
835 fp->cur_node = 0;
836 fp->first_node_has_tex = 0;
837 fp->const_nr = 0;
838 /* Size of pixel stack, plus 1. */
839 fp->max_temp_idx = 1;
840 /* Temp register offset. */
841 fp->temp_reg_offset = 0;
842 fp->node[0].alu_end = -1;
843 fp->node[0].tex_end = -1;
844
845 _mesa_memset(cs, 0, sizeof(*fp->cs));
846 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
847 for (j = 0; j < 3; j++) {
848 cs->slot[i].vsrc[j] = SRC_CONST;
849 cs->slot[i].ssrc[j] = SRC_CONST;
850 }
851 }
852
853 /* Work out what temps the Mesa inputs correspond to, this must match
854 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
855 * configures itself based on the fragprog's InputsRead
856 *
857 * NOTE: this depends on get_hw_temp() allocating registers in order,
858 * starting from register 0, so we're just going to do that instead.
859 */
860
861 /* Texcoords come first */
862 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
863 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
864 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
865 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
866 fp->temp_reg_offset;
867 fp->temp_reg_offset++;
868 }
869 }
870 InputsRead &= ~FRAG_BITS_TEX_ANY;
871
872 /* fragment position treated as a texcoord */
873 if (InputsRead & FRAG_BIT_WPOS) {
874 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
875 cs->inputs[FRAG_ATTRIB_WPOS].reg =
876 fp->temp_reg_offset;
877 fp->temp_reg_offset++;
878 }
879 InputsRead &= ~FRAG_BIT_WPOS;
880
881 /* Then primary colour */
882 if (InputsRead & FRAG_BIT_COL0) {
883 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
884 cs->inputs[FRAG_ATTRIB_COL0].reg =
885 fp->temp_reg_offset;
886 fp->temp_reg_offset++;
887 }
888 InputsRead &= ~FRAG_BIT_COL0;
889
890 /* Secondary color */
891 if (InputsRead & FRAG_BIT_COL1) {
892 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
893 cs->inputs[FRAG_ATTRIB_COL1].reg =
894 fp->temp_reg_offset;
895 fp->temp_reg_offset++;
896 }
897 InputsRead &= ~FRAG_BIT_COL1;
898
899 /* Anything else */
900 if (InputsRead) {
901 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
902 /* force read from hwreg 0 for now */
903 for (i = 0; i < 32; i++)
904 if (InputsRead & (1 << i))
905 cs->inputs[i].reg = 0;
906 }
907
908 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
909 * That way, we can free up the reg when it's no longer needed
910 */
911 if (!mp->Base.Instructions) {
912 ERROR("No instructions found in program, going to go die now.\n");
913 return;
914 }
915
916 #if 0
917 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
918 int idx;
919 for (i = 0; i < 3; i++) {
920 idx = fpi->SrcReg[i].Index;
921 if (fpi->SrcReg[i].File == PROGRAM_INPUT) {
922 cs->inputs[idx].refcount++;
923 if (fp->max_temp_idx < idx)
924 fp->max_temp_idx = idx;
925 }
926 }
927 }
928 #endif
929
930 fp->max_temp_idx = fp->temp_reg_offset + 1;
931
932 cs->temp_in_use = temps_used;
933 }
934
935 static void update_params(struct r500_fragment_program *fp)
936 {
937 struct gl_fragment_program *mp = &fp->mesa_program;
938
939 /* Ask Mesa nicely to fill in ParameterValues for us */
940 if (mp->Base.Parameters)
941 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
942 }
943
944 void r500TranslateFragmentShader(r300ContextPtr r300,
945 struct r500_fragment_program *fp)
946 {
947
948 struct r300_pfs_compile_state *cs = NULL;
949
950 if (!fp->translated) {
951
952 /* I need to see what I'm working with! */
953 fprintf(stderr, "Mesa program:\n");
954 fprintf(stderr, "-------------\n");
955 _mesa_print_program(&fp->mesa_program.Base);
956 fflush(stdout);
957
958 init_program(r300, fp);
959 cs = fp->cs;
960
961 if (parse_program(fp) == GL_FALSE) {
962 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
963 dumb_shader(fp);
964 fp->inst_offset = 0;
965 fp->inst_end = cs->nrslots - 1;
966 return;
967 }
968 fp->inst_offset = 0;
969 fp->inst_end = cs->nrslots - 1;
970
971 fp->translated = GL_TRUE;
972 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
973 }
974
975 update_params(fp);
976 }