2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * \author Ben Skeggs <darktama@iinet.net.au>
33 * \author Jerome Glisse <j.glisse@gmail.com>
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
37 * \todo Depth write, WPOS/FOGC inputs
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
55 #include "r300_state.h"
/*
 * Useful macros and values
 */

/*
 * Print a diagnostic prefixed with the file and function name and flag the
 * fragment program as failed.  Expects a variable named `fp` (pointer to the
 * fragment program being compiled) to be in scope at the point of use.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define ERROR(fmt, args...) do {			\
		fprintf(stderr, "%s::%s(): " fmt "\n",	\
			__FILE__, __FUNCTION__, ##args);	\
		fp->error = GL_TRUE;			\
	} while (0)

/* Declares a local `cs` aliasing the compile state hanging off `fp`. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Single-channel swizzle selectors and packed three-channel (3 bits per
 * channel) RGB swizzles built from them. */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
96 static inline GLuint
make_rgb_swizzle(struct prog_src_register src
) {
99 /* This could be optimized, but it should be plenty fast already. */
101 for (i
= 0; i
< 3; i
++) {
102 temp
= (src
.Swizzle
>> i
*3) & 0x7;
103 /* Fix SWIZZLE_ONE */
104 if (temp
== 5) temp
++;
110 static inline GLuint
make_alpha_swizzle(struct prog_src_register src
) {
111 GLuint swiz
= (src
.Swizzle
>> 12) & 0x7;
112 if (swiz
== 5) swiz
++;
116 static inline GLuint
make_strq_swizzle(struct prog_src_register src
) {
118 GLuint temp
= src
.Swizzle
;
120 for (i
= 0; i
< 4; i
++) {
121 swiz
+= (temp
& 0x3) << i
*2;
/*
 * get_temp: pick a hardware temporary register for `slot`.
 *
 * From the statements visible here: the register index `r` is advanced past
 * entries of cs->inputs[] whose refcount is non-zero, the distance moved is
 * stored in fp->temp_reg_offset (r - slot), running out of registers
 * (r >= R500_US_NUM_TEMP_REGS) is reported through ERROR, and
 * fp->max_temp_idx is raised to r when r exceeds it.
 *
 * NOTE(review): the declaration/initialisation of `r` and `cs`, the body of
 * the while loop and the return statements are not visible in this copy --
 * confirm against the full file.  Also confirm that the cs->inputs[] scan
 * cannot run past the end of that array; no bound is checked in the loop
 * condition shown.
 */
127 static int get_temp(struct r500_fragment_program
*fp
, int slot
) {
133 while (cs
->inputs
[r
].refcount
!= 0) {
138 fp
->temp_reg_offset
= r
- slot
;
140 if (r
>= R500_US_NUM_TEMP_REGS
) {
141 ERROR("Out of hardware temps!\n");
145 if (r
> fp
->max_temp_idx
)
146 fp
->max_temp_idx
= r
;
/*
 * emit_const4fv: find or allocate a constant slot for the pointer `cp` and
 * return it encoded as a source register.
 *
 * From the statements visible here: fp->constant[0 .. const_nr) is searched
 * for an entry equal to `cp`; when the search runs off the end
 * (index >= fp->const_nr) the pointer is stored in fp->constant[index],
 * after reporting "Out of hw constants!" if index has reached
 * R500_US_NUM_CONST_REGS.  The result is the slot index with REG_CONSTANT
 * OR-ed in.
 *
 * NOTE(review): the remainder of the parameter list (which declares `cp`),
 * the locals, the loop exit, the const_nr update and the return statements
 * are not visible in this copy -- confirm against the full file.
 */
151 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
152 static GLuint
emit_const4fv(struct r500_fragment_program
*fp
,
158 for (index
= 0; index
< fp
->const_nr
; ++index
) {
159 if (fp
->constant
[index
] == cp
)
163 if (index
>= fp
->const_nr
) {
164 if (index
>= R500_US_NUM_CONST_REGS
) {
165 ERROR("Out of hw constants!\n");
170 fp
->constant
[index
] = cp
;
173 reg
= index
| REG_CONSTANT
;
/*
 * make_src: translate a Mesa source register into a hardware source index.
 *
 * From the statements visible here:
 *  - PROGRAM_TEMPORARY: src.Index offset by fp->temp_reg_offset;
 *  - a second case (label not visible; it reads cs->inputs[src.Index].reg,
 *    so presumably the program-input file -- confirm);
 *  - PROGRAM_STATE_VAR / PROGRAM_NAMED_PARAM / PROGRAM_CONSTANT: the entry
 *    of the program's Parameters->ParameterValues[] at src.Index is handed
 *    to emit_const4fv() and its result is used;
 *  - anything else is reported through ERROR.
 *
 * NOTE(review): the switch header, the locals, the break statements and the
 * return are not visible in this copy -- confirm against the full file.
 */
177 static GLuint
make_src(struct r500_fragment_program
*fp
, struct prog_src_register src
) {
181 case PROGRAM_TEMPORARY
:
182 reg
= src
.Index
+ fp
->temp_reg_offset
;
185 reg
= cs
->inputs
[src
.Index
].reg
;
187 case PROGRAM_STATE_VAR
:
188 case PROGRAM_NAMED_PARAM
:
189 case PROGRAM_CONSTANT
:
190 reg
= emit_const4fv(fp
, fp
->mesa_program
.Base
.Parameters
->
191 ParameterValues
[src
.Index
]);
194 ERROR("Can't handle src.File %x\n", src
.File
);
/*
 * make_dest: translate a Mesa destination register into a hardware
 * destination index.
 *
 * From the statements visible here: PROGRAM_TEMPORARY maps to dest.Index
 * offset by fp->temp_reg_offset, and an unhandled register file is reported
 * through ERROR.
 *
 * NOTE(review): the switch header, the case that the "multiple rendering
 * targets" remark belongs to (presumably the output file -- confirm), the
 * break statements and the return are not visible in this copy.
 */
201 static GLuint
make_dest(struct r500_fragment_program
*fp
, struct prog_dst_register dest
) {
204 case PROGRAM_TEMPORARY
:
205 reg
= dest
.Index
+ fp
->temp_reg_offset
;
208 /* Eventually we may need to handle multiple
209 * rendering targets... */
213 ERROR("Can't handle dest.File %x\n", dest
.File
);
220 static void emit_tex(struct r500_fragment_program
*fp
,
221 struct prog_instruction
*fpi
, int opcode
, int dest
, int counter
)
226 mask
= fpi
->DstReg
.WriteMask
<< 11;
227 hwsrc
= make_src(fp
, fpi
->SrcReg
[0]);
229 fp
->inst
[counter
].inst0
= R500_INST_TYPE_TEX
| mask
230 | R500_INST_TEX_SEM_WAIT
;
232 fp
->inst
[counter
].inst1
= R500_TEX_ID(fpi
->TexSrcUnit
)
233 | R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
235 if (fpi
->TexSrcTarget
== TEXTURE_RECT_INDEX
)
236 fp
->inst
[counter
].inst1
|= R500_TEX_UNSCALED
;
240 fp
->inst
[counter
].inst1
|= R500_TEX_INST_TEXKILL
;
243 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LD
;
246 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LODBIAS
;
249 fp
->inst
[counter
].inst1
|= R500_TEX_INST_PROJ
;
252 ERROR("emit_tex can't handle opcode %x\n", opcode
);
255 fp
->inst
[counter
].inst2
= R500_TEX_SRC_ADDR(hwsrc
)
256 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
257 | R500_TEX_SRC_S_SWIZ_R
| R500_TEX_SRC_T_SWIZ_G
258 | R500_TEX_SRC_R_SWIZ_B
| R500_TEX_SRC_Q_SWIZ_A
259 | R500_TEX_DST_ADDR(dest
)
260 | R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
261 | R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
265 fp
->inst
[counter
].inst3
= 0x0;
266 fp
->inst
[counter
].inst4
= 0x0;
267 fp
->inst
[counter
].inst5
= 0x0;
270 static void dumb_shader(struct r500_fragment_program
*fp
)
272 fp
->inst
[0].inst0
= R500_INST_TYPE_TEX
273 | R500_INST_TEX_SEM_WAIT
274 | R500_INST_RGB_WMASK_R
275 | R500_INST_RGB_WMASK_G
276 | R500_INST_RGB_WMASK_B
277 | R500_INST_ALPHA_WMASK
278 | R500_INST_RGB_CLAMP
279 | R500_INST_ALPHA_CLAMP
;
280 fp
->inst
[0].inst1
= R500_TEX_ID(0)
282 | R500_TEX_SEM_ACQUIRE
283 | R500_TEX_IGNORE_UNCOVERED
;
284 fp
->inst
[0].inst2
= R500_TEX_SRC_ADDR(0)
285 | R500_TEX_SRC_S_SWIZ_R
286 | R500_TEX_SRC_T_SWIZ_G
287 | R500_TEX_DST_ADDR(0)
288 | R500_TEX_DST_R_SWIZ_R
289 | R500_TEX_DST_G_SWIZ_G
290 | R500_TEX_DST_B_SWIZ_B
291 | R500_TEX_DST_A_SWIZ_A
;
292 fp
->inst
[0].inst3
= R500_DX_ADDR(0)
302 fp
->inst
[0].inst4
= 0x0;
303 fp
->inst
[0].inst5
= 0x0;
305 fp
->inst
[1].inst0
= R500_INST_TYPE_OUT
|
306 R500_INST_TEX_SEM_WAIT
|
308 R500_INST_RGB_OMASK_R
|
309 R500_INST_RGB_OMASK_G
|
310 R500_INST_RGB_OMASK_B
|
311 R500_INST_ALPHA_OMASK
;
312 fp
->inst
[1].inst1
= R500_RGB_ADDR0(0) |
314 R500_RGB_ADDR1_CONST
|
316 R500_RGB_ADDR2_CONST
|
317 R500_RGB_SRCP_OP_1_MINUS_2RGB0
;
318 fp
->inst
[1].inst2
= R500_ALPHA_ADDR0(0) |
319 R500_ALPHA_ADDR1(0) |
320 R500_ALPHA_ADDR1_CONST
|
321 R500_ALPHA_ADDR2(0) |
322 R500_ALPHA_ADDR2_CONST
|
323 R500_ALPHA_SRCP_OP_1_MINUS_2A0
;
324 fp
->inst
[1].inst3
= R500_ALU_RGB_SEL_A_SRC0
|
325 R500_ALU_RGB_R_SWIZ_A_R
|
326 R500_ALU_RGB_G_SWIZ_A_G
|
327 R500_ALU_RGB_B_SWIZ_A_B
|
328 R500_ALU_RGB_SEL_B_SRC0
|
329 R500_ALU_RGB_R_SWIZ_B_1
|
330 R500_ALU_RGB_B_SWIZ_B_1
|
331 R500_ALU_RGB_G_SWIZ_B_1
;
332 fp
->inst
[1].inst4
= R500_ALPHA_OP_MAD
|
333 R500_ALPHA_SWIZ_A_A
|
335 fp
->inst
[1].inst5
= R500_ALU_RGBA_OP_MAD
|
336 R500_ALU_RGBA_R_SWIZ_0
|
337 R500_ALU_RGBA_G_SWIZ_0
|
338 R500_ALU_RGBA_B_SWIZ_0
|
339 R500_ALU_RGBA_A_SWIZ_0
;
342 fp
->translated
= GL_TRUE
;
345 static void emit_alu(struct r500_fragment_program
*fp
, int counter
, struct prog_instruction
*fpi
) {
346 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
347 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
349 | (fpi
->DstReg
.WriteMask
<< 14);
351 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
353 | (fpi
->DstReg
.WriteMask
<< 11);
356 fp
->inst
[counter
].inst0
|= R500_INST_TEX_SEM_WAIT
;
359 static void emit_mov(struct r500_fragment_program
*fp
, int counter
, struct prog_src_register src
, GLuint dest
) {
360 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
361 * it is technically more accurate and recommended by ATI/AMD. */
362 GLuint src_reg
= make_src(fp
, src
);
363 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src_reg
);
364 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src_reg
);
365 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
366 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src
))
367 | R500_ALU_RGB_SEL_B_SRC0
368 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src
))
369 | R500_ALU_RGB_OMOD_DISABLE
;
370 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
371 | R500_ALPHA_ADDRD(dest
)
372 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src
))
373 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src
))
374 | R500_ALPHA_OMOD_DISABLE
;
375 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
376 | R500_ALU_RGBA_ADDRD(dest
)
377 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
378 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
381 static GLboolean
parse_program(struct r500_fragment_program
*fp
)
383 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
384 const struct prog_instruction
*inst
= mp
->Base
.Instructions
;
385 struct prog_instruction
*fpi
;
386 GLuint src
[3], dest
, temp
[2];
387 int flags
, pixel_mask
= 0, output_mask
= 0, counter
= 0;
389 if (!inst
|| inst
[0].Opcode
== OPCODE_END
) {
390 ERROR("The program is empty!\n");
394 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
396 if (fpi
->Opcode
!= OPCODE_KIL
) {
397 dest
= make_dest(fp
, fpi
->DstReg
);
399 pixel_mask
= fpi
->DstReg
.WriteMask
<< 11;
400 output_mask
= fpi
->DstReg
.WriteMask
<< 14;
403 switch (fpi
->Opcode
) {
405 emit_alu(fp
, counter
, fpi
);
406 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
407 fp
->inst
[counter
].inst3
|= R500_ALU_RGB_MOD_A_ABS
408 | R500_ALU_RGB_MOD_B_ABS
;
409 fp
->inst
[counter
].inst4
|= R500_ALPHA_MOD_A_ABS
410 | R500_ALPHA_MOD_B_ABS
;
413 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
414 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
415 /* Variation on MAD: 1*src0+src1 */
416 emit_alu(fp
, counter
, fpi
);
417 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
418 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(0);
419 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
420 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(0);
421 fp
->inst
[counter
].inst3
= /* 1 */
422 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
423 | R500_ALU_RGB_SEL_B_SRC0
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
424 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
425 | R500_ALPHA_ADDRD(dest
)
426 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
427 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
428 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
429 | R500_ALU_RGBA_ADDRD(dest
)
430 | R500_ALU_RGBA_SEL_C_SRC1
431 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
432 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
433 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]));
436 /* This inst's selects need to be swapped as follows:
437 * 0 -> C ; 1 -> B ; 2 -> A */
438 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
439 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
440 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
441 emit_alu(fp
, counter
, fpi
);
442 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[2])
443 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[0]);
444 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[2])
445 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[0]);
446 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
447 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[2]))
448 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
449 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
450 | R500_ALPHA_ADDRD(dest
)
451 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[2]))
452 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
453 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
454 | R500_ALU_RGBA_ADDRD(dest
)
455 | R500_ALU_RGBA_SEL_C_SRC2
456 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
457 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
458 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
461 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
462 emit_alu(fp
, counter
, fpi
);
463 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
464 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
465 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
466 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
467 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_COS
468 | R500_ALPHA_ADDRD(dest
)
469 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
470 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
471 | R500_ALU_RGBA_ADDRD(dest
);
474 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
475 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
476 emit_alu(fp
, counter
, fpi
);
477 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
478 | R500_RGB_ADDR1(src
[1]);
479 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
480 | R500_ALPHA_ADDR1(src
[1]);
481 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
482 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
483 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
484 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
485 | R500_ALPHA_ADDRD(dest
)
486 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
487 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
488 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP3
489 | R500_ALU_RGBA_ADDRD(dest
);
492 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
493 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
495 emit_alu(fp
, counter
, fpi
);
496 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
497 | R500_RGB_ADDR1(src
[1]);
498 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
499 | R500_ALPHA_ADDR1(src
[1]);
500 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
501 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
502 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
503 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
504 | R500_ALPHA_ADDRD(dest
)
505 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
506 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
507 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
508 | R500_ALU_RGBA_ADDRD(dest
);
511 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
512 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
514 emit_alu(fp
, counter
, fpi
);
515 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
516 | R500_RGB_ADDR1(src
[1]);
517 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
518 | R500_ALPHA_ADDR1(src
[1]);
519 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
520 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
521 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
522 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
523 | R500_ALPHA_ADDRD(dest
)
524 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
525 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
526 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
527 | R500_ALU_RGBA_ADDRD(dest
);
530 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
531 emit_alu(fp
, counter
, fpi
);
532 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
533 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
534 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
535 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
536 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_EX2
537 | R500_ALPHA_ADDRD(dest
)
538 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
539 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
540 | R500_ALU_RGBA_ADDRD(dest
);
543 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
544 emit_alu(fp
, counter
, fpi
);
545 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
546 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
547 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
548 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
549 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
550 | R500_ALPHA_ADDRD(dest
)
551 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
552 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
553 | R500_ALU_RGBA_ADDRD(dest
);
556 emit_tex(fp
, fpi
, OPCODE_KIL
, dest
, counter
);
559 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
560 emit_alu(fp
, counter
, fpi
);
561 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
562 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
563 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
564 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
565 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_LN2
566 | R500_ALPHA_ADDRD(dest
)
567 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
568 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
569 | R500_ALU_RGBA_ADDRD(dest
);
572 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
573 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
574 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
575 emit_alu(fp
, counter
, fpi
);
576 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
577 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
578 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
579 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
580 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
581 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
582 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
583 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
584 | R500_ALPHA_ADDRD(dest
)
585 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
586 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
587 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
588 | R500_ALU_RGBA_ADDRD(dest
)
589 | R500_ALU_RGBA_SEL_C_SRC2
590 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
591 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
592 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
595 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
596 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
597 emit_alu(fp
, counter
, fpi
);
598 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
599 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
600 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
601 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
602 | R500_ALU_RGB_SEL_B_SRC1
603 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
604 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
605 | R500_ALPHA_ADDRD(dest
)
606 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
607 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
608 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
609 | R500_ALU_RGBA_ADDRD(dest
);
612 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
613 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
614 emit_alu(fp
, counter
, fpi
);
615 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
616 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
617 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
618 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
619 | R500_ALU_RGB_SEL_B_SRC1
620 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
621 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MIN
622 | R500_ALPHA_ADDRD(dest
)
623 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
624 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
625 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MIN
626 | R500_ALU_RGBA_ADDRD(dest
);
629 emit_alu(fp
, counter
, fpi
);
630 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
633 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
634 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
635 /* Variation on MAD: src0*src1+0 */
636 emit_alu(fp
, counter
, fpi
);
637 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
638 | R500_RGB_ADDR1(src
[1]);
639 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
640 | R500_ALPHA_ADDR1(src
[1]);
641 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
642 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
643 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
644 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
645 | R500_ALPHA_ADDRD(dest
)
646 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
647 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
648 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
649 | R500_ALU_RGBA_ADDRD(dest
)
650 // | R500_ALU_RGBA_SEL_C_SRC2
651 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
652 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
653 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
656 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
657 emit_alu(fp
, counter
, fpi
);
658 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
659 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
660 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
661 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
662 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_RCP
663 | R500_ALPHA_ADDRD(dest
)
664 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
665 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
666 | R500_ALU_RGBA_ADDRD(dest
);
669 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
670 emit_alu(fp
, counter
, fpi
);
671 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
672 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
673 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
674 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
675 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_RSQ
676 | R500_ALPHA_ADDRD(dest
)
677 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
678 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
679 | R500_ALU_RGBA_ADDRD(dest
);
682 /* TODO: Make this elegant! */
683 /* Do a cosine, then a sine, masking out the channels we want to protect. */
684 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
685 /* Cosine only goes in R (x) channel. */
686 fpi
->DstReg
.WriteMask
= 0x1;
687 emit_alu(fp
, counter
, fpi
);
688 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
689 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
690 | R500_INST_TEX_SEM_WAIT
| 0x1 << 14;
692 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
693 | R500_INST_TEX_SEM_WAIT
| 0x1 << 11;
695 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
696 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
697 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
698 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
699 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_COS
700 | R500_ALPHA_ADDRD(dest
)
701 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
702 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
703 | R500_ALU_RGBA_ADDRD(dest
);
705 /* Sine only goes in G (y) channel. */
706 fpi
->DstReg
.WriteMask
= 0x2;
707 emit_alu(fp
, counter
, fpi
);
708 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
709 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
710 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
711 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
712 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_SIN
713 | R500_ALPHA_ADDRD(dest
)
714 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
715 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
716 | R500_ALU_RGBA_ADDRD(dest
);
718 /* Put 0 into B,A (z,w) channels. */
719 fpi
->DstReg
.WriteMask
= 0xC;
720 emit_alu(fp
, counter
, fpi
);
721 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
722 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
723 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
724 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO
)
725 | R500_ALU_RGB_SEL_B_SRC0
726 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO
);
727 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
728 | R500_ALPHA_ADDRD(dest
)
729 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO
)
730 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO
);
731 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
732 | R500_ALU_RGBA_ADDRD(dest
)
733 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
734 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
737 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
738 emit_alu(fp
, counter
, fpi
);
739 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
740 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
741 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
742 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
743 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_SIN
744 | R500_ALPHA_ADDRD(dest
)
745 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
746 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
747 | R500_ALU_RGBA_ADDRD(dest
);
750 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
751 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
752 /* Variation on MAD: 1*src0-src1 */
753 emit_alu(fp
, counter
, fpi
);
754 fp
->inst
[counter
].inst1
= R500_RGB_ADDR1(src
[0])
755 | R500_RGB_ADDR2(src
[1]);
756 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR1(src
[0])
757 | R500_ALPHA_ADDR2(src
[1]);
758 fp
->inst
[counter
].inst3
= /* 1 */
759 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
760 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
761 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
762 | R500_ALPHA_ADDRD(dest
)
763 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
764 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
765 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
766 | R500_ALU_RGBA_ADDRD(dest
)
767 | R500_ALU_RGBA_SEL_C_SRC2
768 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
769 | R500_ALU_RGBA_MOD_C_NEG
770 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
771 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]))
772 | R500_ALU_RGBA_ALPHA_MOD_C_NEG
;
775 /* TODO: Negation masks! */
776 emit_alu(fp
, counter
, fpi
);
777 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
780 emit_tex(fp
, fpi
, OPCODE_TEX
, dest
, counter
);
783 emit_tex(fp
, fpi
, OPCODE_TXB
, dest
, counter
);
786 emit_tex(fp
, fpi
, OPCODE_TXP
, dest
, counter
);
789 ERROR("unknown fpi->Opcode %d\n", fpi
->Opcode
);
793 /* Finishing touches */
794 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
) {
795 fp
->inst
[counter
].inst0
|= R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
805 /* Finish him! (If it's an ALU/OUT instruction...) */
806 if ((fp
->inst
[counter
-1].inst0
& 0x3) == 1) {
807 fp
->inst
[counter
-1].inst0
|= R500_INST_LAST
;
809 /* We still need to put an output inst, right? */
810 WARN_ONCE("Final FP instruction is not an OUT.\n");
812 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
813 | R500_INST_TEX_SEM_WAIT
| R500_INST_LAST
|
815 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(dest
);
816 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(dest
);
817 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
818 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
819 | R500_ALU_RGB_SEL_B_SRC0
820 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE
);
821 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
822 | R500_ALPHA_ADDRD(0)
823 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SEL_B_SRC0
824 | R500_ALPHA_SWIZ_A_A
| R500_ALPHA_SWIZ_B_1
;
825 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
826 | R500_ALU_RGBA_ADDRD(0)
827 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
828 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
833 fp
->cs
->nrslots
= counter
;
840 static void init_program(r300ContextPtr r300
, struct r500_fragment_program
*fp
)
842 struct r300_pfs_compile_state
*cs
= NULL
;
843 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
844 struct prog_instruction
*fpi
;
845 GLuint InputsRead
= mp
->Base
.InputsRead
;
846 GLuint temps_used
= 0; /* for fp->temps[] */
849 /* New compile, reset tracking data */
851 driQueryOptioni(&r300
->radeon
.optionCache
, "fp_optimization");
852 fp
->translated
= GL_FALSE
;
853 fp
->error
= GL_FALSE
;
854 fp
->cs
= cs
= &(R300_CONTEXT(fp
->ctx
)->state
.pfs_compile
);
856 fp
->first_node_has_tex
= 0;
858 /* Size of pixel stack, plus 1. */
859 fp
->max_temp_idx
= 1;
860 /* Temp register offset. */
861 fp
->temp_reg_offset
= 0;
862 fp
->node
[0].alu_end
= -1;
863 fp
->node
[0].tex_end
= -1;
865 _mesa_memset(cs
, 0, sizeof(*fp
->cs
));
866 for (i
= 0; i
< PFS_MAX_ALU_INST
; i
++) {
867 for (j
= 0; j
< 3; j
++) {
868 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
869 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
873 /* Work out what temps the Mesa inputs correspond to, this must match
874 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
875 * configures itself based on the fragprog's InputsRead
877 * NOTE: this depends on get_hw_temp() allocating registers in order,
878 * starting from register 0, so we're just going to do that instead.
881 /* Texcoords come first */
882 for (i
= 0; i
< fp
->ctx
->Const
.MaxTextureUnits
; i
++) {
883 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
884 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].refcount
= 0;
885 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].reg
=
887 fp
->temp_reg_offset
++;
890 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
892 /* fragment position treated as a texcoord */
893 if (InputsRead
& FRAG_BIT_WPOS
) {
894 cs
->inputs
[FRAG_ATTRIB_WPOS
].refcount
= 0;
895 cs
->inputs
[FRAG_ATTRIB_WPOS
].reg
=
897 fp
->temp_reg_offset
++;
899 InputsRead
&= ~FRAG_BIT_WPOS
;
901 /* Then primary colour */
902 if (InputsRead
& FRAG_BIT_COL0
) {
903 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
904 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
=
906 fp
->temp_reg_offset
++;
908 InputsRead
&= ~FRAG_BIT_COL0
;
910 /* Secondary color */
911 if (InputsRead
& FRAG_BIT_COL1
) {
912 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
913 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
=
915 fp
->temp_reg_offset
++;
917 InputsRead
&= ~FRAG_BIT_COL1
;
921 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead
);
922 /* force read from hwreg 0 for now */
923 for (i
= 0; i
< 32; i
++)
924 if (InputsRead
& (1 << i
))
925 cs
->inputs
[i
].reg
= 0;
928 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
929 * That way, we can free up the reg when it's no longer needed
931 if (!mp
->Base
.Instructions
) {
932 ERROR("No instructions found in program, going to go die now.\n");
937 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
939 for (i
= 0; i
< 3; i
++) {
940 idx
= fpi
->SrcReg
[i
].Index
;
941 if (fpi
->SrcReg
[i
].File
== PROGRAM_INPUT
) {
942 cs
->inputs
[idx
].refcount
++;
943 if (fp
->max_temp_idx
< idx
)
944 fp
->max_temp_idx
= idx
;
950 fp
->max_temp_idx
= fp
->temp_reg_offset
+ 1;
952 cs
->temp_in_use
= temps_used
;
955 static void update_params(struct r500_fragment_program
*fp
)
957 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
959 /* Ask Mesa nicely to fill in ParameterValues for us */
960 if (mp
->Base
.Parameters
)
961 _mesa_load_state_parameters(fp
->ctx
, mp
->Base
.Parameters
);
964 void r500TranslateFragmentShader(r300ContextPtr r300
,
965 struct r500_fragment_program
*fp
)
968 struct r300_pfs_compile_state
*cs
= NULL
;
970 if (!fp
->translated
) {
972 /* I need to see what I'm working with! */
973 fprintf(stderr
, "Mesa program:\n");
974 fprintf(stderr
, "-------------\n");
975 _mesa_print_program(&fp
->mesa_program
.Base
);
978 init_program(r300
, fp
);
981 if (parse_program(fp
) == GL_FALSE
) {
982 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
985 fp
->inst_end
= cs
->nrslots
- 1;
989 fp
->inst_end
= cs
->nrslots
- 1;
991 fp
->translated
= GL_TRUE
;
992 r300UpdateStateParameters(fp
->ctx
, _NEW_PROGRAM
);