2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * \author Ben Skeggs <darktama@iinet.net.au>
33 * \author Jerome Glisse <j.glisse@gmail.com>
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
37 * \todo Depth write, WPOS/FOGC inputs
41 * \todo Verify results of opcodes for accuracy; so far they have only been checked in some specific cases.
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
55 #include "r300_state.h"
/*
 * Useful macros and values
 */

/*
 * Report a translation error on stderr, prefixed with the current file and
 * function name, and flag the program as failed.
 *
 * The expansion refers to a variable named `fp`, so the macro can only be
 * used where such a pointer is in scope.  A "\n" is appended to the message
 * automatically.  Wrapped in do { } while (0) so it behaves as one statement.
 */
#define ERROR(fmt, ...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
		fp->error = GL_TRUE; \
	} while (0)

/* Declares a local `cs` aliasing fp->cs; requires `fp` in scope. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

/* "Register" flags */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle tools: per-channel selector values and ready-made RGB triples
 * (three 3-bit selectors packed at bits 0, 3 and 6). */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
#define R500_SWIZ_RGB_ZERO ((R500_SWIZZLE_ZERO << 0) | (R500_SWIZZLE_ZERO << 3) | (R500_SWIZZLE_ZERO << 6))
#define R500_SWIZ_RGB_ONE ((R500_SWIZZLE_ONE << 0) | (R500_SWIZZLE_ONE << 3) | (R500_SWIZZLE_ONE << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/*
 * The MAKE_SWIZ_* helpers move an already-packed swizzle value to its field
 * position inside an instruction word.  The argument is parenthesized so
 * that compound expressions such as (a | b) are shifted as a whole.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
93 static inline GLuint
make_rgb_swizzle(struct prog_src_register src
) {
96 /* This could be optimized, but it should be plenty fast already. */
98 for (i
= 0; i
< 3; i
++) {
99 temp
= (src
.Swizzle
>> i
*3) & 0x7;
100 /* Fix SWIZZLE_ONE */
101 if (temp
== 5) temp
++;
107 static inline GLuint
make_alpha_swizzle(struct prog_src_register src
) {
108 GLuint swiz
= (src
.Swizzle
>> 12) & 0x7;
109 if (swiz
== 5) swiz
++;
113 static inline GLuint
make_strq_swizzle(struct prog_src_register src
) {
115 GLuint temp
= src
.Swizzle
;
117 for (i
= 0; i
< 4; i
++) {
118 swiz
+= (temp
& 0x3) << i
*2;
124 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
125 static GLuint
emit_const4fv(struct r500_fragment_program
*fp
,
131 for (index
= 0; index
< fp
->const_nr
; ++index
) {
132 if (fp
->constant
[index
] == cp
)
136 if (index
>= fp
->const_nr
) {
137 /* TODO: This should be r5xx nums, not r300 */
138 if (index
>= PFS_NUM_CONST_REGS
) {
139 ERROR("Out of hw constants!\n");
144 fp
->constant
[index
] = cp
;
147 reg
= index
| REG_CONSTANT
;
/*
 * Translate a Mesa source register into the register value used when
 * building hardware instructions.
 *
 * Visible behaviour:
 *  - PROGRAM_TEMPORARY: raises fp->max_temp_idx to src.Index when the index
 *    is larger than the current maximum;
 *  - PROGRAM_STATE_VAR / PROGRAM_NAMED_PARAM / PROGRAM_CONSTANT: the value
 *    comes from emit_const4fv() applied to the program's
 *    Parameters->ParameterValues[src.Index];
 *  - an unhandled src.File is reported through ERROR().
 *
 * NOTE(review): the switch header, the case the "Ugly hack" comment belongs
 * to, the assignments to reg for temporaries and the return statement are
 * not visible in this view -- confirm against the complete source.
 */
151 static GLuint
make_src(struct r500_fragment_program
*fp
, struct prog_src_register src
) {
154 case PROGRAM_TEMPORARY
:
155 // reg = (src.Index << 0x1) | 0x1;
157 if (src
.Index
> fp
->max_temp_idx
)
158 fp
->max_temp_idx
= src
.Index
;
161 /* Ugly hack needed to work around Mesa;
162 * fragments don't get loaded right otherwise! */
165 case PROGRAM_STATE_VAR
:
166 case PROGRAM_NAMED_PARAM
:
167 case PROGRAM_CONSTANT
:
168 reg
= emit_const4fv(fp
, fp
->mesa_program
.Base
.Parameters
->
169 ParameterValues
[src
.Index
]);
172 ERROR("Can't handle src.File %x\n", src
.File
);
/*
 * Translate a Mesa destination register into the register value used when
 * building hardware instructions.
 *
 * Visible behaviour:
 *  - PROGRAM_TEMPORARY: raises fp->max_temp_idx to dest.Index when the
 *    index is larger than the current maximum;
 *  - an unhandled dest.File is reported through ERROR().
 *
 * NOTE(review): the switch header, the case the "multiple rendering
 * targets" comment belongs to, the assignments to reg and the return
 * statement are not visible in this view -- confirm against the complete
 * source.
 */
179 static GLuint
make_dest(struct r500_fragment_program
*fp
, struct prog_dst_register dest
) {
182 case PROGRAM_TEMPORARY
:
183 // reg = (dest.Index << 0x1) | 0x1;
185 if (dest
.Index
> fp
->max_temp_idx
)
186 fp
->max_temp_idx
= dest
.Index
;
189 /* Eventually we may need to handle multiple
190 * rendering targets... */
194 ERROR("Can't handle dest.File %x\n", dest
.File
);
/*
 * Fill hardware instruction slot fp->inst[counter] with a texture
 * instruction built from the Mesa instruction fpi.
 *
 *  - inst0: TEX type, the write mask (DstReg.WriteMask << 11) and the
 *    TEX_SEM_WAIT flag;
 *  - inst1: the texture unit (fpi->TexSrcUnit) plus SEM_ACQUIRE and
 *    IGNORE_UNCOVERED, then R500_TEX_INST_LD / R500_TEX_INST_PROJ;
 *  - inst2: source address from make_src(fpi->SrcReg[0]), destination
 *    address `dest`, identity swizzles on both sides (the source swizzle of
 *    the Mesa operand is currently ignored, see the commented-out line);
 *  - inst3..inst5: cleared.
 *
 * NOTE(review): the code choosing between the LD and PROJ bits is not
 * visible in this view; presumably it switches on `opcode`
 * (OPCODE_TEX / OPCODE_TXP, as passed by parse_program) -- confirm.
 */
201 static void emit_tex(struct r500_fragment_program
*fp
,
202 struct prog_instruction
*fpi
, int opcode
, int dest
, int counter
)
207 mask
= fpi
->DstReg
.WriteMask
<< 11;
208 hwsrc
= make_src(fp
, fpi
->SrcReg
[0]);
210 fp
->inst
[counter
].inst0
= R500_INST_TYPE_TEX
| mask
211 | R500_INST_TEX_SEM_WAIT
;
213 fp
->inst
[counter
].inst1
= fpi
->TexSrcUnit
214 | R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
217 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LD
;
220 fp
->inst
[counter
].inst1
|= R500_TEX_INST_PROJ
;
223 fp
->inst
[counter
].inst2
= R500_TEX_SRC_ADDR(hwsrc
)
224 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
225 | R500_TEX_SRC_S_SWIZ_R
| R500_TEX_SRC_T_SWIZ_G
226 | R500_TEX_SRC_R_SWIZ_B
| R500_TEX_SRC_Q_SWIZ_A
227 | R500_TEX_DST_ADDR(dest
)
228 | R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
229 | R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
233 fp
->inst
[counter
].inst3
= 0x0;
234 fp
->inst
[counter
].inst4
= 0x0;
235 fp
->inst
[counter
].inst5
= 0x0;
/*
 * Load a fixed two-slot fallback program into fp->inst[] and mark the
 * program as translated.
 *
 *  - Slot 0 is a TEX-type instruction: texture ID 0, source and destination
 *    address 0, all four write-mask bits set, RGB and alpha clamping on.
 *  - Slot 1 is an OUT-type instruction writing all four output channels,
 *    using the MAD opcodes with operand A taken from source 0, operand B
 *    set to one (RGB) and operand C swizzled to zero.
 *
 * NOTE(review): several continuation lines are missing from this view
 * (for example slot 0's inst3 and slot 1's inst4 stop mid-expression), so
 * the exact bit patterns should be confirmed against the complete source.
 */
238 static void dumb_shader(struct r500_fragment_program
*fp
)
240 fp
->inst
[0].inst0
= R500_INST_TYPE_TEX
241 | R500_INST_TEX_SEM_WAIT
242 | R500_INST_RGB_WMASK_R
243 | R500_INST_RGB_WMASK_G
244 | R500_INST_RGB_WMASK_B
245 | R500_INST_ALPHA_WMASK
246 | R500_INST_RGB_CLAMP
247 | R500_INST_ALPHA_CLAMP
;
248 fp
->inst
[0].inst1
= R500_TEX_ID(0)
250 | R500_TEX_SEM_ACQUIRE
251 | R500_TEX_IGNORE_UNCOVERED
;
252 fp
->inst
[0].inst2
= R500_TEX_SRC_ADDR(0)
253 | R500_TEX_SRC_S_SWIZ_R
254 | R500_TEX_SRC_T_SWIZ_G
255 | R500_TEX_DST_ADDR(0)
256 | R500_TEX_DST_R_SWIZ_R
257 | R500_TEX_DST_G_SWIZ_G
258 | R500_TEX_DST_B_SWIZ_B
259 | R500_TEX_DST_A_SWIZ_A
;
260 fp
->inst
[0].inst3
= R500_DX_ADDR(0)
270 fp
->inst
[0].inst4
= 0x0;
271 fp
->inst
[0].inst5
= 0x0;
273 fp
->inst
[1].inst0
= R500_INST_TYPE_OUT
|
274 R500_INST_TEX_SEM_WAIT
|
276 R500_INST_RGB_OMASK_R
|
277 R500_INST_RGB_OMASK_G
|
278 R500_INST_RGB_OMASK_B
|
279 R500_INST_ALPHA_OMASK
;
280 fp
->inst
[1].inst1
= R500_RGB_ADDR0(0) |
282 R500_RGB_ADDR1_CONST
|
284 R500_RGB_ADDR2_CONST
|
285 R500_RGB_SRCP_OP_1_MINUS_2RGB0
;
286 fp
->inst
[1].inst2
= R500_ALPHA_ADDR0(0) |
287 R500_ALPHA_ADDR1(0) |
288 R500_ALPHA_ADDR1_CONST
|
289 R500_ALPHA_ADDR2(0) |
290 R500_ALPHA_ADDR2_CONST
|
291 R500_ALPHA_SRCP_OP_1_MINUS_2A0
;
292 fp
->inst
[1].inst3
= R500_ALU_RGB_SEL_A_SRC0
|
293 R500_ALU_RGB_R_SWIZ_A_R
|
294 R500_ALU_RGB_G_SWIZ_A_G
|
295 R500_ALU_RGB_B_SWIZ_A_B
|
296 R500_ALU_RGB_SEL_B_SRC0
|
297 R500_ALU_RGB_R_SWIZ_B_1
|
298 R500_ALU_RGB_B_SWIZ_B_1
|
299 R500_ALU_RGB_G_SWIZ_B_1
;
300 fp
->inst
[1].inst4
= R500_ALPHA_OP_MAD
|
301 R500_ALPHA_SWIZ_A_A
|
303 fp
->inst
[1].inst5
= R500_ALU_RGBA_OP_MAD
|
304 R500_ALU_RGBA_R_SWIZ_0
|
305 R500_ALU_RGBA_G_SWIZ_0
|
306 R500_ALU_RGBA_B_SWIZ_0
|
307 R500_ALU_RGBA_A_SWIZ_0
;
310 fp
->translated
= GL_TRUE
;
/*
 * ALU instruction emitter taking the fragment program being compiled.
 * NOTE(review): no body is visible in this view; this appears to be a
 * placeholder -- confirm against the complete source.
 */
313 static void emit_alu(struct r500_fragment_program
*fp
) {
/*
 * Translate the Mesa fragment program attached to fp into R500 hardware
 * instructions stored in fp->inst[], writing the slot at index `counter`.
 *
 * Visible behaviour:
 *  - reports "The program is empty!" when there are no instructions or the
 *    first one is OPCODE_END;
 *  - walks mp->Base.Instructions until OPCODE_END; for every opcode except
 *    OPCODE_KIL the destination is resolved with make_dest() and the write
 *    mask is derived from DstReg.WriteMask << 11;
 *  - dispatches on fpi->Opcode: ALU operations fill inst0..inst5 inline
 *    (source addresses in inst1/inst2, operand A/B selects and swizzles in
 *    inst3/inst4, opcode, destination and operand C in inst4/inst5),
 *    texture operations are delegated to emit_tex(), and anything else is
 *    reported as "unknown fpi->Opcode";
 *  - SATURATE_ZERO_ONE adds the RGB/alpha clamp bits; a PROGRAM_OUTPUT
 *    destination adds the OUT type and all four output-mask bits;
 *  - after the loop, counter is stored in fp->cs->nrslots and the slot at
 *    `counter` is tagged OUT | TEX_SEM_WAIT | LAST when
 *    ((inst0 & 0x3) ^ 0x2) is non-zero.
 *
 * Returns a GLboolean; the caller treats GL_FALSE as failure.
 *
 * NOTE(review): the case labels of the switch, the return statements, the
 * handling of `counter` between instructions and several continuation
 * lines are not visible in this view -- confirm against the complete
 * source before relying on the details above.
 */
316 static GLboolean
parse_program(struct r500_fragment_program
*fp
)
318 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
319 const struct prog_instruction
*inst
= mp
->Base
.Instructions
;
320 struct prog_instruction
*fpi
;
321 GLuint src
[3], dest
, temp
[2];
322 int flags
, mask
, counter
= 0;
324 if (!inst
|| inst
[0].Opcode
== OPCODE_END
) {
325 ERROR("The program is empty!\n");
329 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
331 if (fpi
->Opcode
!= OPCODE_KIL
) {
332 dest
= make_dest(fp
, fpi
->DstReg
);
333 mask
= fpi
->DstReg
.WriteMask
<< 11;
336 switch (fpi
->Opcode
) {
338 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
339 /* Variation on MOV */
340 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
342 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
343 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
344 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
345 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
346 | R500_ALU_RGB_MOD_A_ABS
| R500_ALU_RGB_SEL_B_SRC0
347 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
348 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
349 | R500_ALPHA_ADDRD(dest
)
350 | R500_ALPHA_SEL_A_SRC0
351 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0])) | R500_ALPHA_MOD_A_ABS
352 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
353 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
354 | R500_ALU_RGBA_ADDRD(dest
);
357 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
358 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
359 /* Variation on MAD: 1*src0+src1 */
360 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
362 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
363 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(0);
364 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
365 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(0);
366 fp
->inst
[counter
].inst3
= /* 1 */
367 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
368 | R500_ALU_RGB_SEL_B_SRC0
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
369 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
370 | R500_ALPHA_ADDRD(dest
)
371 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
372 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
373 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
374 | R500_ALU_RGBA_ADDRD(dest
)
375 | R500_ALU_RGBA_SEL_C_SRC1
376 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
377 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
378 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]));
381 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
382 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
383 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
384 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
386 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
387 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
388 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
389 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
390 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
391 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
392 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
393 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
394 | R500_ALPHA_ADDRD(dest
)
395 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
396 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
397 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP3
398 | R500_ALU_RGBA_ADDRD(dest
)
399 | R500_ALU_RGBA_SEL_C_SRC2
400 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
401 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
402 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
405 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
406 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
407 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
409 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
411 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
412 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
413 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
414 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
415 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
416 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
417 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
418 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
419 | R500_ALPHA_ADDRD(dest
)
420 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
421 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
422 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
423 | R500_ALU_RGBA_ADDRD(dest
)
424 | R500_ALU_RGBA_SEL_C_SRC2
425 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
426 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
427 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
430 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
431 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
432 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
433 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
435 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
436 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
437 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
438 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
439 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
440 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
441 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
442 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
443 | R500_ALPHA_ADDRD(dest
)
444 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
445 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
446 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
447 | R500_ALU_RGBA_ADDRD(dest
)
448 | R500_ALU_RGBA_SEL_C_SRC2
449 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
450 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
451 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
454 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
/* NOTE(review): src[1] is built from SrcReg[0] here, yet operand B below
 * takes its swizzle from SrcReg[1] and this slot uses the MAX opcodes.
 * This looks like it should read SrcReg[1] -- confirm and fix. */
455 src
[1] = make_src(fp
, fpi
->SrcReg
[0]);
456 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| mask
;
457 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
458 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
459 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
460 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
461 | R500_ALU_RGB_SEL_B_SRC1
462 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
463 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
464 | R500_ALPHA_ADDRD(dest
)
465 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
466 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
467 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
468 | R500_ALU_RGBA_ADDRD(dest
);
471 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
/* NOTE(review): same pattern as the MAX slot above -- src[1] is built from
 * SrcReg[0] while operand B's swizzle comes from SrcReg[1]; this looks like
 * it should read SrcReg[1] -- confirm and fix. */
472 src
[1] = make_src(fp
, fpi
->SrcReg
[0]);
473 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| mask
;
474 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
475 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
476 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
477 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
478 | R500_ALU_RGB_SEL_B_SRC1
479 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
480 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MIN
481 | R500_ALPHA_ADDRD(dest
)
482 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
483 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
484 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MIN
485 | R500_ALU_RGBA_ADDRD(dest
);
488 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
490 /* changed to use MAD - not sure if we
491 ever have negative things which max will fail on */
492 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
494 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
495 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
496 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
497 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
498 | R500_ALU_RGB_SEL_B_SRC0
499 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE
);
500 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
501 | R500_ALPHA_ADDRD(dest
)
502 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SEL_B_SRC0
503 | R500_ALPHA_SWIZ_A_A
| R500_ALPHA_SWIZ_B_1
;
505 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
506 | R500_ALU_RGBA_ADDRD(dest
)
507 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
508 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
511 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
512 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
513 /* Variation on MAD: src0*src1+0 */
514 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
516 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
517 | R500_RGB_ADDR1(src
[1]);
518 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
519 | R500_ALPHA_ADDR1(src
[1]);
520 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
521 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
522 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
523 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
524 | R500_ALPHA_ADDRD(dest
)
525 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
526 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
527 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
528 | R500_ALU_RGBA_ADDRD(dest
)
529 // | R500_ALU_RGBA_SEL_C_SRC2
530 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
531 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
532 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
535 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
536 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
537 /* Variation on MAD: 1*src0-src1 */
538 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
540 fp
->inst
[counter
].inst1
= R500_RGB_ADDR1(src
[0])
541 | R500_RGB_ADDR2(src
[1]);
542 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR1(src
[0])
543 | R500_ALPHA_ADDR2(src
[1]);
544 fp
->inst
[counter
].inst3
= /* 1 */
545 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
546 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
547 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
548 | R500_ALPHA_ADDRD(dest
)
549 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
550 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
551 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
552 | R500_ALU_RGBA_ADDRD(dest
)
553 | R500_ALU_RGBA_SEL_C_SRC2
554 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
555 | R500_ALU_RGBA_MOD_C_NEG
556 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
557 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]))
558 | R500_ALU_RGBA_ALPHA_MOD_C_NEG
;
561 emit_tex(fp
, fpi
, OPCODE_TEX
, dest
, counter
);
564 emit_tex(fp
, fpi
, OPCODE_TXP
, dest
, counter
);
567 ERROR("unknown fpi->Opcode %d\n", fpi
->Opcode
);
571 /* Finishing touches */
572 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
) {
573 fp
->inst
[counter
].inst0
|= R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
575 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
576 fp
->inst
[counter
].inst0
|= R500_INST_TYPE_OUT
577 | R500_INST_RGB_OMASK_R
| R500_INST_RGB_OMASK_G
578 | R500_INST_RGB_OMASK_B
| R500_INST_ALPHA_OMASK
;
588 fp
->cs
->nrslots
= counter
;
590 /* Finish him! (If it's an output instruction...)
591 * Yes, I know it's ugly... */
592 if ((fp
->inst
[counter
].inst0
& 0x3) ^ 0x2) {
593 fp
->inst
[counter
].inst0
|= R500_INST_TYPE_OUT
594 | R500_INST_TEX_SEM_WAIT
| R500_INST_LAST
;
596 /* We still need to put an output inst, right? */
/*
 * Reset the per-compile state of fp before a new translation.
 *
 * Visible behaviour:
 *  - clears fp->translated and fp->error, points fp->cs (and the local cs)
 *    at the context's shared pfs_compile state and zeroes that state;
 *  - resets first_node_has_tex, max_temp_idx and node[0].alu_end/tex_end;
 *  - marks every vsrc/ssrc entry of the first PFS_MAX_ALU_INST slots as
 *    SRC_CONST;
 *  - walks mp->Base.InputsRead, resetting the refcount and assigning a
 *    register for each texcoord, then WPOS (which also calls insert_wpos()),
 *    then COL0 and COL1; any input bits left over are reported once with
 *    WARN_ONCE and mapped to register 0;
 *  - reports "No instructions found in program" when the program has no
 *    instruction array;
 *  - pre-scans all instructions up to OPCODE_END, counting references to
 *    temporaries (cs->temps[]) and inputs (cs->inputs[]) for the three
 *    source operands and the destination, and finally records the bitmask
 *    of temporaries seen in cs->temp_in_use.
 *
 * NOTE(review): declarations of i, j and idx, several closing braces and
 * comment terminators, and parts of some statements are missing from this
 * view -- confirm against the complete source.
 */
604 static void init_program(r300ContextPtr r300
, struct r500_fragment_program
*fp
)
606 struct r300_pfs_compile_state
*cs
= NULL
;
607 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
608 struct prog_instruction
*fpi
;
609 GLuint InputsRead
= mp
->Base
.InputsRead
;
610 GLuint temps_used
= 0; /* for fp->temps[] */
613 /* New compile, reset tracking data */
615 driQueryOptioni(&r300
->radeon
.optionCache
, "fp_optimization");
616 fp
->translated
= GL_FALSE
;
617 fp
->error
= GL_FALSE
;
618 fp
->cs
= cs
= &(R300_CONTEXT(fp
->ctx
)->state
.pfs_compile
);
620 fp
->first_node_has_tex
= 0;
622 fp
->max_temp_idx
= 0;
623 fp
->node
[0].alu_end
= -1;
624 fp
->node
[0].tex_end
= -1;
626 _mesa_memset(cs
, 0, sizeof(*fp
->cs
));
627 for (i
= 0; i
< PFS_MAX_ALU_INST
; i
++) {
628 for (j
= 0; j
< 3; j
++) {
629 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
630 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
634 /* Work out what temps the Mesa inputs correspond to, this must match
635 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
636 * configures itself based on the fragprog's InputsRead
638 * NOTE: this depends on get_hw_temp() allocating registers in order,
639 * starting from register 0.
643 /* Texcoords come first */
644 for (i
= 0; i
< fp
->ctx
->Const
.MaxTextureUnits
; i
++) {
645 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
646 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].refcount
= 0;
647 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].reg
=
651 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
653 /* fragment position treated as a texcoord */
654 if (InputsRead
& FRAG_BIT_WPOS
) {
655 cs
->inputs
[FRAG_ATTRIB_WPOS
].refcount
= 0;
656 cs
->inputs
[FRAG_ATTRIB_WPOS
].reg
= get_hw_temp(fp
, 0);
657 insert_wpos(&mp
->Base
);
659 InputsRead
&= ~FRAG_BIT_WPOS
;
661 /* Then primary colour */
662 if (InputsRead
& FRAG_BIT_COL0
) {
663 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
664 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
= get_hw_temp(fp
, 0);
666 InputsRead
&= ~FRAG_BIT_COL0
;
668 /* Secondary color */
669 if (InputsRead
& FRAG_BIT_COL1
) {
670 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
671 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
= get_hw_temp(fp
, 0);
673 InputsRead
&= ~FRAG_BIT_COL1
;
677 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead
);
678 /* force read from hwreg 0 for now */
679 for (i
= 0; i
< 32; i
++)
680 if (InputsRead
& (1 << i
))
681 cs
->inputs
[i
].reg
= 0;
685 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
686 * That way, we can free up the reg when it's no longer needed
688 if (!mp
->Base
.Instructions
) {
689 ERROR("No instructions found in program\n");
693 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
696 for (i
= 0; i
< 3; i
++) {
697 idx
= fpi
->SrcReg
[i
].Index
;
698 switch (fpi
->SrcReg
[i
].File
) {
699 case PROGRAM_TEMPORARY
:
700 if (!(temps_used
& (1 << idx
))) {
701 cs
->temps
[idx
].reg
= -1;
702 cs
->temps
[idx
].refcount
= 1;
703 temps_used
|= (1 << idx
);
705 cs
->temps
[idx
].refcount
++;
708 cs
->inputs
[idx
].refcount
++;
715 idx
= fpi
->DstReg
.Index
;
716 if (fpi
->DstReg
.File
== PROGRAM_TEMPORARY
) {
717 if (!(temps_used
& (1 << idx
))) {
718 cs
->temps
[idx
].reg
= -1;
719 cs
->temps
[idx
].refcount
= 1;
720 temps_used
|= (1 << idx
);
722 cs
->temps
[idx
].refcount
++;
725 cs
->temp_in_use
= temps_used
;
728 static void update_params(struct r500_fragment_program
*fp
)
730 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
732 /* Ask Mesa nicely to fill in ParameterValues for us */
733 if (mp
->Base
.Parameters
)
734 _mesa_load_state_parameters(fp
->ctx
, mp
->Base
.Parameters
);
/*
 * Translate fp's Mesa program into R500 hardware form, unless it has
 * already been translated.
 *
 * Visible behaviour when fp->translated is false:
 *  - dumps the Mesa program to stderr for debugging;
 *  - resets compile state with init_program() and runs parse_program();
 *    a GL_FALSE result is reported through ERROR() (the message announces
 *    a fallback to the dumb shader);
 *  - derives the current node's alu_end from cs->nrslots and its
 *    alu_offset, forces a negative tex_end to 0, and sets fp->alu_end to
 *    cs->nrslots - 1;
 *  - marks the program translated and calls r300UpdateStateParameters()
 *    with _NEW_PROGRAM.
 *
 * NOTE(review): `cs` is initialised to NULL and later dereferenced; the
 * assignment in between (presumably cs = fp->cs) and the body of the
 * failure path are not visible in this view -- confirm against the
 * complete source.
 */
737 void r500TranslateFragmentShader(r300ContextPtr r300
,
738 struct r500_fragment_program
*fp
)
741 struct r300_pfs_compile_state
*cs
= NULL
;
743 if (!fp
->translated
) {
745 /* I need to see what I'm working with! */
746 fprintf(stderr
, "Mesa program:\n");
747 fprintf(stderr
, "-------------\n");
748 _mesa_print_program(&fp
->mesa_program
.Base
);
751 init_program(r300
, fp
);
754 if (parse_program(fp
) == GL_FALSE
) {
755 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
761 fp
->node
[fp
->cur_node
].alu_end
=
762 cs
->nrslots
- fp
->node
[fp
->cur_node
].alu_offset
- 1;
763 if (fp
->node
[fp
->cur_node
].tex_end
< 0)
764 fp
->node
[fp
->cur_node
].tex_end
= 0;
766 fp
->alu_end
= cs
->nrslots
- 1;
767 //assert(fp->node[fp->cur_node].alu_end >= 0);
768 //assert(fp->alu_end >= 0);
770 fp
->translated
= GL_TRUE
;
771 r300UpdateStateParameters(fp
->ctx
, _NEW_PROGRAM
);