2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * \author Ben Skeggs <darktama@iinet.net.au>
33 * \author Jerome Glisse <j.glisse@gmail.com>
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
37 * \todo Depth write, WPOS/FOGC inputs
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
55 #include "r300_state.h"
58 * Useful macros and values
/* ERROR(fmt, ...): print a diagnostic to stderr, prefixed with the source file
 * and function name, and set fp->error to GL_TRUE.  Relies on a variable named
 * `fp` being in scope where the macro is used.
 * COMPILE_STATE: declares a local `cs` initialised from fp->cs.
 * NOTE(review): the line that closes the do { ... } block of ERROR is not
 * present in this copy of the file; restore it from the upstream source. */
60 #define ERROR(fmt, args...) do { \
61 fprintf(stderr, "%s::%s(): " fmt "\n", \
62 __FILE__, __FUNCTION__, ##args); \
63 fp->error = GL_TRUE; \
66 #define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs
68 /* "Register" flags */
/* Flag bit OR-ed onto a constant slot index by emit_const4fv(). */
69 #define REG_CONSTANT (1 << 8)
/* 3-bit swizzle selector values for the constant inputs zero, half and one.
 * The R500_SWIZ_RGB_* forms replicate one selector into three 3-bit fields
 * at bit offsets 0, 3 and 6. */
72 #define R500_SWIZZLE_ZERO 4
73 #define R500_SWIZZLE_HALF 5
74 #define R500_SWIZZLE_ONE 6
75 #define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
76 #define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
/*
 * Field-placement helpers: each macro shifts an already-encoded swizzle
 * value to the bit offset of one operand field in an instruction word.
 * The argument is parenthesised so that compound expressions such as
 * MAKE_SWIZ_RGB_A(a | b) shift the whole value, not just its last operand.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzles for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
/* Walk the three low 3-bit selector fields of src.Swizzle (bit offsets 0, 3
 * and 6).  A selector equal to 5 is bumped to 6, the value this file defines
 * as R500_SWIZZLE_ONE.
 * NOTE(review): this copy of the function is incomplete -- the declarations of
 * the locals, the statement that folds each selector into the result, the
 * return statement and the closing braces are not present.  Restore them from
 * the upstream source before building. */
90 static inline GLuint
make_rgb_swizzle(struct prog_src_register src
) {
93 /* This could be optimized, but it should be plenty fast already. */
94 for (int i
= 0; i
< 3; i
++) {
95 temp
= (src
.Swizzle
>> i
*3) & 0x7;
97 if (temp
== 5) temp
++;
/* Extract the 3-bit selector at bit offset 12 of src.Swizzle.  A selector
 * equal to 5 is bumped to 6, the value this file defines as R500_SWIZZLE_ONE.
 * NOTE(review): the return statement and closing brace are not present in
 * this copy; presumably the function returns `swiz` -- confirm upstream. */
103 static inline GLuint
make_alpha_swizzle(struct prog_src_register src
) {
104 GLuint swiz
= (src
.Swizzle
>> 12) & 0x7;
105 if (swiz
== 5) swiz
++;
/* Pack four 2-bit selectors into `swiz`: on each of four iterations the low
 * two bits of `temp` (initialised from src.Swizzle) are added at bit offset
 * i*2.
 * NOTE(review): this copy is incomplete -- the declaration of `swiz`, whatever
 * advances `temp` between iterations, the return statement and the closing
 * braces are not present.  Restore them from the upstream source. */
109 static inline GLuint
make_strq_swizzle(struct prog_src_register src
) {
111 GLuint temp
= src
.Swizzle
;
112 for (int i
= 0; i
< 4; i
++) {
113 swiz
+= (temp
& 0x3) << i
*2;
/* Look up the constant pointer `cp` in fp->constant[0 .. fp->const_nr) and
 * produce a register value of the form (index | REG_CONSTANT).  When the
 * search runs off the end of the used entries, `cp` is stored at
 * fp->constant[index]; an index at or beyond PFS_NUM_CONST_REGS is reported
 * through ERROR().
 * NOTE(review): this copy is incomplete -- the rest of the parameter list
 * (including the declaration of `cp`), the locals, the loop exit, any update
 * of fp->const_nr, the return statements and the closing braces are not
 * present.  Restore them from the upstream source. */
119 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
120 static GLuint
emit_const4fv(struct r500_fragment_program
*fp
,
126 for (index
= 0; index
< fp
->const_nr
; ++index
) {
127 if (fp
->constant
[index
] == cp
)
131 if (index
>= fp
->const_nr
) {
132 /* TODO: This should be r5xx nums, not r300 */
133 if (index
>= PFS_NUM_CONST_REGS
) {
134 ERROR("Out of hw constants!\n");
139 fp
->constant
[index
] = cp
;
142 reg
= index
| REG_CONSTANT
;
/* Map a Mesa source register to a hardware register value.
 *   PROGRAM_TEMPORARY: (src.Index << 1) | 1
 *   PROGRAM_CONSTANT:  the value produced by emit_const4fv() for the matching
 *                      entry of the program's ParameterValues
 *   anything else visible here: reported through ERROR()
 * NOTE(review): this copy is incomplete -- the declaration of `reg`, the
 * switch header, the case the "Ugly hack" comment belongs to, the `break`s,
 * the default label, the return statement and the closing braces are not
 * present.  Restore them from the upstream source. */
146 static GLuint
make_src(struct r500_fragment_program
*fp
, struct prog_src_register src
) {
149 case PROGRAM_TEMPORARY
:
150 reg
= (src
.Index
<< 0x1) | 0x1;
153 /* Ugly hack needed to work around Mesa;
154 * fragments don't get loaded right otherwise! */
157 case PROGRAM_CONSTANT
:
158 reg
= emit_const4fv(fp
, fp
->mesa_program
.Base
.Parameters
->
159 ParameterValues
[src
.Index
]);
162 ERROR("Can't handle src.File %x\n", src
.File
);
/* Map a Mesa destination register to a hardware register value.
 *   PROGRAM_TEMPORARY: (dest.Index << 1) | 1
 *   anything else visible here: reported through ERROR()
 * NOTE(review): this copy is incomplete -- the declaration of `reg`, the
 * switch header, the case the "multiple rendering targets" comment belongs
 * to, the `break`s, the default label, the return statement and the closing
 * braces are not present.  Restore them from the upstream source. */
169 static GLuint
make_dest(struct r500_fragment_program
*fp
, struct prog_dst_register dest
) {
172 case PROGRAM_TEMPORARY
:
173 reg
= (dest
.Index
<< 0x1) | 0x1;
176 /* Eventually we may need to handle multiple
177 * rendering targets... */
181 ERROR("Can't handle dest.File %x\n", dest
.File
);
/* Fill fp->inst[0] and fp->inst[1] with a fixed two-instruction program and
 * mark the program as translated.  inst[0] is built from R500_TEX_* fields
 * (texture id 0, source and destination address 0); inst[1] is built from
 * R500_INST_TYPE_OUT plus RGB/alpha output masks and MAD ALU ops.
 * NOTE(review): this copy is incomplete -- the opening brace, the remainder of
 * inst[0].inst3, parts of inst[1].inst0/inst1/inst4 and the closing brace are
 * not present.  Restore them from the upstream source. */
188 static void dumb_shader(struct r500_fragment_program
*fp
)
190 /* R500_INST_TYPE_TEX? */
191 fp
->inst
[0].inst0
= 0x7808;
192 fp
->inst
[0].inst1
= R500_TEX_ID(0) | R500_TEX_INST_LD
| R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
193 fp
->inst
[0].inst2
= R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R
|
194 R500_TEX_SRC_T_SWIZ_G
|
195 R500_TEX_DST_ADDR(0) |
196 R500_TEX_DST_R_SWIZ_R
|
197 R500_TEX_DST_G_SWIZ_G
|
198 R500_TEX_DST_B_SWIZ_B
|
199 R500_TEX_DST_A_SWIZ_A
;
200 fp
->inst
[0].inst3
= R500_DX_ADDR(0) |
210 fp
->inst
[0].inst4
= 0x0;
211 fp
->inst
[0].inst5
= 0x0;
/* Second instruction: output-type ALU instruction. */
213 fp
->inst
[1].inst0
= R500_INST_TYPE_OUT
|
214 R500_INST_TEX_SEM_WAIT
|
216 R500_INST_RGB_OMASK_R
|
217 R500_INST_RGB_OMASK_G
|
218 R500_INST_RGB_OMASK_B
|
219 R500_INST_ALPHA_OMASK
;
220 fp
->inst
[1].inst1
= R500_RGB_ADDR0(0) |
222 R500_RGB_ADDR1_CONST
|
224 R500_RGB_ADDR2_CONST
|
225 R500_RGB_SRCP_OP_1_MINUS_2RGB0
;
226 fp
->inst
[1].inst2
= R500_ALPHA_ADDR0(0) |
227 R500_ALPHA_ADDR1(0) |
228 R500_ALPHA_ADDR1_CONST
|
229 R500_ALPHA_ADDR2(0) |
230 R500_ALPHA_ADDR2_CONST
|
231 R500_ALPHA_SRCP_OP_1_MINUS_2A0
;
232 fp
->inst
[1].inst3
= R500_ALU_RGB_SEL_A_SRC0
|
233 R500_ALU_RGB_R_SWIZ_A_R
|
234 R500_ALU_RGB_G_SWIZ_A_G
|
235 R500_ALU_RGB_B_SWIZ_A_B
|
236 R500_ALU_RGB_SEL_B_SRC0
|
237 R500_ALU_RGB_R_SWIZ_B_1
|
238 R500_ALU_RGB_B_SWIZ_B_1
|
239 R500_ALU_RGB_G_SWIZ_B_1
;
240 fp
->inst
[1].inst4
= R500_ALPHA_OP_MAD
|
241 R500_ALPHA_SWIZ_A_A
|
243 fp
->inst
[1].inst5
= R500_ALU_RGBA_OP_MAD
|
244 R500_ALU_RGBA_R_SWIZ_0
|
245 R500_ALU_RGBA_G_SWIZ_0
|
246 R500_ALU_RGBA_B_SWIZ_0
|
247 R500_ALU_RGBA_A_SWIZ_0
;
250 fp
->translated
= GL_TRUE
;
/* ALU emission helper taking the fragment program being compiled.
 * NOTE(review): no body statements and no closing brace are visible in this
 * copy; confirm against the upstream source whether the body is empty. */
253 static void emit_alu(struct r500_fragment_program
*fp
) {
/* Translate the Mesa fragment program attached to `fp` into R500 instruction
 * words in fp->inst[], walking the Mesa instruction list until OPCODE_END.
 *
 * For each instruction the visible code fills inst0..inst5 of
 * fp->inst[counter] from the operands produced by make_src()/make_dest() and
 * the swizzle helpers.  It then ORs in the clamp bits when SaturateMode is
 * SATURATE_ZERO_ONE, and the output-type and output-mask bits when the
 * destination file is PROGRAM_OUTPUT.  After the loop it stores `counter` in
 * fp->cs->nrslots and, when the low two bits of fp->inst[counter].inst0 are
 * not 2, ORs R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT | R500_INST_LAST
 * into that word.  An empty program is reported through ERROR().
 *
 * NOTE(review): this copy of the function is incomplete.  The opening brace,
 * every `case` label and `break`, the tail of several inst0 assignments, any
 * update of `counter`, the closing braces and all return statements are
 * absent.  The per-section notes below name opcodes only from the surviving
 * comments and ALU op names -- confirm each against the upstream source. */
256 static GLboolean
parse_program(struct r500_fragment_program
*fp
)
258 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
259 const struct prog_instruction
*inst
= mp
->Base
.Instructions
;
260 struct prog_instruction
*fpi
;
261 GLuint src
[3], dest
, temp
[2];
262 int flags
, mask
, counter
= 0;
264 if (!inst
|| inst
[0].Opcode
== OPCODE_END
) {
265 ERROR("The program is empty!\n");
269 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
/* KIL is the only opcode for which no destination register or write mask
 * is computed here. */
271 if (fpi
->Opcode
!= OPCODE_KIL
) {
272 dest
= make_dest(fp
, fpi
->DstReg
);
273 mask
= fpi
->DstReg
.WriteMask
<< 11;
276 switch (fpi
->Opcode
) {
/* NOTE(review): case label missing.  MAX of src0 (with the ABS modifier on
 * operand A) against src0 -- presumably the absolute-value opcode. */
278 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
279 /* Variation on MOV */
280 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
282 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
283 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
284 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
285 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
286 | R500_ALU_RGB_MOD_A_ABS
| R500_ALU_RGB_SEL_B_SRC0
287 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
288 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
289 | R500_ALPHA_ADDRD(dest
)
290 | R500_ALPHA_SEL_A_SRC0
291 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0])) | R500_ALPHA_MOD_A_ABS
292 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
293 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
294 | R500_ALU_RGBA_ADDRD(dest
);
/* NOTE(review): case label missing.  Two-operand section; the comment below
 * describes it as 1*src0+src1 -- presumably the add opcode. */
297 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
298 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
299 /* Variation on MAD: 1*src0+src1 */
300 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
302 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
303 | R500_RGB_ADDR1(src
[1]);
304 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
305 | R500_ALPHA_ADDR1(src
[1]);
306 fp
->inst
[counter
].inst3
= /* 1 */
307 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
308 | R500_ALU_RGB_SEL_B_SRC0
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
309 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
310 | R500_ALPHA_ADDRD(dest
)
311 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
312 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
313 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
314 | R500_ALU_RGBA_ADDRD(dest
)
315 | R500_ALU_RGBA_SEL_C_SRC1
316 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
317 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
318 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]));
/* NOTE(review): case label missing.  Three-operand section using
 * R500_ALU_RGBA_OP_DP3 -- presumably the 3-component dot product opcode. */
321 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
322 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
323 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
324 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
326 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
327 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
328 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
329 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
330 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
331 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
332 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
333 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
334 | R500_ALPHA_ADDRD(dest
)
335 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
336 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
337 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP3
338 | R500_ALU_RGBA_ADDRD(dest
)
339 | R500_ALU_RGBA_SEL_C_SRC2
340 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
341 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
342 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
/* NOTE(review): case label missing.  Same layout as the previous section but
 * with R500_ALU_RGBA_OP_DP4 -- presumably the 4-component dot product. */
345 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
346 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
347 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
349 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
351 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
352 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
353 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
354 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
355 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
356 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
357 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
358 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
359 | R500_ALPHA_ADDRD(dest
)
360 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
361 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
362 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
363 | R500_ALU_RGBA_ADDRD(dest
)
364 | R500_ALU_RGBA_SEL_C_SRC2
365 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
366 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
367 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
/* NOTE(review): case label missing.  Three-operand MAD with A = src0,
 * B = src1, C = src2 -- presumably the multiply-add opcode. */
370 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
371 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
372 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
373 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
375 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
376 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
377 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
378 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
379 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
380 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
381 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
382 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
383 | R500_ALPHA_ADDRD(dest
)
384 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
385 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
386 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
387 | R500_ALU_RGBA_ADDRD(dest
)
388 | R500_ALU_RGBA_SEL_C_SRC2
389 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
390 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
391 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
/* NOTE(review): case label missing; uses the MAX ALU ops.  src[1] is built
 * from SrcReg[0] while the B swizzles below come from SrcReg[1] -- confirm
 * whether SrcReg[1] was intended for src[1]. */
394 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
395 src
[1] = make_src(fp
, fpi
->SrcReg
[0]);
396 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| mask
;
397 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
398 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
399 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
400 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
401 | R500_ALU_RGB_SEL_B_SRC1
402 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
403 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
404 | R500_ALPHA_ADDRD(dest
)
405 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
406 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
407 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
408 | R500_ALU_RGBA_ADDRD(dest
);
/* NOTE(review): case label missing; uses the MIN ALU ops.  As in the previous
 * section, src[1] is built from SrcReg[0] -- confirm whether SrcReg[1] was
 * intended. */
411 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
412 src
[1] = make_src(fp
, fpi
->SrcReg
[0]);
413 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| mask
;
414 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
415 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
416 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
417 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
418 | R500_ALU_RGB_SEL_B_SRC1
419 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
420 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MIN
421 | R500_ALPHA_ADDRD(dest
)
422 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
423 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
424 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MIN
425 | R500_ALU_RGBA_ADDRD(dest
);
/* NOTE(review): case label missing.  One-operand copy implemented as MAX of
 * src0 with itself, using fixed identity swizzles and the OMOD-disable bits
 * -- presumably the move opcode. */
428 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
429 /* We use MAX, but MIN, CND, and CMP also work.
430 * Just remember to disable the OMOD! */
431 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
433 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
434 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
435 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
436 | R500_ALU_RGB_R_SWIZ_A_R
| R500_ALU_RGB_G_SWIZ_A_G
| R500_ALU_RGB_B_SWIZ_A_B
437 | R500_ALU_RGB_SEL_B_SRC0
438 | R500_ALU_RGB_R_SWIZ_B_R
| R500_ALU_RGB_G_SWIZ_B_G
| R500_ALU_RGB_B_SWIZ_B_B
439 | R500_ALU_RGB_OMOD_DISABLE
;
440 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
441 | R500_ALPHA_ADDRD(dest
)
442 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SEL_B_SRC0
443 | R500_ALPHA_OMOD_DISABLE
;
444 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
445 | R500_ALU_RGBA_ADDRD(dest
);
/* NOTE(review): case label missing.  The comment below describes this as
 * src0*src1+0 -- presumably the multiply opcode. */
448 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
449 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
450 /* Variation on MAD: src0*src1+0 */
451 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
453 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
454 | R500_RGB_ADDR1(src
[1]);
455 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
456 | R500_ALPHA_ADDR1(src
[1]);
457 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
458 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
459 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
460 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
461 | R500_ALPHA_ADDRD(dest
)
462 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
463 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
464 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
465 | R500_ALU_RGBA_ADDRD(dest
)
466 // | R500_ALU_RGBA_SEL_C_SRC2
467 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
468 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
469 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
/* NOTE(review): case label missing.  The comment below describes this as
 * 1*src0-src1; here src0 and src1 are placed in address slots 1 and 2 and
 * operand C carries the NEG modifiers -- presumably the subtract opcode. */
472 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
473 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
474 /* Variation on MAD: 1*src0-src1 */
475 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
477 fp
->inst
[counter
].inst1
= R500_RGB_ADDR1(src
[0])
478 | R500_RGB_ADDR2(src
[1]);
479 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR1(src
[0])
480 | R500_ALPHA_ADDR2(src
[1]);
481 fp
->inst
[counter
].inst3
= /* 1 */
482 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
483 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
484 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
485 | R500_ALPHA_ADDRD(dest
)
486 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
487 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
488 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
489 | R500_ALU_RGBA_ADDRD(dest
)
490 | R500_ALU_RGBA_SEL_C_SRC2
491 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
492 | R500_ALU_RGBA_MOD_C_NEG
493 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
494 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]))
495 | R500_ALU_RGBA_ALPHA_MOD_C_NEG
;
/* NOTE(review): case label missing.  Texture instruction using
 * R500_TEX_INST_LD with fixed identity source and destination swizzles. */
498 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
499 fp
->inst
[counter
].inst0
= R500_INST_TYPE_TEX
| mask
500 | R500_INST_TEX_SEM_WAIT
;
501 fp
->inst
[counter
].inst1
= fpi
->TexSrcUnit
502 | R500_TEX_INST_LD
| R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
503 fp
->inst
[counter
].inst2
= R500_TEX_SRC_ADDR(src
[0])
504 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
505 | R500_TEX_SRC_S_SWIZ_R
| R500_TEX_SRC_T_SWIZ_G
506 | R500_TEX_SRC_R_SWIZ_B
| R500_TEX_SRC_Q_SWIZ_A
507 | R500_TEX_DST_ADDR(dest
)
508 | R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
509 | R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
510 fp
->inst
[counter
].inst3
= 0x0;
511 fp
->inst
[counter
].inst4
= 0x0;
512 fp
->inst
[counter
].inst5
= 0x0;
/* NOTE(review): case label missing.  Same as the previous texture section
 * but with R500_TEX_INST_PROJ and without R500_INST_TEX_SEM_WAIT in inst0. */
515 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
516 fp
->inst
[counter
].inst0
= R500_INST_TYPE_TEX
| mask
;
517 fp
->inst
[counter
].inst1
= fpi
->TexSrcUnit
518 | R500_TEX_INST_PROJ
| R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
519 fp
->inst
[counter
].inst2
= R500_TEX_SRC_ADDR(src
[0])
520 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
521 | R500_TEX_SRC_S_SWIZ_R
| R500_TEX_SRC_T_SWIZ_G
522 | R500_TEX_SRC_R_SWIZ_B
| R500_TEX_SRC_Q_SWIZ_A
523 | R500_TEX_DST_ADDR(dest
)
524 | R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
525 | R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
526 fp
->inst
[counter
].inst3
= 0x0;
527 fp
->inst
[counter
].inst4
= 0x0;
528 fp
->inst
[counter
].inst5
= 0x0;
/* NOTE(review): presumably the default branch of the switch -- label missing. */
531 ERROR("unknown fpi->Opcode %d\n", fpi
->Opcode
);
535 /* Finishing touches */
536 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
) {
537 fp
->inst
[counter
].inst0
|= R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
539 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
540 fp
->inst
[counter
].inst0
|= R500_INST_TYPE_OUT
541 | R500_INST_RGB_OMASK_R
| R500_INST_RGB_OMASK_G
542 | R500_INST_RGB_OMASK_B
| R500_INST_ALPHA_OMASK
;
/* Record the slot count in the compile state. */
552 fp
->cs
->nrslots
= counter
;
554 /* Finish him! (If it's an output instruction...)
555 * Yes, I know it's ugly... */
/* The XOR is non-zero whenever the low two bits of inst0 are not exactly 2. */
556 if ((fp
->inst
[counter
].inst0
& 0x3) ^ 0x2) {
557 fp
->inst
[counter
].inst0
|= R500_INST_TYPE_OUT
558 | R500_INST_TEX_SEM_WAIT
| R500_INST_LAST
;
/* Reset the per-program compile state before a new translation.
 *
 * Visible steps, in order:
 *  - query the driver option "fp_optimization" (the use of the result is not
 *    visible in this copy);
 *  - clear fp->translated and fp->error, point fp->cs and the local `cs` at
 *    the context's state.pfs_compile, and reset first_node_has_tex,
 *    max_temp_idx and node[0].alu_end / node[0].tex_end;
 *  - zero *cs and set vsrc[0..2] / ssrc[0..2] of every one of the
 *    PFS_MAX_ALU_INST slots to SRC_CONST;
 *  - for each input bit set in InputsRead (texcoords, WPOS, COL0, COL1) reset
 *    the input's refcount, assign it a register and clear the bit; WPOS also
 *    calls insert_wpos(); any other input is warned about once and mapped to
 *    register 0;
 *  - report an error when the program has no instruction array;
 *  - pre-scan every instruction up to OPCODE_END, counting references to
 *    temporaries (tracked in the `temps_used` bitmask) and inputs, and store
 *    the bitmask in cs->temp_in_use.
 *
 * NOTE(review): this copy of the function is incomplete.  The opening brace,
 * the declarations of i, j and idx, the right-hand side of the texcoord
 * register assignment, several case labels / else branches, and the closing
 * braces are absent, and two block comments in the body have lost their
 * terminators.  Restore from the upstream source before building. */
564 static void init_program(r300ContextPtr r300
, struct r500_fragment_program
*fp
)
566 struct r300_pfs_compile_state
*cs
= NULL
;
567 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
568 struct prog_instruction
*fpi
;
569 GLuint InputsRead
= mp
->Base
.InputsRead
;
570 GLuint temps_used
= 0; /* for fp->temps[] */
573 /* New compile, reset tracking data */
575 driQueryOptioni(&r300
->radeon
.optionCache
, "fp_optimization");
576 fp
->translated
= GL_FALSE
;
577 fp
->error
= GL_FALSE
;
578 fp
->cs
= cs
= &(R300_CONTEXT(fp
->ctx
)->state
.pfs_compile
);
580 fp
->first_node_has_tex
= 0;
582 fp
->max_temp_idx
= 0;
583 fp
->node
[0].alu_end
= -1;
584 fp
->node
[0].tex_end
= -1;
586 _mesa_memset(cs
, 0, sizeof(*fp
->cs
));
587 for (i
= 0; i
< PFS_MAX_ALU_INST
; i
++) {
588 for (j
= 0; j
< 3; j
++) {
589 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
590 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
594 /* Work out what temps the Mesa inputs correspond to, this must match
595 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
596 * configures itself based on the fragprog's InputsRead
598 * NOTE: this depends on get_hw_temp() allocating registers in order,
599 * starting from register 0.
603 /* Texcoords come first */
604 for (i
= 0; i
< fp
->ctx
->Const
.MaxTextureUnits
; i
++) {
605 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
606 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].refcount
= 0;
607 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].reg
=
611 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
613 /* fragment position treated as a texcoord */
614 if (InputsRead
& FRAG_BIT_WPOS
) {
615 cs
->inputs
[FRAG_ATTRIB_WPOS
].refcount
= 0;
616 cs
->inputs
[FRAG_ATTRIB_WPOS
].reg
= get_hw_temp(fp
, 0);
617 insert_wpos(&mp
->Base
);
619 InputsRead
&= ~FRAG_BIT_WPOS
;
621 /* Then primary colour */
622 if (InputsRead
& FRAG_BIT_COL0
) {
623 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
624 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
= get_hw_temp(fp
, 0);
626 InputsRead
&= ~FRAG_BIT_COL0
;
628 /* Secondary color */
629 if (InputsRead
& FRAG_BIT_COL1
) {
630 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
631 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
= get_hw_temp(fp
, 0);
633 InputsRead
&= ~FRAG_BIT_COL1
;
637 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead
);
638 /* force read from hwreg 0 for now */
639 for (i
= 0; i
< 32; i
++)
640 if (InputsRead
& (1 << i
))
641 cs
->inputs
[i
].reg
= 0;
645 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
646 * That way, we can free up the reg when it's no longer needed
648 if (!mp
->Base
.Instructions
) {
649 ERROR("No instructions found in program\n");
653 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
// Source operands: a temporary seen for the first time gets reg = -1 and
// refcount = 1 and is recorded in temps_used; the other visible branches
// increment the refcount of the temporary or of the input at that index.
656 for (i
= 0; i
< 3; i
++) {
657 idx
= fpi
->SrcReg
[i
].Index
;
658 switch (fpi
->SrcReg
[i
].File
) {
659 case PROGRAM_TEMPORARY
:
660 if (!(temps_used
& (1 << idx
))) {
661 cs
->temps
[idx
].reg
= -1;
662 cs
->temps
[idx
].refcount
= 1;
663 temps_used
|= (1 << idx
);
665 cs
->temps
[idx
].refcount
++;
668 cs
->inputs
[idx
].refcount
++;
// Destination operand: the same first-use / increment handling, applied only
// when the destination file is PROGRAM_TEMPORARY.
675 idx
= fpi
->DstReg
.Index
;
676 if (fpi
->DstReg
.File
== PROGRAM_TEMPORARY
) {
677 if (!(temps_used
& (1 << idx
))) {
678 cs
->temps
[idx
].reg
= -1;
679 cs
->temps
[idx
].refcount
= 1;
680 temps_used
|= (1 << idx
);
682 cs
->temps
[idx
].refcount
++;
685 cs
->temp_in_use
= temps_used
;
// Refresh the program's state-derived parameter values: when the Mesa program
// has a parameter list, hand it to _mesa_load_state_parameters() together
// with the GL context stored in fp->ctx.
// NOTE(review): the opening and closing braces of this function are not
// present in this copy; restore them from the upstream source.
688 static void update_params(struct r500_fragment_program
*fp
)
690 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
692 /* Ask Mesa nicely to fill in ParameterValues for us */
693 if (mp
->Base
.Parameters
)
694 _mesa_load_state_parameters(fp
->ctx
, mp
->Base
.Parameters
);
/* Entry point: translate `fp` for the R500 unless it is already translated.
 *
 * When fp->translated is false the visible code dumps the Mesa program to
 * stderr, calls init_program() and parse_program(), and reports an error if
 * parsing returns GL_FALSE.  It then derives node[cur_node].alu_end and
 * fp->alu_end from cs->nrslots, clamps a negative node[cur_node].tex_end to
 * 0, marks the program translated and calls r300UpdateStateParameters() with
 * _NEW_PROGRAM.
 *
 * NOTE(review): this copy of the function is incomplete (braces and several
 * statements are absent).  In particular `cs` is initialised to NULL and no
 * assignment to it is visible before cs->nrslots is read, and the fallback
 * the error message refers to ("Using dumb shader...") is not visible.
 * Confirm both against the upstream source. */
697 void r500TranslateFragmentShader(r300ContextPtr r300
,
698 struct r500_fragment_program
*fp
)
701 struct r300_pfs_compile_state
*cs
= NULL
;
703 if (!fp
->translated
) {
705 /* I need to see what I'm working with! */
706 fprintf(stderr
, "Mesa program:\n");
707 fprintf(stderr
, "-------------\n");
708 _mesa_print_program(&fp
->mesa_program
.Base
);
711 init_program(r300
, fp
);
714 if (parse_program(fp
) == GL_FALSE
) {
715 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
/* Index of the last ALU slot of the current node, relative to its offset. */
721 fp
->node
[fp
->cur_node
].alu_end
=
722 cs
->nrslots
- fp
->node
[fp
->cur_node
].alu_offset
- 1;
723 if (fp
->node
[fp
->cur_node
].tex_end
< 0)
724 fp
->node
[fp
->cur_node
].tex_end
= 0;
726 fp
->alu_end
= cs
->nrslots
- 1;
727 //assert(fp->node[fp->cur_node].alu_end >= 0);
728 //assert(fp->alu_end >= 0);
730 fp
->translated
= GL_TRUE
;
731 r300UpdateStateParameters(fp
->ctx
, _NEW_PROGRAM
);