2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 #include "radeon_compiler.h"
25 #include "../r300_reg.h"
27 #include "radeon_nqssadce.h"
28 #include "radeon_program.h"
29 #include "radeon_program_alu.h"
31 #include "shader/prog_optimize.h"
32 #include "shader/prog_print.h"
/* TODO: Get rid of t_src_class call */
/*
 * Evaluates to non-zero when source operands a and b either differ in their
 * RelAddr flag, or have different indices while both belong to the constant
 * class or both belong to the input class.
 *
 * a and b are struct prog_src_register lvalues; each is evaluated several
 * times, so do not pass expressions with side effects.
 */
#define CMP_SRCS(a, b) (((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index && \
		       ((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
			 t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
			(t_src_class((a).File) == PVS_SRC_REG_INPUT && \
			 t_src_class((b).File) == PVS_SRC_REG_INPUT))))
/*
 * Take an already-setup and valid source then swizzle it appropriately to
 * obtain a constant ZERO or ONE source.
 *
 * x selects an entry of the local src[] array and y is the swizzle selector
 * applied to all four components.  The expansion refers to `vp` and `src`,
 * so both must be in scope wherever the macro is used.
 */
#define __CONST(x, y)	\
	(PVS_SRC_OPERAND(t_src_index(vp, &src[x]),	\
			   t_swizzle(y),	\
			   t_swizzle(y),	\
			   t_swizzle(y),	\
			   t_swizzle(y),	\
			   t_src_class(src[x].File), \
			   NEGATE_NONE) | (src[x].RelAddr << 4))
58 static unsigned long t_dst_mask(GLuint mask
)
60 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
61 return mask
& WRITEMASK_XYZW
;
64 static unsigned long t_dst_class(gl_register_file file
)
68 case PROGRAM_TEMPORARY
:
69 return PVS_DST_REG_TEMPORARY
;
71 return PVS_DST_REG_OUT
;
73 return PVS_DST_REG_A0
;
76 case PROGRAM_LOCAL_PARAM:
77 case PROGRAM_ENV_PARAM:
78 case PROGRAM_NAMED_PARAM:
79 case PROGRAM_STATE_VAR:
80 case PROGRAM_WRITE_ONLY:
84 fprintf(stderr
, "problem in %s", __FUNCTION__
);
90 static unsigned long t_dst_index(struct r300_vertex_program_code
*vp
,
91 struct prog_dst_register
*dst
)
93 if (dst
->File
== PROGRAM_OUTPUT
)
94 return vp
->outputs
[dst
->Index
];
99 static unsigned long t_src_class(gl_register_file file
)
102 case PROGRAM_TEMPORARY
:
103 return PVS_SRC_REG_TEMPORARY
;
105 return PVS_SRC_REG_INPUT
;
106 case PROGRAM_LOCAL_PARAM
:
107 case PROGRAM_ENV_PARAM
:
108 case PROGRAM_NAMED_PARAM
:
109 case PROGRAM_CONSTANT
:
110 case PROGRAM_STATE_VAR
:
111 return PVS_SRC_REG_CONSTANT
;
114 case PROGRAM_WRITE_ONLY:
115 case PROGRAM_ADDRESS:
118 fprintf(stderr
, "problem in %s", __FUNCTION__
);
124 static INLINE
unsigned long t_swizzle(GLubyte swizzle
)
126 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
130 static unsigned long t_src_index(struct r300_vertex_program_code
*vp
,
131 struct prog_src_register
*src
)
133 if (src
->File
== PROGRAM_INPUT
) {
134 assert(vp
->inputs
[src
->Index
] != -1);
135 return vp
->inputs
[src
->Index
];
137 if (src
->Index
< 0) {
139 "negative offsets for indirect addressing do not work.\n");
146 /* these two functions should probably be merged... */
148 static unsigned long t_src(struct r300_vertex_program_code
*vp
,
149 struct prog_src_register
*src
)
151 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
152 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
154 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
155 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
156 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
157 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
158 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
159 t_src_class(src
->File
),
160 src
->Negate
) | (src
->RelAddr
<< 4);
163 static unsigned long t_src_scalar(struct r300_vertex_program_code
*vp
,
164 struct prog_src_register
*src
)
166 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
167 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
169 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
170 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
171 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
172 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
173 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
174 t_src_class(src
->File
),
175 src
->Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
179 static GLboolean
valid_dst(struct r300_vertex_program_code
*vp
,
180 struct prog_dst_register
*dst
)
182 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
184 } else if (dst
->File
== PROGRAM_ADDRESS
) {
185 assert(dst
->Index
== 0);
191 static GLuint
* ei_vector1(struct r300_vertex_program_code
*vp
,
193 struct prog_instruction
*vpi
,
195 struct prog_src_register src
[3])
197 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
200 t_dst_index(vp
, &vpi
->DstReg
),
201 t_dst_mask(vpi
->DstReg
.WriteMask
),
202 t_dst_class(vpi
->DstReg
.File
));
203 inst
[1] = t_src(vp
, &src
[0]);
204 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
205 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
210 static GLuint
* ei_vector2(struct r300_vertex_program_code
*vp
,
212 struct prog_instruction
*vpi
,
214 struct prog_src_register src
[3])
216 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
219 t_dst_index(vp
, &vpi
->DstReg
),
220 t_dst_mask(vpi
->DstReg
.WriteMask
),
221 t_dst_class(vpi
->DstReg
.File
));
222 inst
[1] = t_src(vp
, &src
[0]);
223 inst
[2] = t_src(vp
, &src
[1]);
224 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
229 static GLuint
*ei_math1(struct r300_vertex_program_code
*vp
,
231 struct prog_instruction
*vpi
,
233 struct prog_src_register src
[3])
235 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
238 t_dst_index(vp
, &vpi
->DstReg
),
239 t_dst_mask(vpi
->DstReg
.WriteMask
),
240 t_dst_class(vpi
->DstReg
.File
));
241 inst
[1] = t_src_scalar(vp
, &src
[0]);
242 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
243 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
248 static GLuint
*ei_lit(struct r300_vertex_program_code
*vp
,
249 struct prog_instruction
*vpi
,
251 struct prog_src_register src
[3])
253 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
255 inst
[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX
,
258 t_dst_index(vp
, &vpi
->DstReg
),
259 t_dst_mask(vpi
->DstReg
.WriteMask
),
260 t_dst_class(vpi
->DstReg
.File
));
261 /* NOTE: Users swizzling might not work. */
262 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
263 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
264 PVS_SRC_SELECT_FORCE_0
, // Z
265 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
266 t_src_class(src
[0].File
),
267 src
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
268 (src
[0].RelAddr
<< 4);
269 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
270 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
271 PVS_SRC_SELECT_FORCE_0
, // Z
272 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
273 t_src_class(src
[0].File
),
274 src
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
275 (src
[0].RelAddr
<< 4);
276 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
277 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
278 PVS_SRC_SELECT_FORCE_0
, // Z
279 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
280 t_src_class(src
[0].File
),
281 src
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
282 (src
[0].RelAddr
<< 4);
287 static GLuint
*ei_mad(struct r300_vertex_program_code
*vp
,
288 struct prog_instruction
*vpi
,
290 struct prog_src_register src
[3])
292 inst
[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD
,
295 t_dst_index(vp
, &vpi
->DstReg
),
296 t_dst_mask(vpi
->DstReg
.WriteMask
),
297 t_dst_class(vpi
->DstReg
.File
));
298 inst
[1] = t_src(vp
, &src
[0]);
299 inst
[2] = t_src(vp
, &src
[1]);
300 inst
[3] = t_src(vp
, &src
[2]);
305 static GLuint
*ei_pow(struct r300_vertex_program_code
*vp
,
306 struct prog_instruction
*vpi
,
308 struct prog_src_register src
[3])
310 inst
[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF
,
313 t_dst_index(vp
, &vpi
->DstReg
),
314 t_dst_mask(vpi
->DstReg
.WriteMask
),
315 t_dst_class(vpi
->DstReg
.File
));
316 inst
[1] = t_src_scalar(vp
, &src
[0]);
317 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
318 inst
[3] = t_src_scalar(vp
, &src
[1]);
323 static void t_inputs_outputs(struct r300_vertex_program_code
*vp
, struct gl_program
* glvp
)
327 GLuint OutputsWritten
, InputsRead
;
329 OutputsWritten
= glvp
->OutputsWritten
;
330 InputsRead
= glvp
->InputsRead
;
333 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
334 if (InputsRead
& (1 << i
))
335 vp
->inputs
[i
] = ++cur_reg
;
341 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
344 assert(OutputsWritten
& (1 << VERT_RESULT_HPOS
));
346 if (OutputsWritten
& (1 << VERT_RESULT_HPOS
)) {
347 vp
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
350 if (OutputsWritten
& (1 << VERT_RESULT_PSIZ
)) {
351 vp
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
354 /* If we're writing back facing colors we need to send
355 * four colors to make front/back face colors selection work.
356 * If the vertex program doesn't write all 4 colors, lets
357 * pretend it does by skipping output index reg so the colors
358 * get written into appropriate output vectors.
360 if (OutputsWritten
& (1 << VERT_RESULT_COL0
)) {
361 vp
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
362 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
363 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
367 if (OutputsWritten
& (1 << VERT_RESULT_COL1
)) {
368 vp
->outputs
[VERT_RESULT_COL1
] = cur_reg
++;
369 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
370 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
374 if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
375 vp
->outputs
[VERT_RESULT_BFC0
] = cur_reg
++;
376 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
380 if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
381 vp
->outputs
[VERT_RESULT_BFC1
] = cur_reg
++;
382 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
386 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++) {
387 if (OutputsWritten
& (1 << i
)) {
388 vp
->outputs
[i
] = cur_reg
++;
392 if (OutputsWritten
& (1 << VERT_RESULT_FOGC
)) {
393 vp
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
397 static GLboolean
translate_vertex_program(struct r300_vertex_program_compiler
* compiler
)
399 struct prog_instruction
*vpi
= compiler
->program
->Instructions
;
402 unsigned long num_operands
;
403 /* Initial value should be last tmp reg that hw supports.
404 Strangely enough r300 doesnt mind even though these would be out of range.
405 Smart enough to realize that it doesnt need it? */
406 int u_temp_i
= VSF_MAX_FRAGMENT_TEMPS
- 1;
407 struct prog_src_register src
[3];
408 struct r300_vertex_program_code
* vp
= compiler
->code
;
410 compiler
->code
->pos_end
= 0; /* Not supported yet */
411 compiler
->code
->length
= 0;
413 t_inputs_outputs(compiler
->code
, compiler
->program
);
415 for (inst
= compiler
->code
->body
.d
; vpi
->Opcode
!= OPCODE_END
;
419 int u_temp_used
= (VSF_MAX_FRAGMENT_TEMPS
- 1) - u_temp_i
;
420 if((compiler
->code
->num_temporaries
+ u_temp_used
) > VSF_MAX_FRAGMENT_TEMPS
) {
421 fprintf(stderr
, "Ran out of temps, num temps %d, us %d\n", compiler
->code
->num_temporaries
, u_temp_used
);
424 u_temp_i
=VSF_MAX_FRAGMENT_TEMPS
-1;
427 if (!valid_dst(compiler
->code
, &vpi
->DstReg
)) {
428 /* redirect result to unused temp */
429 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
430 vpi
->DstReg
.Index
= u_temp_i
;
433 num_operands
= _mesa_num_inst_src_regs(vpi
->Opcode
);
435 /* copy the sources (src) from mesa into a local variable... is this needed? */
436 for (i
= 0; i
< num_operands
; i
++) {
437 src
[i
] = vpi
->SrcReg
[i
];
440 if (num_operands
== 3) { /* TODO: scalars */
441 if (CMP_SRCS(src
[1], src
[2])
442 || CMP_SRCS(src
[0], src
[2])) {
443 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
448 PVS_DST_REG_TEMPORARY
);
450 PVS_SRC_OPERAND(t_src_index(compiler
->code
, &src
[2]),
455 t_src_class(src
[2].File
),
456 NEGATE_NONE
) | (src
[2].
459 inst
[2] = __CONST(2, SWIZZLE_ZERO
);
460 inst
[3] = __CONST(2, SWIZZLE_ZERO
);
463 src
[2].File
= PROGRAM_TEMPORARY
;
464 src
[2].Index
= u_temp_i
;
470 if (num_operands
>= 2) {
471 if (CMP_SRCS(src
[1], src
[0])) {
472 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
477 PVS_DST_REG_TEMPORARY
);
479 PVS_SRC_OPERAND(t_src_index(compiler
->code
, &src
[0]),
484 t_src_class(src
[0].File
),
485 NEGATE_NONE
) | (src
[0].
488 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
489 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
492 src
[0].File
= PROGRAM_TEMPORARY
;
493 src
[0].Index
= u_temp_i
;
499 switch (vpi
->Opcode
) {
500 case OPCODE_ADD
: inst
= ei_vector2(compiler
->code
, VE_ADD
, vpi
, inst
, src
); break;
501 case OPCODE_ARL
: inst
= ei_vector1(compiler
->code
, VE_FLT2FIX_DX
, vpi
, inst
, src
); break;
502 case OPCODE_DP4
: inst
= ei_vector2(compiler
->code
, VE_DOT_PRODUCT
, vpi
, inst
, src
); break;
503 case OPCODE_DST
: inst
= ei_vector2(compiler
->code
, VE_DISTANCE_VECTOR
, vpi
, inst
, src
); break;
504 case OPCODE_EX2
: inst
= ei_math1(compiler
->code
, ME_EXP_BASE2_FULL_DX
, vpi
, inst
, src
); break;
505 case OPCODE_EXP
: inst
= ei_math1(compiler
->code
, ME_EXP_BASE2_DX
, vpi
, inst
, src
); break;
506 case OPCODE_FRC
: inst
= ei_vector1(compiler
->code
, VE_FRACTION
, vpi
, inst
, src
); break;
507 case OPCODE_LG2
: inst
= ei_math1(compiler
->code
, ME_LOG_BASE2_FULL_DX
, vpi
, inst
, src
); break;
508 case OPCODE_LIT
: inst
= ei_lit(compiler
->code
, vpi
, inst
, src
); break;
509 case OPCODE_LOG
: inst
= ei_math1(compiler
->code
, ME_LOG_BASE2_DX
, vpi
, inst
, src
); break;
510 case OPCODE_MAD
: inst
= ei_mad(compiler
->code
, vpi
, inst
, src
); break;
511 case OPCODE_MAX
: inst
= ei_vector2(compiler
->code
, VE_MAXIMUM
, vpi
, inst
, src
); break;
512 case OPCODE_MIN
: inst
= ei_vector2(compiler
->code
, VE_MINIMUM
, vpi
, inst
, src
); break;
513 case OPCODE_MOV
: inst
= ei_vector1(compiler
->code
, VE_ADD
, vpi
, inst
, src
); break;
514 case OPCODE_MUL
: inst
= ei_vector2(compiler
->code
, VE_MULTIPLY
, vpi
, inst
, src
); break;
515 case OPCODE_POW
: inst
= ei_pow(compiler
->code
, vpi
, inst
, src
); break;
516 case OPCODE_RCP
: inst
= ei_math1(compiler
->code
, ME_RECIP_DX
, vpi
, inst
, src
); break;
517 case OPCODE_RSQ
: inst
= ei_math1(compiler
->code
, ME_RECIP_SQRT_DX
, vpi
, inst
, src
); break;
518 case OPCODE_SGE
: inst
= ei_vector2(compiler
->code
, VE_SET_GREATER_THAN_EQUAL
, vpi
, inst
, src
); break;
519 case OPCODE_SLT
: inst
= ei_vector2(compiler
->code
, VE_SET_LESS_THAN
, vpi
, inst
, src
); break;
521 fprintf(stderr
, "Unknown opcode %i\n", vpi
->Opcode
);
526 compiler
->code
->length
= (inst
- compiler
->code
->body
.d
);
527 if (compiler
->code
->length
>= VSF_MAX_FRAGMENT_LENGTH
) {
534 static void insert_wpos(struct gl_program
*prog
, GLuint temp_index
, int tex_id
)
536 struct prog_instruction
*vpi
;
538 _mesa_insert_instructions(prog
, prog
->NumInstructions
- 1, 2);
540 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 3];
542 vpi
->Opcode
= OPCODE_MOV
;
544 vpi
->DstReg
.File
= PROGRAM_OUTPUT
;
545 vpi
->DstReg
.Index
= VERT_RESULT_HPOS
;
546 vpi
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
547 vpi
->DstReg
.CondMask
= COND_TR
;
549 vpi
->SrcReg
[0].File
= PROGRAM_TEMPORARY
;
550 vpi
->SrcReg
[0].Index
= temp_index
;
551 vpi
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
555 vpi
->Opcode
= OPCODE_MOV
;
557 vpi
->DstReg
.File
= PROGRAM_OUTPUT
;
558 vpi
->DstReg
.Index
= VERT_RESULT_TEX0
+ tex_id
;
559 vpi
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
560 vpi
->DstReg
.CondMask
= COND_TR
;
562 vpi
->SrcReg
[0].File
= PROGRAM_TEMPORARY
;
563 vpi
->SrcReg
[0].Index
= temp_index
;
564 vpi
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
568 vpi
->Opcode
= OPCODE_END
;
571 static void pos_as_texcoord(struct gl_program
*prog
, int tex_id
)
573 struct prog_instruction
*vpi
;
574 GLuint tempregi
= prog
->NumTemporaries
;
576 prog
->NumTemporaries
++;
578 for (vpi
= prog
->Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++) {
579 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&& vpi
->DstReg
.Index
== VERT_RESULT_HPOS
) {
580 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
581 vpi
->DstReg
.Index
= tempregi
;
585 insert_wpos(prog
, tempregi
, tex_id
);
587 prog
->OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ tex_id
);
591 * The fogcoord attribute is special in that only the first component
592 * is relevant, and the remaining components are always fixed (when read
593 * from by the fragment program) to yield an X001 pattern.
595 * We need to enforce this either in the vertex program or in the fragment
596 * program, and this code chooses not to enforce it in the vertex program.
597 * This is slightly cheaper, as long as the fragment program does not use
600 * And it seems that usually, weird swizzles are not used, so...
602 * See also the counterpart rewriting for fragment programs.
604 static void fog_as_texcoord(struct gl_program
*prog
, int tex_id
)
606 struct prog_instruction
*vpi
;
608 vpi
= prog
->Instructions
;
609 while (vpi
->Opcode
!= OPCODE_END
) {
610 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&& vpi
->DstReg
.Index
== VERT_RESULT_FOGC
) {
611 vpi
->DstReg
.Index
= VERT_RESULT_TEX0
+ tex_id
;
612 vpi
->DstReg
.WriteMask
= WRITEMASK_X
;
618 prog
->OutputsWritten
&= ~(1 << VERT_RESULT_FOGC
);
619 prog
->OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ tex_id
);
623 #define ADD_OUTPUT(fp_attr, vp_result) \
625 if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \
626 OutputsAdded |= 1 << (vp_result); \
631 static void addArtificialOutputs(struct r300_vertex_program_compiler
* compiler
)
633 GLuint OutputsAdded
, FpReads
;
638 FpReads
= compiler
->state
.FpReads
;
640 ADD_OUTPUT(FRAG_ATTRIB_COL0
, VERT_RESULT_COL0
);
641 ADD_OUTPUT(FRAG_ATTRIB_COL1
, VERT_RESULT_COL1
);
643 for (i
= 0; i
< 7; ++i
) {
644 ADD_OUTPUT(FRAG_ATTRIB_TEX0
+ i
, VERT_RESULT_TEX0
+ i
);
647 /* Some outputs may be artificially added, to match the inputs of the fragment program.
648 * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
649 * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
652 struct prog_instruction
*inst
;
654 _mesa_insert_instructions(compiler
->program
, compiler
->program
->NumInstructions
- 1, count
);
655 inst
= &compiler
->program
->Instructions
[compiler
->program
->NumInstructions
- 1 - count
];
657 for (i
= 0; i
< VERT_RESULT_MAX
; ++i
) {
658 if (OutputsAdded
& (1 << i
)) {
659 inst
->Opcode
= OPCODE_MOV
;
661 inst
->DstReg
.File
= PROGRAM_OUTPUT
;
662 inst
->DstReg
.Index
= i
;
663 inst
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
664 inst
->DstReg
.CondMask
= COND_TR
;
666 inst
->SrcReg
[0].File
= PROGRAM_CONSTANT
;
667 inst
->SrcReg
[0].Index
= 0;
668 inst
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
674 compiler
->program
->OutputsWritten
|= OutputsAdded
;
680 static void nqssadceInit(struct nqssadce_state
* s
)
682 struct r300_vertex_program_compiler
* compiler
= s
->UserData
;
685 fp_reads
= compiler
->state
.FpReads
;
687 if (fp_reads
& FRAG_BIT_COL0
) {
688 s
->Outputs
[VERT_RESULT_COL0
].Sourced
= WRITEMASK_XYZW
;
689 s
->Outputs
[VERT_RESULT_BFC0
].Sourced
= WRITEMASK_XYZW
;
692 if (fp_reads
& FRAG_BIT_COL1
) {
693 s
->Outputs
[VERT_RESULT_COL1
].Sourced
= WRITEMASK_XYZW
;
694 s
->Outputs
[VERT_RESULT_BFC1
].Sourced
= WRITEMASK_XYZW
;
700 for (i
= 0; i
< 8; ++i
) {
701 if (fp_reads
& FRAG_BIT_TEX(i
)) {
702 s
->Outputs
[VERT_RESULT_TEX0
+ i
].Sourced
= WRITEMASK_XYZW
;
707 s
->Outputs
[VERT_RESULT_HPOS
].Sourced
= WRITEMASK_XYZW
;
708 if (s
->Program
->OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
709 s
->Outputs
[VERT_RESULT_PSIZ
].Sourced
= WRITEMASK_X
;
712 static GLboolean
swizzleIsNative(GLuint opcode
, struct prog_src_register reg
)
722 GLboolean
r3xx_compile_vertex_program(struct r300_vertex_program_compiler
* compiler
, GLcontext
* ctx
)
726 if (compiler
->state
.WPosAttr
!= FRAG_ATTRIB_MAX
) {
727 pos_as_texcoord(compiler
->program
, compiler
->state
.WPosAttr
- FRAG_ATTRIB_TEX0
);
730 if (compiler
->state
.FogAttr
!= FRAG_ATTRIB_MAX
) {
731 fog_as_texcoord(compiler
->program
, compiler
->state
.FogAttr
- FRAG_ATTRIB_TEX0
);
734 addArtificialOutputs(compiler
);
737 struct radeon_program_transformation transformations
[] = {
738 { &r300_transform_vertex_alu
, 0 },
740 radeonLocalTransform(compiler
->program
, 1, transformations
);
743 if (compiler
->Base
.Debug
) {
744 fprintf(stderr
, "Vertex program after native rewrite:\n");
745 _mesa_print_program(compiler
->program
);
750 struct radeon_nqssadce_descr nqssadce
= {
751 .Init
= &nqssadceInit
,
752 .IsNativeSwizzle
= &swizzleIsNative
,
755 radeonNqssaDce(compiler
->program
, &nqssadce
, compiler
);
757 /* We need this step for reusing temporary registers */
758 _mesa_optimize_program(ctx
, compiler
->program
);
760 if (compiler
->Base
.Debug
) {
761 fprintf(stderr
, "Vertex program after NQSSADCE:\n");
762 _mesa_print_program(compiler
->program
);
767 assert(compiler
->program
->NumInstructions
);
769 struct prog_instruction
*inst
;
772 inst
= compiler
->program
->Instructions
;
774 while (inst
->Opcode
!= OPCODE_END
) {
775 tmp
= _mesa_num_inst_src_regs(inst
->Opcode
);
776 for (i
= 0; i
< tmp
; ++i
) {
777 if (inst
->SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
778 if ((int) inst
->SrcReg
[i
].Index
> max
) {
779 max
= inst
->SrcReg
[i
].Index
;
784 if (_mesa_num_inst_dst_regs(inst
->Opcode
)) {
785 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
786 if ((int) inst
->DstReg
.Index
> max
) {
787 max
= inst
->DstReg
.Index
;
794 /* We actually want highest index of used temporary register,
795 * not the number of temporaries used.
796 * These values aren't always the same.
798 compiler
->code
->num_temporaries
= max
+ 1;
801 success
= translate_vertex_program(compiler
);
803 compiler
->code
->InputsRead
= compiler
->program
->InputsRead
;
804 compiler
->code
->OutputsWritten
= compiler
->program
->OutputsWritten
;