/*
 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 #include "radeon_compiler.h"
25 #include "../r300_reg.h"
27 #include "radeon_nqssadce.h"
28 #include "radeon_program.h"
29 #include "radeon_program_alu.h"
31 #include "shader/prog_print.h"
/*
 * Take an already-setup and valid source then swizzle it appropriately to
 * obtain a constant ZERO or ONE source.
 *
 * The macro expects `vp` and `vpi` to be in scope where it is expanded.
 * `x` selects the source register whose index, register class and RelAddr
 * bit are reused; `y` is the swizzle selector used for all four components.
 *
 * NOTE(review): the four t_swizzle(y) arguments were dropped by the text
 * extraction and are restored here to match the seven-argument
 * PVS_SRC_OPERAND uses elsewhere in this file; confirm against upstream.
 */
#define __CONST(x, y)	\
	(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]),	\
			   t_swizzle(y),	\
			   t_swizzle(y),	\
			   t_swizzle(y),	\
			   t_swizzle(y),	\
			   t_src_class(vpi->SrcReg[x].File), \
			   NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4))
48 static unsigned long t_dst_mask(GLuint mask
)
50 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
51 return mask
& WRITEMASK_XYZW
;
54 static unsigned long t_dst_class(gl_register_file file
)
58 case PROGRAM_TEMPORARY
:
59 return PVS_DST_REG_TEMPORARY
;
61 return PVS_DST_REG_OUT
;
63 return PVS_DST_REG_A0
;
66 case PROGRAM_LOCAL_PARAM:
67 case PROGRAM_ENV_PARAM:
68 case PROGRAM_NAMED_PARAM:
69 case PROGRAM_STATE_VAR:
70 case PROGRAM_WRITE_ONLY:
74 fprintf(stderr
, "problem in %s", __FUNCTION__
);
80 static unsigned long t_dst_index(struct r300_vertex_program_code
*vp
,
81 struct prog_dst_register
*dst
)
83 if (dst
->File
== PROGRAM_OUTPUT
)
84 return vp
->outputs
[dst
->Index
];
89 static unsigned long t_src_class(gl_register_file file
)
93 case PROGRAM_TEMPORARY
:
94 return PVS_SRC_REG_TEMPORARY
;
96 return PVS_SRC_REG_INPUT
;
97 case PROGRAM_LOCAL_PARAM
:
98 case PROGRAM_ENV_PARAM
:
99 case PROGRAM_NAMED_PARAM
:
100 case PROGRAM_CONSTANT
:
101 case PROGRAM_STATE_VAR
:
102 return PVS_SRC_REG_CONSTANT
;
105 case PROGRAM_WRITE_ONLY:
106 case PROGRAM_ADDRESS:
109 fprintf(stderr
, "problem in %s", __FUNCTION__
);
115 static GLboolean
t_src_conflict(struct prog_src_register a
, struct prog_src_register b
)
117 unsigned long aclass
= t_src_class(a
.File
);
118 unsigned long bclass
= t_src_class(b
.File
);
120 if (aclass
!= bclass
)
122 if (aclass
== PVS_SRC_REG_TEMPORARY
)
125 if (a
.RelAddr
|| b
.RelAddr
)
127 if (a
.Index
!= b
.Index
)
133 static INLINE
unsigned long t_swizzle(GLubyte swizzle
)
135 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
139 static unsigned long t_src_index(struct r300_vertex_program_code
*vp
,
140 struct prog_src_register
*src
)
142 if (src
->File
== PROGRAM_INPUT
) {
143 assert(vp
->inputs
[src
->Index
] != -1);
144 return vp
->inputs
[src
->Index
];
146 if (src
->Index
< 0) {
148 "negative offsets for indirect addressing do not work.\n");
155 /* these two functions should probably be merged... */
157 static unsigned long t_src(struct r300_vertex_program_code
*vp
,
158 struct prog_src_register
*src
)
160 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
161 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
163 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
164 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
165 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
166 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
167 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
168 t_src_class(src
->File
),
169 src
->Negate
) | (src
->RelAddr
<< 4);
172 static unsigned long t_src_scalar(struct r300_vertex_program_code
*vp
,
173 struct prog_src_register
*src
)
175 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
176 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
178 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
179 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
180 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
181 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
182 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
183 t_src_class(src
->File
),
184 src
->Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
188 static GLboolean
valid_dst(struct r300_vertex_program_code
*vp
,
189 struct prog_dst_register
*dst
)
191 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
193 } else if (dst
->File
== PROGRAM_ADDRESS
) {
194 assert(dst
->Index
== 0);
200 static void ei_vector1(struct r300_vertex_program_code
*vp
,
202 struct prog_instruction
*vpi
,
205 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
208 t_dst_index(vp
, &vpi
->DstReg
),
209 t_dst_mask(vpi
->DstReg
.WriteMask
),
210 t_dst_class(vpi
->DstReg
.File
));
211 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
212 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
213 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
216 static void ei_vector2(struct r300_vertex_program_code
*vp
,
218 struct prog_instruction
*vpi
,
221 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
224 t_dst_index(vp
, &vpi
->DstReg
),
225 t_dst_mask(vpi
->DstReg
.WriteMask
),
226 t_dst_class(vpi
->DstReg
.File
));
227 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
228 inst
[2] = t_src(vp
, &vpi
->SrcReg
[1]);
229 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
232 static void ei_math1(struct r300_vertex_program_code
*vp
,
234 struct prog_instruction
*vpi
,
237 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
240 t_dst_index(vp
, &vpi
->DstReg
),
241 t_dst_mask(vpi
->DstReg
.WriteMask
),
242 t_dst_class(vpi
->DstReg
.File
));
243 inst
[1] = t_src_scalar(vp
, &vpi
->SrcReg
[0]);
244 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
245 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
248 static void ei_lit(struct r300_vertex_program_code
*vp
,
249 struct prog_instruction
*vpi
,
252 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
254 inst
[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX
,
257 t_dst_index(vp
, &vpi
->DstReg
),
258 t_dst_mask(vpi
->DstReg
.WriteMask
),
259 t_dst_class(vpi
->DstReg
.File
));
260 /* NOTE: Users swizzling might not work. */
261 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
262 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
263 PVS_SRC_SELECT_FORCE_0
, // Z
264 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
265 t_src_class(vpi
->SrcReg
[0].File
),
266 vpi
->SrcReg
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
267 (vpi
->SrcReg
[0].RelAddr
<< 4);
268 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
269 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
270 PVS_SRC_SELECT_FORCE_0
, // Z
271 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
272 t_src_class(vpi
->SrcReg
[0].File
),
273 vpi
->SrcReg
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
274 (vpi
->SrcReg
[0].RelAddr
<< 4);
275 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
276 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
277 PVS_SRC_SELECT_FORCE_0
, // Z
278 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
279 t_src_class(vpi
->SrcReg
[0].File
),
280 vpi
->SrcReg
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
281 (vpi
->SrcReg
[0].RelAddr
<< 4);
284 static void ei_mad(struct r300_vertex_program_code
*vp
,
285 struct prog_instruction
*vpi
,
288 /* Remarks about hardware limitations of MAD
289 * (please preserve this comment, as this information is _NOT_
290 * in the documentation provided by AMD).
292 * As described in the documentation, MAD with three unique temporary
293 * source registers requires the use of the macro version.
295 * However (and this is not mentioned in the documentation), apparently
296 * the macro version is _NOT_ a full superset of the normal version.
297 * In particular, the macro version does not always work when relative
298 * addressing is used in the source operands.
300 * This limitation caused incorrect rendering in Sauerbraten's OpenGL
301 * assembly shader path when using medium quality animations
302 * (i.e. animations with matrix blending instead of quaternion blending).
304 * Unfortunately, I (nha) have been unable to extract a Piglit regression
305 * test for this issue - for some reason, it is possible to have vertex
306 * programs whose prefix is *exactly* the same as the prefix of the
307 * offending program in Sauerbraten up to the offending instruction
308 * without causing any trouble.
310 * Bottom line: Only use the macro version only when really necessary;
311 * according to AMD docs, this should improve performance by one clock
312 * as a nice side bonus.
314 if (vpi
->SrcReg
[0].File
== PROGRAM_TEMPORARY
&&
315 vpi
->SrcReg
[1].File
== PROGRAM_TEMPORARY
&&
316 vpi
->SrcReg
[2].File
== PROGRAM_TEMPORARY
&&
317 vpi
->SrcReg
[0].Index
!= vpi
->SrcReg
[1].Index
&&
318 vpi
->SrcReg
[0].Index
!= vpi
->SrcReg
[2].Index
&&
319 vpi
->SrcReg
[1].Index
!= vpi
->SrcReg
[2].Index
) {
320 inst
[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD
,
323 t_dst_index(vp
, &vpi
->DstReg
),
324 t_dst_mask(vpi
->DstReg
.WriteMask
),
325 t_dst_class(vpi
->DstReg
.File
));
327 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
330 t_dst_index(vp
, &vpi
->DstReg
),
331 t_dst_mask(vpi
->DstReg
.WriteMask
),
332 t_dst_class(vpi
->DstReg
.File
));
334 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
335 inst
[2] = t_src(vp
, &vpi
->SrcReg
[1]);
336 inst
[3] = t_src(vp
, &vpi
->SrcReg
[2]);
339 static void ei_pow(struct r300_vertex_program_code
*vp
,
340 struct prog_instruction
*vpi
,
343 inst
[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF
,
346 t_dst_index(vp
, &vpi
->DstReg
),
347 t_dst_mask(vpi
->DstReg
.WriteMask
),
348 t_dst_class(vpi
->DstReg
.File
));
349 inst
[1] = t_src_scalar(vp
, &vpi
->SrcReg
[0]);
350 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
351 inst
[3] = t_src_scalar(vp
, &vpi
->SrcReg
[1]);
355 static void translate_vertex_program(struct r300_vertex_program_compiler
* compiler
)
357 struct rc_instruction
*rci
;
359 compiler
->code
->pos_end
= 0; /* Not supported yet */
360 compiler
->code
->length
= 0;
362 compiler
->SetHwInputOutput(compiler
);
364 for(rci
= compiler
->Base
.Program
.Instructions
.Next
; rci
!= &compiler
->Base
.Program
.Instructions
; rci
= rci
->Next
) {
365 struct prog_instruction
*vpi
= &rci
->I
;
366 GLuint
*inst
= compiler
->code
->body
.d
+ compiler
->code
->length
;
368 /* Skip instructions writing to non-existing destination */
369 if (!valid_dst(compiler
->code
, &vpi
->DstReg
))
372 if (compiler
->code
->length
>= VSF_MAX_FRAGMENT_LENGTH
) {
373 rc_error(&compiler
->Base
, "Vertex program has too many instructions\n");
377 switch (vpi
->Opcode
) {
378 case OPCODE_ADD
: ei_vector2(compiler
->code
, VE_ADD
, vpi
, inst
); break;
379 case OPCODE_ARL
: ei_vector1(compiler
->code
, VE_FLT2FIX_DX
, vpi
, inst
); break;
380 case OPCODE_DP4
: ei_vector2(compiler
->code
, VE_DOT_PRODUCT
, vpi
, inst
); break;
381 case OPCODE_DST
: ei_vector2(compiler
->code
, VE_DISTANCE_VECTOR
, vpi
, inst
); break;
382 case OPCODE_EX2
: ei_math1(compiler
->code
, ME_EXP_BASE2_FULL_DX
, vpi
, inst
); break;
383 case OPCODE_EXP
: ei_math1(compiler
->code
, ME_EXP_BASE2_DX
, vpi
, inst
); break;
384 case OPCODE_FRC
: ei_vector1(compiler
->code
, VE_FRACTION
, vpi
, inst
); break;
385 case OPCODE_LG2
: ei_math1(compiler
->code
, ME_LOG_BASE2_FULL_DX
, vpi
, inst
); break;
386 case OPCODE_LIT
: ei_lit(compiler
->code
, vpi
, inst
); break;
387 case OPCODE_LOG
: ei_math1(compiler
->code
, ME_LOG_BASE2_DX
, vpi
, inst
); break;
388 case OPCODE_MAD
: ei_mad(compiler
->code
, vpi
, inst
); break;
389 case OPCODE_MAX
: ei_vector2(compiler
->code
, VE_MAXIMUM
, vpi
, inst
); break;
390 case OPCODE_MIN
: ei_vector2(compiler
->code
, VE_MINIMUM
, vpi
, inst
); break;
391 case OPCODE_MOV
: ei_vector1(compiler
->code
, VE_ADD
, vpi
, inst
); break;
392 case OPCODE_MUL
: ei_vector2(compiler
->code
, VE_MULTIPLY
, vpi
, inst
); break;
393 case OPCODE_POW
: ei_pow(compiler
->code
, vpi
, inst
); break;
394 case OPCODE_RCP
: ei_math1(compiler
->code
, ME_RECIP_DX
, vpi
, inst
); break;
395 case OPCODE_RSQ
: ei_math1(compiler
->code
, ME_RECIP_SQRT_DX
, vpi
, inst
); break;
396 case OPCODE_SGE
: ei_vector2(compiler
->code
, VE_SET_GREATER_THAN_EQUAL
, vpi
, inst
); break;
397 case OPCODE_SLT
: ei_vector2(compiler
->code
, VE_SET_LESS_THAN
, vpi
, inst
); break;
399 rc_error(&compiler
->Base
, "Unknown opcode %i\n", vpi
->Opcode
);
403 compiler
->code
->length
+= 4;
405 if (compiler
->Base
.Error
)
410 struct temporary_allocation
{
413 struct rc_instruction
* LastRead
;
416 static void allocate_temporary_registers(struct r300_vertex_program_compiler
* compiler
)
418 struct rc_instruction
*inst
;
419 GLuint num_orig_temps
= 0;
420 GLboolean hwtemps
[VSF_MAX_FRAGMENT_TEMPS
];
421 struct temporary_allocation
* ta
;
424 compiler
->code
->num_temporaries
= 0;
425 memset(hwtemps
, 0, sizeof(hwtemps
));
427 /* Pass 1: Count original temporaries and allocate structures */
428 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
429 GLuint numsrcs
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
430 GLuint numdsts
= _mesa_num_inst_dst_regs(inst
->I
.Opcode
);
432 for (i
= 0; i
< numsrcs
; ++i
) {
433 if (inst
->I
.SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
434 if (inst
->I
.SrcReg
[i
].Index
>= num_orig_temps
)
435 num_orig_temps
= inst
->I
.SrcReg
[i
].Index
+ 1;
440 if (inst
->I
.DstReg
.File
== PROGRAM_TEMPORARY
) {
441 if (inst
->I
.DstReg
.Index
>= num_orig_temps
)
442 num_orig_temps
= inst
->I
.DstReg
.Index
+ 1;
447 ta
= (struct temporary_allocation
*)memory_pool_malloc(&compiler
->Base
.Pool
,
448 sizeof(struct temporary_allocation
) * num_orig_temps
);
449 memset(ta
, 0, sizeof(struct temporary_allocation
) * num_orig_temps
);
451 /* Pass 2: Determine original temporary lifetimes */
452 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
453 GLuint numsrcs
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
455 for (i
= 0; i
< numsrcs
; ++i
) {
456 if (inst
->I
.SrcReg
[i
].File
== PROGRAM_TEMPORARY
)
457 ta
[inst
->I
.SrcReg
[i
].Index
].LastRead
= inst
;
461 /* Pass 3: Register allocation */
462 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
463 GLuint numsrcs
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
464 GLuint numdsts
= _mesa_num_inst_dst_regs(inst
->I
.Opcode
);
466 for (i
= 0; i
< numsrcs
; ++i
) {
467 if (inst
->I
.SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
468 GLuint orig
= inst
->I
.SrcReg
[i
].Index
;
469 inst
->I
.SrcReg
[i
].Index
= ta
[orig
].HwTemp
;
471 if (ta
[orig
].Allocated
&& inst
== ta
[orig
].LastRead
)
472 hwtemps
[ta
[orig
].HwTemp
] = GL_FALSE
;
477 if (inst
->I
.DstReg
.File
== PROGRAM_TEMPORARY
) {
478 GLuint orig
= inst
->I
.DstReg
.Index
;
480 if (!ta
[orig
].Allocated
) {
481 for(j
= 0; j
< VSF_MAX_FRAGMENT_TEMPS
; ++j
) {
485 if (j
>= VSF_MAX_FRAGMENT_TEMPS
) {
486 fprintf(stderr
, "Out of hw temporaries\n");
488 ta
[orig
].Allocated
= GL_TRUE
;
490 hwtemps
[j
] = GL_TRUE
;
492 if (j
>= compiler
->code
->num_temporaries
)
493 compiler
->code
->num_temporaries
= j
+ 1;
497 inst
->I
.DstReg
.Index
= ta
[orig
].HwTemp
;
505 * Vertex engine cannot read two inputs or two constants at the same time.
506 * Introduce intermediate MOVs to temporary registers to account for this.
508 static GLboolean
transform_source_conflicts(
509 struct radeon_compiler
*c
,
510 struct rc_instruction
* inst
,
513 GLuint num_operands
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
515 if (num_operands
== 3) {
516 if (t_src_conflict(inst
->I
.SrcReg
[1], inst
->I
.SrcReg
[2])
517 || t_src_conflict(inst
->I
.SrcReg
[0], inst
->I
.SrcReg
[2])) {
518 int tmpreg
= rc_find_free_temporary(c
);
519 struct rc_instruction
* inst_mov
= rc_insert_new_instruction(c
, inst
->Prev
);
520 inst_mov
->I
.Opcode
= OPCODE_MOV
;
521 inst_mov
->I
.DstReg
.File
= PROGRAM_TEMPORARY
;
522 inst_mov
->I
.DstReg
.Index
= tmpreg
;
523 inst_mov
->I
.SrcReg
[0] = inst
->I
.SrcReg
[2];
525 reset_srcreg(&inst
->I
.SrcReg
[2]);
526 inst
->I
.SrcReg
[2].File
= PROGRAM_TEMPORARY
;
527 inst
->I
.SrcReg
[2].Index
= tmpreg
;
531 if (num_operands
>= 2) {
532 if (t_src_conflict(inst
->I
.SrcReg
[1], inst
->I
.SrcReg
[0])) {
533 int tmpreg
= rc_find_free_temporary(c
);
534 struct rc_instruction
* inst_mov
= rc_insert_new_instruction(c
, inst
->Prev
);
535 inst_mov
->I
.Opcode
= OPCODE_MOV
;
536 inst_mov
->I
.DstReg
.File
= PROGRAM_TEMPORARY
;
537 inst_mov
->I
.DstReg
.Index
= tmpreg
;
538 inst_mov
->I
.SrcReg
[0] = inst
->I
.SrcReg
[1];
540 reset_srcreg(&inst
->I
.SrcReg
[1]);
541 inst
->I
.SrcReg
[1].File
= PROGRAM_TEMPORARY
;
542 inst
->I
.SrcReg
[1].Index
= tmpreg
;
549 static void addArtificialOutputs(struct r300_vertex_program_compiler
* compiler
)
553 for(i
= 0; i
< 32; ++i
) {
554 if ((compiler
->RequiredOutputs
& (1 << i
)) &&
555 !(compiler
->Base
.Program
.OutputsWritten
& (1 << i
))) {
556 struct rc_instruction
* inst
= rc_insert_new_instruction(&compiler
->Base
, compiler
->Base
.Program
.Instructions
.Prev
);
557 inst
->I
.Opcode
= OPCODE_MOV
;
559 inst
->I
.DstReg
.File
= PROGRAM_OUTPUT
;
560 inst
->I
.DstReg
.Index
= i
;
561 inst
->I
.DstReg
.WriteMask
= WRITEMASK_XYZW
;
563 inst
->I
.SrcReg
[0].File
= PROGRAM_CONSTANT
;
564 inst
->I
.SrcReg
[0].Index
= 0;
565 inst
->I
.SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
567 compiler
->Base
.Program
.OutputsWritten
|= 1 << i
;
572 static void nqssadceInit(struct nqssadce_state
* s
)
574 struct r300_vertex_program_compiler
* compiler
= s
->UserData
;
577 for(i
= 0; i
< VERT_RESULT_MAX
; ++i
) {
578 if (compiler
->RequiredOutputs
& (1 << i
))
579 s
->Outputs
[i
].Sourced
= WRITEMASK_XYZW
;
583 static GLboolean
swizzleIsNative(GLuint opcode
, struct prog_src_register reg
)
593 void r3xx_compile_vertex_program(struct r300_vertex_program_compiler
* compiler
)
595 addArtificialOutputs(compiler
);
598 struct radeon_program_transformation transformations
[] = {
599 { &r300_transform_vertex_alu
, 0 },
601 radeonLocalTransform(&compiler
->Base
, 1, transformations
);
604 if (compiler
->Base
.Debug
) {
605 fprintf(stderr
, "Vertex program after native rewrite:\n");
606 rc_print_program(&compiler
->Base
.Program
);
611 /* Note: This pass has to be done seperately from ALU rewrite,
612 * otherwise non-native ALU instructions with source conflits
613 * will not be treated properly.
615 struct radeon_program_transformation transformations
[] = {
616 { &transform_source_conflicts
, 0 },
618 radeonLocalTransform(&compiler
->Base
, 1, transformations
);
621 if (compiler
->Base
.Debug
) {
622 fprintf(stderr
, "Vertex program after source conflict resolve:\n");
623 rc_print_program(&compiler
->Base
.Program
);
628 struct radeon_nqssadce_descr nqssadce
= {
629 .Init
= &nqssadceInit
,
630 .IsNativeSwizzle
= &swizzleIsNative
,
633 radeonNqssaDce(&compiler
->Base
, &nqssadce
, compiler
);
635 /* We need this step for reusing temporary registers */
636 allocate_temporary_registers(compiler
);
638 if (compiler
->Base
.Debug
) {
639 fprintf(stderr
, "Vertex program after NQSSADCE:\n");
640 rc_print_program(&compiler
->Base
.Program
);
645 translate_vertex_program(compiler
);
647 rc_constants_copy(&compiler
->code
->constants
, &compiler
->Base
.Program
.Constants
);
649 compiler
->code
->InputsRead
= compiler
->Base
.Program
.InputsRead
;
650 compiler
->code
->OutputsWritten
= compiler
->Base
.Program
.OutputsWritten
;
652 if (compiler
->Base
.Debug
) {
653 fprintf(stderr
, "Final vertex program code:\n");
654 r300_vertex_program_dump(compiler
->code
);