2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 #include "radeon_compiler.h"
25 #include "../r300_reg.h"
27 #include "radeon_nqssadce.h"
28 #include "radeon_program.h"
29 #include "radeon_program_alu.h"
31 #include "shader/prog_print.h"
35 * Take an already-setup and valid source then swizzle it appropriately to
36 * obtain a constant ZERO or ONE source.
// Build a source operand word from SrcReg[x] of the instruction being emitted.
// The register index comes from t_src_index(), the register class from
// t_src_class(), negation is disabled (NEGATE_NONE) and the RelAddr flag is
// OR'ed in at bit 4.
// The expansion refers to `vp` and `vpi`, so both must be in scope at the
// point of use.
// NOTE(review): the swizzle arguments (presumably built from `y`) are not
// visible in this view - confirm against the complete macro.
38 #define __CONST(x, y) \
39 (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \
44 t_src_class(vpi->SrcReg[x].File), \
45 NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4))
// Translate a Mesa destination write mask into the hardware write mask.
// Only the XYZW bits of `mask` are kept; no other remapping is performed.
48 static unsigned long t_dst_mask(GLuint mask
)
50 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
51 return mask
& WRITEMASK_XYZW
;
// Map a Mesa register file to the hardware destination register class.
// PROGRAM_TEMPORARY maps to PVS_DST_REG_TEMPORARY. The other two visible
// results are PVS_DST_REG_OUT and PVS_DST_REG_A0.
// A file that is not handled is reported on stderr as "problem in <function>".
// NOTE(review): the case labels guarding the OUT and A0 returns are not
// visible in this view (presumably PROGRAM_OUTPUT and PROGRAM_ADDRESS), and
// what happens after the error message is not visible either - confirm.
54 static unsigned long t_dst_class(gl_register_file file
)
58 case PROGRAM_TEMPORARY
:
59 return PVS_DST_REG_TEMPORARY
;
61 return PVS_DST_REG_OUT
;
63 return PVS_DST_REG_A0
;
66 case PROGRAM_LOCAL_PARAM:
67 case PROGRAM_ENV_PARAM:
68 case PROGRAM_NAMED_PARAM:
69 case PROGRAM_STATE_VAR:
70 case PROGRAM_WRITE_ONLY:
74 fprintf(stderr
, "problem in %s", __FUNCTION__
);
// Compute the hardware register index for a destination register.
// Destinations in PROGRAM_OUTPUT are remapped through the vp->outputs table,
// indexed by the Mesa output index.
// NOTE(review): the result for every other register file is not visible in
// this view.
80 static unsigned long t_dst_index(struct r300_vertex_program_code
*vp
,
81 struct prog_dst_register
*dst
)
83 if (dst
->File
== PROGRAM_OUTPUT
)
84 return vp
->outputs
[dst
->Index
];
// Map a Mesa register file to the hardware source register class.
// PROGRAM_TEMPORARY maps to PVS_SRC_REG_TEMPORARY. The local/env/named
// parameter, constant and state-variable files all share
// PVS_SRC_REG_CONSTANT. PVS_SRC_REG_INPUT is the remaining visible result.
// A file that is not handled is reported on stderr as "problem in <function>".
// NOTE(review): the case label guarding the INPUT return is not visible in
// this view (presumably PROGRAM_INPUT), nor is the code after the error
// message - confirm.
89 static unsigned long t_src_class(gl_register_file file
)
92 case PROGRAM_TEMPORARY
:
93 return PVS_SRC_REG_TEMPORARY
;
95 return PVS_SRC_REG_INPUT
;
96 case PROGRAM_LOCAL_PARAM
:
97 case PROGRAM_ENV_PARAM
:
98 case PROGRAM_NAMED_PARAM
:
99 case PROGRAM_CONSTANT
:
100 case PROGRAM_STATE_VAR
:
101 return PVS_SRC_REG_CONSTANT
;
104 case PROGRAM_WRITE_ONLY:
105 case PROGRAM_ADDRESS:
108 fprintf(stderr
, "problem in %s", __FUNCTION__
);
// Decide whether two source registers cannot be read by the same instruction.
// The test looks, in this order, at: whether both registers fall into the
// same hardware source class, whether that class is the temporary class,
// whether either register uses relative addressing, and whether the two
// register indices differ.
// NOTE(review): the value returned by each of these tests is not visible in
// this view; the callers treat a true result as "needs an intermediate MOV".
114 static GLboolean
t_src_conflict(struct prog_src_register a
, struct prog_src_register b
)
116 unsigned long aclass
= t_src_class(a
.File
);
117 unsigned long bclass
= t_src_class(b
.File
);
119 if (aclass
!= bclass
)
121 if (aclass
== PVS_SRC_REG_TEMPORARY
)
124 if (a
.RelAddr
|| b
.RelAddr
)
126 if (a
.Index
!= b
.Index
)
// Translate one Mesa swizzle component selector into the hardware component
// selector used when packing source operands.
// NOTE(review): the function body is not visible in this view; the existing
// comment below states the translation is an identity mapping.
132 static INLINE
unsigned long t_swizzle(GLubyte swizzle
)
134 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
// Compute the hardware register index for a source register.
// Sources in PROGRAM_INPUT are remapped through the vp->inputs table; the
// assert documents that the table entry must not be -1 (unmapped) by then.
// For the remaining files a negative index triggers a diagnostic saying that
// negative offsets for indirect addressing do not work.
// NOTE(review): the call that prints the diagnostic and the final return are
// not visible in this view.
138 static unsigned long t_src_index(struct r300_vertex_program_code
*vp
,
139 struct prog_src_register
*src
)
141 if (src
->File
== PROGRAM_INPUT
) {
142 assert(vp
->inputs
[src
->Index
] != -1);
143 return vp
->inputs
[src
->Index
];
145 if (src
->Index
< 0) {
147 "negative offsets for indirect addressing do not work.\n");
154 /* these two functions should probably be merged... */
// Pack a complete vector source operand word.
// Combines the hardware register index, the four swizzle components taken in
// order (0, 1, 2, 3), the register class and the per-component Negate flags,
// then ORs in the RelAddr flag at bit 4.
156 static unsigned long t_src(struct r300_vertex_program_code
*vp
,
157 struct prog_src_register
*src
)
159 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
160 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
162 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
163 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
164 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
165 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
166 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
167 t_src_class(src
->File
),
168 src
->Negate
) | (src
->RelAddr
<< 4);
// Pack a scalar source operand word.
// Unlike t_src(), swizzle component 0 is replicated into all four component
// slots, and negation is all-or-nothing: any non-zero Negate value becomes
// NEGATE_XYZW, otherwise NEGATE_NONE.
// NOTE(review): the expression continues after the trailing `|`, but the rest
// is not visible in this view (presumably the RelAddr bit, as in t_src).
171 static unsigned long t_src_scalar(struct r300_vertex_program_code
*vp
,
172 struct prog_src_register
*src
)
174 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
175 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
177 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
178 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
179 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
180 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
181 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
182 t_src_class(src
->File
),
183 src
->Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
// Check whether an instruction's destination register can be emitted.
// Two cases are singled out: a PROGRAM_OUTPUT destination whose vp->outputs
// entry is -1 (no hardware output assigned), and a PROGRAM_ADDRESS
// destination, for which only index 0 is accepted (enforced by assert).
// NOTE(review): the return values are not visible in this view; the caller
// skips the instruction when this function returns false.
187 static GLboolean
valid_dst(struct r300_vertex_program_code
*vp
,
188 struct prog_dst_register
*dst
)
190 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
192 } else if (dst
->File
== PROGRAM_ADDRESS
) {
193 assert(dst
->Index
== 0);
// Emit a one-source vector instruction as four dwords.
// inst[0] holds the opcode/destination word built from hw_opcode and the
// destination index, write mask and register class. inst[1] holds source 0.
// inst[2] and inst[3] are filled with __CONST(0, SWIZZLE_ZERO), i.e. operands
// derived from source 0's register.
// NOTE(review): the hw_opcode and inst parameter declarations and two
// arguments of PVS_OP_DST_OPERAND are not visible in this view.
199 static void ei_vector1(struct r300_vertex_program_code
*vp
,
201 struct prog_instruction
*vpi
,
204 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
207 t_dst_index(vp
, &vpi
->DstReg
),
208 t_dst_mask(vpi
->DstReg
.WriteMask
),
209 t_dst_class(vpi
->DstReg
.File
));
210 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
211 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
212 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
// Emit a two-source vector instruction as four dwords.
// inst[0] holds the opcode/destination word, inst[1] and inst[2] hold sources
// 0 and 1. The unused third slot inst[3] is filled with
// __CONST(1, SWIZZLE_ZERO), i.e. an operand derived from source 1's register.
// NOTE(review): the hw_opcode and inst parameter declarations and two
// arguments of PVS_OP_DST_OPERAND are not visible in this view.
215 static void ei_vector2(struct r300_vertex_program_code
*vp
,
217 struct prog_instruction
*vpi
,
220 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
223 t_dst_index(vp
, &vpi
->DstReg
),
224 t_dst_mask(vpi
->DstReg
.WriteMask
),
225 t_dst_class(vpi
->DstReg
.File
));
226 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
227 inst
[2] = t_src(vp
, &vpi
->SrcReg
[1]);
228 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
// Emit a one-source math (scalar) instruction as four dwords.
// Same layout as ei_vector1(), except that source 0 is packed with
// t_src_scalar(), which replicates swizzle component 0 across all slots.
// inst[2] and inst[3] are filled with __CONST(0, SWIZZLE_ZERO).
// NOTE(review): the hw_opcode and inst parameter declarations and two
// arguments of PVS_OP_DST_OPERAND are not visible in this view.
231 static void ei_math1(struct r300_vertex_program_code
*vp
,
233 struct prog_instruction
*vpi
,
236 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
239 t_dst_index(vp
, &vpi
->DstReg
),
240 t_dst_mask(vpi
->DstReg
.WriteMask
),
241 t_dst_class(vpi
->DstReg
.File
));
242 inst
[1] = t_src_scalar(vp
, &vpi
->SrcReg
[0]);
243 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
244 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
// Emit LIT using the ME_LIGHT_COEFF_DX opcode.
// All three source slots read source register 0, each with a different
// component order built from the user's swizzle:
//   inst[1]: (X, W, 0, Y)
//   inst[2]: (Y, W, 0, X)
//   inst[3]: (Y, X, 0, W)
// The third component is always forced to zero (PVS_SRC_SELECT_FORCE_0).
// Negation is all-or-nothing (NEGATE_XYZW when Negate is non-zero) and the
// RelAddr flag is OR'ed in at bit 4 for every slot.
// NOTE(review): the inst parameter declaration and two arguments of
// PVS_OP_DST_OPERAND are not visible in this view.
247 static void ei_lit(struct r300_vertex_program_code
*vp
,
248 struct prog_instruction
*vpi
,
251 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
253 inst
[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX
,
256 t_dst_index(vp
, &vpi
->DstReg
),
257 t_dst_mask(vpi
->DstReg
.WriteMask
),
258 t_dst_class(vpi
->DstReg
.File
));
259 /* NOTE: Users swizzling might not work. */
260 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
261 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
262 PVS_SRC_SELECT_FORCE_0
, // Z
263 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
264 t_src_class(vpi
->SrcReg
[0].File
),
265 vpi
->SrcReg
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
266 (vpi
->SrcReg
[0].RelAddr
<< 4);
267 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
268 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
269 PVS_SRC_SELECT_FORCE_0
, // Z
270 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
271 t_src_class(vpi
->SrcReg
[0].File
),
272 vpi
->SrcReg
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
273 (vpi
->SrcReg
[0].RelAddr
<< 4);
274 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
275 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
276 PVS_SRC_SELECT_FORCE_0
, // Z
277 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
278 t_src_class(vpi
->SrcReg
[0].File
),
279 vpi
->SrcReg
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
280 (vpi
->SrcReg
[0].RelAddr
<< 4);
// Emit MAD (multiply-add) as four dwords.
// The macro opcode PVS_MACRO_OP_2CLK_MADD is selected only when all three
// sources are in PROGRAM_TEMPORARY and their indices are pairwise different;
// every other combination uses the plain VE_MULTIPLY_ADD opcode.
// The three source slots are packed with t_src() in operand order.
// NOTE(review): the inst parameter declaration, the `else` between the two
// opcode words and two arguments of PVS_OP_DST_OPERAND are not visible in
// this view.
283 static void ei_mad(struct r300_vertex_program_code
*vp
,
284 struct prog_instruction
*vpi
,
287 /* Remarks about hardware limitations of MAD
288 * (please preserve this comment, as this information is _NOT_
289 * in the documentation provided by AMD).
291 * As described in the documentation, MAD with three unique temporary
292 * source registers requires the use of the macro version.
294 * However (and this is not mentioned in the documentation), apparently
295 * the macro version is _NOT_ a full superset of the normal version.
296 * In particular, the macro version does not always work when relative
297 * addressing is used in the source operands.
299 * This limitation caused incorrect rendering in Sauerbraten's OpenGL
300 * assembly shader path when using medium quality animations
301 * (i.e. animations with matrix blending instead of quaternion blending).
303 * Unfortunately, I (nha) have been unable to extract a Piglit regression
304 * test for this issue - for some reason, it is possible to have vertex
305 * programs whose prefix is *exactly* the same as the prefix of the
306 * offending program in Sauerbraten up to the offending instruction
307 * without causing any trouble.
309 * Bottom line: Only use the macro version only when really necessary;
310 * according to AMD docs, this should improve performance by one clock
311 * as a nice side bonus.
313 if (vpi
->SrcReg
[0].File
== PROGRAM_TEMPORARY
&&
314 vpi
->SrcReg
[1].File
== PROGRAM_TEMPORARY
&&
315 vpi
->SrcReg
[2].File
== PROGRAM_TEMPORARY
&&
316 vpi
->SrcReg
[0].Index
!= vpi
->SrcReg
[1].Index
&&
317 vpi
->SrcReg
[0].Index
!= vpi
->SrcReg
[2].Index
&&
318 vpi
->SrcReg
[1].Index
!= vpi
->SrcReg
[2].Index
) {
319 inst
[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD
,
322 t_dst_index(vp
, &vpi
->DstReg
),
323 t_dst_mask(vpi
->DstReg
.WriteMask
),
324 t_dst_class(vpi
->DstReg
.File
));
326 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
329 t_dst_index(vp
, &vpi
->DstReg
),
330 t_dst_mask(vpi
->DstReg
.WriteMask
),
331 t_dst_class(vpi
->DstReg
.File
));
333 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
334 inst
[2] = t_src(vp
, &vpi
->SrcReg
[1]);
335 inst
[3] = t_src(vp
, &vpi
->SrcReg
[2]);
// Emit POW using the ME_POWER_FUNC_FF opcode.
// Both operands are packed as scalars with t_src_scalar(): source 0 goes
// into slot inst[1] and source 1 into slot inst[3]. The middle slot inst[2]
// is filled with __CONST(0, SWIZZLE_ZERO).
// NOTE(review): the inst parameter declaration and two arguments of
// PVS_OP_DST_OPERAND are not visible in this view.
338 static void ei_pow(struct r300_vertex_program_code
*vp
,
339 struct prog_instruction
*vpi
,
342 inst
[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF
,
345 t_dst_index(vp
, &vpi
->DstReg
),
346 t_dst_mask(vpi
->DstReg
.WriteMask
),
347 t_dst_class(vpi
->DstReg
.File
));
348 inst
[1] = t_src_scalar(vp
, &vpi
->SrcReg
[0]);
349 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
350 inst
[3] = t_src_scalar(vp
, &vpi
->SrcReg
[1]);
// Translate the compiler's instruction list into hardware instruction words.
// Resets code->pos_end and code->length, lets the driver callback
// SetHwInputOutput set up the input/output mapping, then walks the
// instruction list until it reaches the list head again.
// For each instruction: the destination is checked with valid_dst(), the
// output buffer is checked against VSF_MAX_FRAGMENT_LENGTH (reported through
// rc_error), and the opcode is dispatched to one of the ei_* emitters, which
// write into `inst`. Unknown opcodes are reported through rc_error.
// NOTE(review): the statements taken after a failed valid_dst() check, after
// the length error and after the final Error check are not visible in this
// view.
354 static void translate_vertex_program(struct r300_vertex_program_compiler
* compiler
)
356 struct rc_instruction
*rci
;
358 compiler
->code
->pos_end
= 0; /* Not supported yet */
359 compiler
->code
->length
= 0;
361 compiler
->SetHwInputOutput(compiler
);
363 for(rci
= compiler
->Base
.Program
.Instructions
.Next
; rci
!= &compiler
->Base
.Program
.Instructions
; rci
= rci
->Next
) {
364 struct prog_instruction
*vpi
= &rci
->I
;
// Write position for this instruction: current end of the emitted code.
365 GLuint
*inst
= compiler
->code
->body
.d
+ compiler
->code
->length
;
367 /* Skip instructions writing to non-existing destination */
368 if (!valid_dst(compiler
->code
, &vpi
->DstReg
))
371 if (compiler
->code
->length
>= VSF_MAX_FRAGMENT_LENGTH
) {
372 rc_error(&compiler
->Base
, "Vertex program has too many instructions\n");
// Select the emitter and hardware opcode for each supported Mesa opcode.
376 switch (vpi
->Opcode
) {
377 case OPCODE_ADD
: ei_vector2(compiler
->code
, VE_ADD
, vpi
, inst
); break;
378 case OPCODE_ARL
: ei_vector1(compiler
->code
, VE_FLT2FIX_DX
, vpi
, inst
); break;
379 case OPCODE_DP4
: ei_vector2(compiler
->code
, VE_DOT_PRODUCT
, vpi
, inst
); break;
380 case OPCODE_DST
: ei_vector2(compiler
->code
, VE_DISTANCE_VECTOR
, vpi
, inst
); break;
381 case OPCODE_EX2
: ei_math1(compiler
->code
, ME_EXP_BASE2_FULL_DX
, vpi
, inst
); break;
382 case OPCODE_EXP
: ei_math1(compiler
->code
, ME_EXP_BASE2_DX
, vpi
, inst
); break;
383 case OPCODE_FRC
: ei_vector1(compiler
->code
, VE_FRACTION
, vpi
, inst
); break;
384 case OPCODE_LG2
: ei_math1(compiler
->code
, ME_LOG_BASE2_FULL_DX
, vpi
, inst
); break;
385 case OPCODE_LIT
: ei_lit(compiler
->code
, vpi
, inst
); break;
386 case OPCODE_LOG
: ei_math1(compiler
->code
, ME_LOG_BASE2_DX
, vpi
, inst
); break;
387 case OPCODE_MAD
: ei_mad(compiler
->code
, vpi
, inst
); break;
388 case OPCODE_MAX
: ei_vector2(compiler
->code
, VE_MAXIMUM
, vpi
, inst
); break;
389 case OPCODE_MIN
: ei_vector2(compiler
->code
, VE_MINIMUM
, vpi
, inst
); break;
// MOV has no dedicated opcode here: it is emitted as VE_ADD through
// ei_vector1(), which fills the two remaining source slots itself.
390 case OPCODE_MOV
: ei_vector1(compiler
->code
, VE_ADD
, vpi
, inst
); break;
391 case OPCODE_MUL
: ei_vector2(compiler
->code
, VE_MULTIPLY
, vpi
, inst
); break;
392 case OPCODE_POW
: ei_pow(compiler
->code
, vpi
, inst
); break;
393 case OPCODE_RCP
: ei_math1(compiler
->code
, ME_RECIP_DX
, vpi
, inst
); break;
394 case OPCODE_RSQ
: ei_math1(compiler
->code
, ME_RECIP_SQRT_DX
, vpi
, inst
); break;
395 case OPCODE_SGE
: ei_vector2(compiler
->code
, VE_SET_GREATER_THAN_EQUAL
, vpi
, inst
); break;
396 case OPCODE_SLT
: ei_vector2(compiler
->code
, VE_SET_LESS_THAN
, vpi
, inst
); break;
398 rc_error(&compiler
->Base
, "Unknown opcode %i\n", vpi
->Opcode
);
// Each emitter writes inst[0..3], so the code grows by four dwords.
402 compiler
->code
->length
+= 4;
404 if (compiler
->Base
.Error
)
// Per-temporary bookkeeping used by allocate_temporary_registers(), one
// entry per original temporary register index.
// LastRead is the last instruction in program order that reads the
// temporary; the allocator releases the hardware register at that point.
// NOTE(review): the allocator also uses members named Allocated and HwTemp;
// their declarations are not visible in this view.
409 struct temporary_allocation
{
412 struct rc_instruction
* LastRead
;
// Remap the program's temporary registers onto hardware temporaries so that
// hardware registers can be reused once their value is dead.
// Works in three passes over the instruction list:
//   1. find the highest temporary index used as a source or destination and
//      allocate one zeroed temporary_allocation entry per original temporary
//      from the compiler's memory pool;
//   2. record, for every temporary, the last instruction that reads it;
//   3. rewrite source and destination indices to hardware temporaries,
//      releasing a hardware temporary at the instruction recorded as its last
//      read and picking a slot from hwtemps[] for a destination that has not
//      been allocated yet.
// compiler->code->num_temporaries ends up as the highest hardware temporary
// index used plus one. Running out of hardware temporaries is reported on
// stderr with "Out of hw temporaries".
// NOTE(review): the declarations of i and j, the guards using numdsts, the
// body of the slot search loop and the assignment of HwTemp are not visible
// in this view.
415 static void allocate_temporary_registers(struct r300_vertex_program_compiler
* compiler
)
417 struct rc_instruction
*inst
;
418 GLuint num_orig_temps
= 0;
419 GLboolean hwtemps
[VSF_MAX_FRAGMENT_TEMPS
];
420 struct temporary_allocation
* ta
;
423 compiler
->code
->num_temporaries
= 0;
// Start with every hardware temporary marked free.
424 memset(hwtemps
, 0, sizeof(hwtemps
));
426 /* Pass 1: Count original temporaries and allocate structures */
427 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
428 GLuint numsrcs
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
429 GLuint numdsts
= _mesa_num_inst_dst_regs(inst
->I
.Opcode
);
431 for (i
= 0; i
< numsrcs
; ++i
) {
432 if (inst
->I
.SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
433 if (inst
->I
.SrcReg
[i
].Index
>= num_orig_temps
)
434 num_orig_temps
= inst
->I
.SrcReg
[i
].Index
+ 1;
439 if (inst
->I
.DstReg
.File
== PROGRAM_TEMPORARY
) {
440 if (inst
->I
.DstReg
.Index
>= num_orig_temps
)
441 num_orig_temps
= inst
->I
.DstReg
.Index
+ 1;
// The table is zeroed, so every entry starts with all members cleared.
446 ta
= (struct temporary_allocation
*)memory_pool_malloc(&compiler
->Base
.Pool
,
447 sizeof(struct temporary_allocation
) * num_orig_temps
);
448 memset(ta
, 0, sizeof(struct temporary_allocation
) * num_orig_temps
);
450 /* Pass 2: Determine original temporary lifetimes */
451 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
452 GLuint numsrcs
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
454 for (i
= 0; i
< numsrcs
; ++i
) {
// Later reads overwrite earlier ones, leaving the final reader in LastRead.
455 if (inst
->I
.SrcReg
[i
].File
== PROGRAM_TEMPORARY
)
456 ta
[inst
->I
.SrcReg
[i
].Index
].LastRead
= inst
;
460 /* Pass 3: Register allocation */
461 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
462 GLuint numsrcs
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
463 GLuint numdsts
= _mesa_num_inst_dst_regs(inst
->I
.Opcode
);
465 for (i
= 0; i
< numsrcs
; ++i
) {
466 if (inst
->I
.SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
467 GLuint orig
= inst
->I
.SrcReg
[i
].Index
;
468 inst
->I
.SrcReg
[i
].Index
= ta
[orig
].HwTemp
;
// Sources are processed before the destination, so a hardware temporary
// freed here can be reused by this same instruction's result.
470 if (ta
[orig
].Allocated
&& inst
== ta
[orig
].LastRead
)
471 hwtemps
[ta
[orig
].HwTemp
] = GL_FALSE
;
476 if (inst
->I
.DstReg
.File
== PROGRAM_TEMPORARY
) {
477 GLuint orig
= inst
->I
.DstReg
.Index
;
479 if (!ta
[orig
].Allocated
) {
480 for(j
= 0; j
< VSF_MAX_FRAGMENT_TEMPS
; ++j
) {
484 if (j
>= VSF_MAX_FRAGMENT_TEMPS
) {
485 fprintf(stderr
, "Out of hw temporaries\n");
487 ta
[orig
].Allocated
= GL_TRUE
;
489 hwtemps
[j
] = GL_TRUE
;
491 if (j
>= compiler
->code
->num_temporaries
)
492 compiler
->code
->num_temporaries
= j
+ 1;
496 inst
->I
.DstReg
.Index
= ta
[orig
].HwTemp
;
504 * Vertex engine cannot read two inputs or two constants at the same time.
505 * Introduce intermediate MOVs to temporary registers to account for this.
// Local transformation that removes conflicting source operands from one
// instruction by routing an operand through a fresh temporary register.
// Three-operand instructions: if source 2 conflicts with source 1 or with
// source 0, a MOV of source 2 into a new temporary is created relative to
// inst->Prev, and source 2 is reset and redirected to that temporary.
// Instructions with two or more operands: if source 1 conflicts with source
// 0, source 1 is handled the same way. This second check runs after the
// first, so it sees the already rewritten source 2.
// NOTE(review): the third parameter and the return statement are not visible
// in this view; rc_insert_new_instruction() presumably inserts after the
// instruction it is given, which would place the MOV directly before `inst` -
// confirm.
507 static GLboolean
transform_source_conflicts(
508 struct radeon_compiler
*c
,
509 struct rc_instruction
* inst
,
512 GLuint num_operands
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
514 if (num_operands
== 3) {
515 if (t_src_conflict(inst
->I
.SrcReg
[1], inst
->I
.SrcReg
[2])
516 || t_src_conflict(inst
->I
.SrcReg
[0], inst
->I
.SrcReg
[2])) {
517 int tmpreg
= rc_find_free_temporary(c
);
518 struct rc_instruction
* inst_mov
= rc_insert_new_instruction(c
, inst
->Prev
);
519 inst_mov
->I
.Opcode
= OPCODE_MOV
;
520 inst_mov
->I
.DstReg
.File
= PROGRAM_TEMPORARY
;
521 inst_mov
->I
.DstReg
.Index
= tmpreg
;
// The MOV takes over the complete original operand (file, index, swizzle,
// negate, relative addressing).
522 inst_mov
->I
.SrcReg
[0] = inst
->I
.SrcReg
[2];
524 reset_srcreg(&inst
->I
.SrcReg
[2]);
525 inst
->I
.SrcReg
[2].File
= PROGRAM_TEMPORARY
;
526 inst
->I
.SrcReg
[2].Index
= tmpreg
;
530 if (num_operands
>= 2) {
531 if (t_src_conflict(inst
->I
.SrcReg
[1], inst
->I
.SrcReg
[0])) {
532 int tmpreg
= rc_find_free_temporary(c
);
533 struct rc_instruction
* inst_mov
= rc_insert_new_instruction(c
, inst
->Prev
);
534 inst_mov
->I
.Opcode
= OPCODE_MOV
;
535 inst_mov
->I
.DstReg
.File
= PROGRAM_TEMPORARY
;
536 inst_mov
->I
.DstReg
.Index
= tmpreg
;
537 inst_mov
->I
.SrcReg
[0] = inst
->I
.SrcReg
[1];
539 reset_srcreg(&inst
->I
.SrcReg
[1]);
540 inst
->I
.SrcReg
[1].File
= PROGRAM_TEMPORARY
;
541 inst
->I
.SrcReg
[1].Index
= tmpreg
;
548 static void addArtificialOutputs(struct r300_vertex_program_compiler
* compiler
)
552 for(i
= 0; i
< 32; ++i
) {
553 if ((compiler
->RequiredOutputs
& (1 << i
)) &&
554 !(compiler
->Base
.Program
.OutputsWritten
& (1 << i
))) {
555 struct rc_instruction
* inst
= rc_insert_new_instruction(&compiler
->Base
, compiler
->Base
.Program
.Instructions
.Prev
);
556 inst
->I
.Opcode
= OPCODE_MOV
;
558 inst
->I
.DstReg
.File
= PROGRAM_OUTPUT
;
559 inst
->I
.DstReg
.Index
= i
;
560 inst
->I
.DstReg
.WriteMask
= WRITEMASK_XYZW
;
562 inst
->I
.SrcReg
[0].File
= PROGRAM_CONSTANT
;
563 inst
->I
.SrcReg
[0].Index
= 0;
564 inst
->I
.SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
566 compiler
->Base
.Program
.OutputsWritten
|= 1 << i
;
// Init callback for the NQSSA-DCE pass (installed as `.Init` in
// r3xx_compile_vertex_program).
// Recovers the vertex program compiler from s->UserData and, for every
// output index below VERT_RESULT_MAX whose bit is set in RequiredOutputs,
// marks all four components of that output as sourced.
// NOTE(review): presumably this keeps the pass from removing writes to
// required outputs - confirm against the pass implementation. The declaration
// of the loop counter is not visible in this view.
571 static void nqssadceInit(struct nqssadce_state
* s
)
573 struct r300_vertex_program_compiler
* compiler
= s
->UserData
;
576 for(i
= 0; i
< VERT_RESULT_MAX
; ++i
) {
577 if (compiler
->RequiredOutputs
& (1 << i
))
578 s
->Outputs
[i
].Sourced
= WRITEMASK_XYZW
;
// IsNativeSwizzle callback for the NQSSA-DCE pass (installed as
// `.IsNativeSwizzle` in r3xx_compile_vertex_program).
// Takes the opcode and the source register whose swizzle is being queried.
// NOTE(review): the function body is not visible in this view, so which
// swizzles are reported as native cannot be stated here.
582 static GLboolean
swizzleIsNative(GLuint opcode
, struct prog_src_register reg
)
// Entry point: compile the vertex program held by `compiler` into hardware
// code in compiler->code.
// Stages, in order:
//   1. addArtificialOutputs() adds writes for required but unwritten outputs;
//   2. a local transform with r300_transform_vertex_alu;
//   3. a separate local transform with transform_source_conflicts;
//   4. radeonNqssaDce() with nqssadceInit / swizzleIsNative as callbacks;
//   5. allocate_temporary_registers() remaps temporaries;
//   6. translate_vertex_program() emits the hardware instruction words;
//   7. the constants and the InputsRead / OutputsWritten masks are copied
//      into compiler->code.
// When compiler->Base.Debug is set, the program is printed after stages 2, 3
// and 5, and the final code is dumped at the end.
// NOTE(review): the dump labelled "after NQSSADCE" is printed after the
// temporary allocation as well, and the final banner uses printf while the
// others go to stderr - confirm whether either is intentional.
592 void r3xx_compile_vertex_program(struct r300_vertex_program_compiler
* compiler
)
594 addArtificialOutputs(compiler
);
597 struct radeon_program_transformation transformations
[] = {
598 { &r300_transform_vertex_alu
, 0 },
600 radeonLocalTransform(&compiler
->Base
, 1, transformations
);
603 if (compiler
->Base
.Debug
) {
604 fprintf(stderr
, "Vertex program after native rewrite:\n");
605 rc_print_program(&compiler
->Base
.Program
);
610 /* Note: This pass has to be done separately from ALU rewrite,
611 * otherwise non-native ALU instructions with source conflicts
612 * will not be treated properly.
614 struct radeon_program_transformation transformations
[] = {
615 { &transform_source_conflicts
, 0 },
617 radeonLocalTransform(&compiler
->Base
, 1, transformations
);
620 if (compiler
->Base
.Debug
) {
621 fprintf(stderr
, "Vertex program after source conflict resolve:\n");
622 rc_print_program(&compiler
->Base
.Program
);
627 struct radeon_nqssadce_descr nqssadce
= {
628 .Init
= &nqssadceInit
,
629 .IsNativeSwizzle
= &swizzleIsNative
,
632 radeonNqssaDce(&compiler
->Base
, &nqssadce
, compiler
);
634 /* We need this step for reusing temporary registers */
635 allocate_temporary_registers(compiler
);
637 if (compiler
->Base
.Debug
) {
638 fprintf(stderr
, "Vertex program after NQSSADCE:\n");
639 rc_print_program(&compiler
->Base
.Program
);
644 translate_vertex_program(compiler
);
646 rc_constants_copy(&compiler
->code
->constants
, &compiler
->Base
.Program
.Constants
);
648 compiler
->code
->InputsRead
= compiler
->Base
.Program
.InputsRead
;
649 compiler
->code
->OutputsWritten
= compiler
->Base
.Program
.OutputsWritten
;
651 if (compiler
->Base
.Debug
) {
652 printf("Final vertex program code:\n");
653 r300_vertex_program_dump(compiler
->code
);