2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 #include "radeon_compiler.h"
27 #include "../r300_reg.h"
29 #include "radeon_dataflow.h"
30 #include "radeon_program_alu.h"
31 #include "radeon_swizzle.h"
32 #include "radeon_emulate_branches.h"
35 * Take an already-setup and valid source then swizzle it appropriately to
36 * obtain a constant ZERO or ONE source.
// Expands at the call site and relies on `vp` and `vpi` being in scope there.
// `x` selects which vpi->SrcReg[] entry supplies the register index, the
// register class and the RelAddr bit (shifted left by 4); the last operand
// argument is fixed to RC_MASK_NONE.
// NOTE(review): `y` is not referenced in the lines visible here; the swizzle
// arguments of PVS_SRC_OPERAND appear to be missing from this copy. Confirm
// against the upstream file before relying on this macro.
38 #define __CONST(x, y) \
39 (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \
44 t_src_class(vpi->SrcReg[x].File), \
45 RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
48 static unsigned long t_dst_mask(unsigned int mask
)
50 /* RC_MASK_* is equivalent to VSF_FLAG_* */
51 return mask
& RC_MASK_XYZW
;
// Translate an rc_register_file value into a PVS_DST_REG_* destination class.
// The visible lines print a "Bad register file" diagnostic to stderr and can
// return PVS_DST_REG_TEMPORARY (for RC_FILE_TEMPORARY), PVS_DST_REG_OUT or
// PVS_DST_REG_A0.
// NOTE(review): the switch header and the case labels guarding the OUT and A0
// returns are not visible in this copy; confirm which files map to them.
54 static unsigned long t_dst_class(rc_register_file file
)
58 fprintf(stderr
, "%s: Bad register file %i\n", __FUNCTION__
, file
);
60 case RC_FILE_TEMPORARY
:
61 return PVS_DST_REG_TEMPORARY
;
63 return PVS_DST_REG_OUT
;
65 return PVS_DST_REG_A0
;
// Return the hardware register index for a destination register.
// Destinations in RC_FILE_OUTPUT are remapped through vp->outputs[], indexed
// by the register's own Index.
// NOTE(review): the result for every other register file is not visible in
// this copy.
69 static unsigned long t_dst_index(struct r300_vertex_program_code
*vp
,
70 struct rc_dst_register
*dst
)
72 if (dst
->File
== RC_FILE_OUTPUT
)
73 return vp
->outputs
[dst
->Index
];
// Translate an rc_register_file value into a PVS_SRC_REG_* source class.
// The visible lines print a "Bad register file" diagnostic to stderr and can
// return PVS_SRC_REG_TEMPORARY (for RC_FILE_TEMPORARY), PVS_SRC_REG_INPUT, or
// PVS_SRC_REG_CONSTANT (for RC_FILE_CONSTANT).
// NOTE(review): the switch header and some case labels are not visible here.
78 static unsigned long t_src_class(rc_register_file file
)
82 fprintf(stderr
, "%s: Bad register file %i\n", __FUNCTION__
, file
);
85 case RC_FILE_TEMPORARY
:
86 return PVS_SRC_REG_TEMPORARY
;
88 return PVS_SRC_REG_INPUT
;
89 case RC_FILE_CONSTANT
:
90 return PVS_SRC_REG_CONSTANT
;
// Decide whether two source operands conflict.
// Both operands are first reduced to their hardware source class with
// t_src_class(). The visible tests then look at whether the class is
// PVS_SRC_REG_TEMPORARY, whether either operand uses relative addressing,
// and whether the two register indices differ.
// NOTE(review): the return statements (and any comparison of aclass with
// bclass) are not visible in this copy. transform_source_conflicts() treats a
// nonzero result as "insert a MOV through a temporary".
94 static int t_src_conflict(struct rc_src_register a
, struct rc_src_register b
)
96 unsigned long aclass
= t_src_class(a
.File
);
97 unsigned long bclass
= t_src_class(b
.File
);
101 if (aclass
== PVS_SRC_REG_TEMPORARY
)
104 if (a
.RelAddr
|| b
.RelAddr
)
106 if (a
.Index
!= b
.Index
)
// Convert an RC_SWIZZLE_* selector to the hardware component selector.
// NOTE(review): the function body is not visible in this copy; per the
// comment below the conversion is expected to be an identity mapping.
112 static inline unsigned long t_swizzle(unsigned int swizzle
)
114 /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
// Return the hardware register index for a source register.
// Sources in RC_FILE_INPUT are remapped through vp->inputs[]; the mapping is
// asserted to be different from -1 before it is used.
// For other files a negative Index leads to the "negative offsets for
// indirect addressing do not work" diagnostic.
// NOTE(review): the call that emits that message and the return values of the
// non-input path are not visible in this copy.
118 static unsigned long t_src_index(struct r300_vertex_program_code
*vp
,
119 struct rc_src_register
*src
)
121 if (src
->File
== RC_FILE_INPUT
) {
122 assert(vp
->inputs
[src
->Index
] != -1);
123 return vp
->inputs
[src
->Index
];
125 if (src
->Index
< 0) {
127 "negative offsets for indirect addressing do not work.\n");
134 /* these two functions should probably be merged... */
136 static unsigned long t_src(struct r300_vertex_program_code
*vp
,
137 struct rc_src_register
*src
)
139 /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
140 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
142 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
143 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
144 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
145 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
146 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
147 t_src_class(src
->File
),
148 src
->Negate
) | (src
->RelAddr
<< 4);
// Build the hardware source-operand word for a scalar (math engine) source.
// Unlike t_src(), swizzle component 0 is replicated into all four channels,
// and the negate field is all-or-nothing: RC_MASK_XYZW when src->Negate is
// nonzero, RC_MASK_NONE otherwise.
// NOTE(review): the expression ends in a dangling `|` whose right-hand operand
// is not visible in this copy; t_src() ORs in (src->RelAddr << 4) at the same
// position. Confirm against the upstream file.
151 static unsigned long t_src_scalar(struct r300_vertex_program_code
*vp
,
152 struct rc_src_register
*src
)
154 /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
155 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
157 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
158 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
159 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
160 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
161 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
162 t_src_class(src
->File
),
163 src
->Negate
? RC_MASK_XYZW
: RC_MASK_NONE
) |
// Check whether a destination register can be encoded.
// An RC_FILE_OUTPUT destination whose vp->outputs[] entry is -1 gets its own
// branch; an RC_FILE_ADDRESS destination is asserted to have Index 0.
// NOTE(review): the return statements are not visible in this copy.
// translate_vertex_program() tests the result with `!valid_dst(...)` under
// the comment "Skip instructions writing to non-existing destination".
167 static int valid_dst(struct r300_vertex_program_code
*vp
,
168 struct rc_dst_register
*dst
)
170 if (dst
->File
== RC_FILE_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
172 } else if (dst
->File
== RC_FILE_ADDRESS
) {
173 assert(dst
->Index
== 0);
// Encode a one-source vector-engine instruction into inst[0..3].
//   inst[0]: opcode/destination word built with PVS_OP_DST_OPERAND from
//            hw_opcode and the destination index, write mask and class
//   inst[1]: source 0, encoded with t_src()
//   inst[2], inst[3]: __CONST(0, RC_SWIZZLE_ZERO), i.e. based on source 0
// NOTE(review): the declaration of the `inst` parameter and some arguments of
// PVS_OP_DST_OPERAND are not visible in this copy.
179 static void ei_vector1(struct r300_vertex_program_code
*vp
,
180 unsigned int hw_opcode
,
181 struct rc_sub_instruction
*vpi
,
184 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
187 t_dst_index(vp
, &vpi
->DstReg
),
188 t_dst_mask(vpi
->DstReg
.WriteMask
),
189 t_dst_class(vpi
->DstReg
.File
));
190 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
191 inst
[2] = __CONST(0, RC_SWIZZLE_ZERO
);
192 inst
[3] = __CONST(0, RC_SWIZZLE_ZERO
);
// Encode a two-source vector-engine instruction into inst[0..3].
//   inst[0]: opcode/destination word built with PVS_OP_DST_OPERAND
//   inst[1]: source 0, encoded with t_src()
//   inst[2]: source 1, encoded with t_src()
//   inst[3]: __CONST(1, RC_SWIZZLE_ZERO), i.e. based on source 1
// NOTE(review): the declaration of the `inst` parameter and some arguments of
// PVS_OP_DST_OPERAND are not visible in this copy.
195 static void ei_vector2(struct r300_vertex_program_code
*vp
,
196 unsigned int hw_opcode
,
197 struct rc_sub_instruction
*vpi
,
200 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
203 t_dst_index(vp
, &vpi
->DstReg
),
204 t_dst_mask(vpi
->DstReg
.WriteMask
),
205 t_dst_class(vpi
->DstReg
.File
));
206 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
207 inst
[2] = t_src(vp
, &vpi
->SrcReg
[1]);
208 inst
[3] = __CONST(1, RC_SWIZZLE_ZERO
);
// Encode a one-source math-engine instruction into inst[0..3].
//   inst[0]: opcode/destination word built with PVS_OP_DST_OPERAND
//   inst[1]: source 0 as a scalar operand, encoded with t_src_scalar()
//   inst[2], inst[3]: __CONST(0, RC_SWIZZLE_ZERO), i.e. based on source 0
// NOTE(review): the declaration of the `inst` parameter and some arguments of
// PVS_OP_DST_OPERAND are not visible in this copy.
211 static void ei_math1(struct r300_vertex_program_code
*vp
,
212 unsigned int hw_opcode
,
213 struct rc_sub_instruction
*vpi
,
216 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
219 t_dst_index(vp
, &vpi
->DstReg
),
220 t_dst_mask(vpi
->DstReg
.WriteMask
),
221 t_dst_class(vpi
->DstReg
.File
));
222 inst
[1] = t_src_scalar(vp
, &vpi
->SrcReg
[0]);
223 inst
[2] = __CONST(0, RC_SWIZZLE_ZERO
);
224 inst
[3] = __CONST(0, RC_SWIZZLE_ZERO
);
// Encode LIT as ME_LIGHT_COEFF_DX into inst[0..3].
// All three source words are built from source register 0, each with a
// different channel arrangement of its swizzle; the third channel is always
// PVS_SRC_SELECT_FORCE_0:
//   inst[1]: components 0, 3, FORCE_0, 1   (X, W, 0, Y)
//   inst[2]: components 1, 3, FORCE_0, 0   (Y, W, 0, X)
//   inst[3]: components 1, 0, FORCE_0, 3   (Y, X, 0, W)
// The negate field is all-or-nothing (RC_MASK_XYZW or RC_MASK_NONE) and the
// RelAddr flag is OR-ed in at bit 4 of every source word.
// NOTE(review): the declaration of the `inst` parameter and some arguments of
// PVS_OP_DST_OPERAND are not visible in this copy.
227 static void ei_lit(struct r300_vertex_program_code
*vp
,
228 struct rc_sub_instruction
*vpi
,
231 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
233 inst
[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX
,
236 t_dst_index(vp
, &vpi
->DstReg
),
237 t_dst_mask(vpi
->DstReg
.WriteMask
),
238 t_dst_class(vpi
->DstReg
.File
));
239 /* NOTE: Users swizzling might not work. */
240 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
241 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
242 PVS_SRC_SELECT_FORCE_0
, // Z
243 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
244 t_src_class(vpi
->SrcReg
[0].File
),
245 vpi
->SrcReg
[0].Negate
? RC_MASK_XYZW
: RC_MASK_NONE
) |
246 (vpi
->SrcReg
[0].RelAddr
<< 4);
247 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
248 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
249 PVS_SRC_SELECT_FORCE_0
, // Z
250 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
251 t_src_class(vpi
->SrcReg
[0].File
),
252 vpi
->SrcReg
[0].Negate
? RC_MASK_XYZW
: RC_MASK_NONE
) |
253 (vpi
->SrcReg
[0].RelAddr
<< 4);
254 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
255 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
256 PVS_SRC_SELECT_FORCE_0
, // Z
257 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
258 t_src_class(vpi
->SrcReg
[0].File
),
259 vpi
->SrcReg
[0].Negate
? RC_MASK_XYZW
: RC_MASK_NONE
) |
260 (vpi
->SrcReg
[0].RelAddr
<< 4);
// Encode MAD into inst[0..3].
// Two opcode/destination words are visible: one using PVS_MACRO_OP_2CLK_MADD,
// guarded by "all three sources are RC_FILE_TEMPORARY with pairwise different
// indices", and one using VE_MULTIPLY_ADD. The three sources are then encoded
// with t_src() into inst[1], inst[2] and inst[3].
// NOTE(review): the line separating the two inst[0] assignments, the `inst`
// parameter declaration and some PVS_OP_DST_OPERAND arguments are not visible
// in this copy; per the remarks below, the non-macro form is the fallback.
263 static void ei_mad(struct r300_vertex_program_code
*vp
,
264 struct rc_sub_instruction
*vpi
,
267 /* Remarks about hardware limitations of MAD
268 * (please preserve this comment, as this information is _NOT_
269 * in the documentation provided by AMD).
271 * As described in the documentation, MAD with three unique temporary
272 * source registers requires the use of the macro version.
274 * However (and this is not mentioned in the documentation), apparently
275 * the macro version is _NOT_ a full superset of the normal version.
276 * In particular, the macro version does not always work when relative
277 * addressing is used in the source operands.
279 * This limitation caused incorrect rendering in Sauerbraten's OpenGL
280 * assembly shader path when using medium quality animations
281 * (i.e. animations with matrix blending instead of quaternion blending).
283 * Unfortunately, I (nha) have been unable to extract a Piglit regression
284 * test for this issue - for some reason, it is possible to have vertex
285 * programs whose prefix is *exactly* the same as the prefix of the
286 * offending program in Sauerbraten up to the offending instruction
287 * without causing any trouble.
289 * Bottom line: Only use the macro version only when really necessary;
290 * according to AMD docs, this should improve performance by one clock
291 * as a nice side bonus.
293 if (vpi
->SrcReg
[0].File
== RC_FILE_TEMPORARY
&&
294 vpi
->SrcReg
[1].File
== RC_FILE_TEMPORARY
&&
295 vpi
->SrcReg
[2].File
== RC_FILE_TEMPORARY
&&
296 vpi
->SrcReg
[0].Index
!= vpi
->SrcReg
[1].Index
&&
297 vpi
->SrcReg
[0].Index
!= vpi
->SrcReg
[2].Index
&&
298 vpi
->SrcReg
[1].Index
!= vpi
->SrcReg
[2].Index
) {
299 inst
[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD
,
302 t_dst_index(vp
, &vpi
->DstReg
),
303 t_dst_mask(vpi
->DstReg
.WriteMask
),
304 t_dst_class(vpi
->DstReg
.File
));
306 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
309 t_dst_index(vp
, &vpi
->DstReg
),
310 t_dst_mask(vpi
->DstReg
.WriteMask
),
311 t_dst_class(vpi
->DstReg
.File
));
313 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
314 inst
[2] = t_src(vp
, &vpi
->SrcReg
[1]);
315 inst
[3] = t_src(vp
, &vpi
->SrcReg
[2]);
// Encode POW as ME_POWER_FUNC_FF into inst[0..3].
//   inst[0]: opcode/destination word built with PVS_OP_DST_OPERAND
//   inst[1]: source 0 as a scalar operand
//   inst[2]: __CONST(0, RC_SWIZZLE_ZERO), i.e. based on source 0
//   inst[3]: source 1 as a scalar operand
// NOTE(review): the declaration of the `inst` parameter and some arguments of
// PVS_OP_DST_OPERAND are not visible in this copy.
318 static void ei_pow(struct r300_vertex_program_code
*vp
,
319 struct rc_sub_instruction
*vpi
,
322 inst
[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF
,
325 t_dst_index(vp
, &vpi
->DstReg
),
326 t_dst_mask(vpi
->DstReg
.WriteMask
),
327 t_dst_class(vpi
->DstReg
.File
));
328 inst
[1] = t_src_scalar(vp
, &vpi
->SrcReg
[0]);
329 inst
[2] = __CONST(0, RC_SWIZZLE_ZERO
);
330 inst
[3] = t_src_scalar(vp
, &vpi
->SrcReg
[1]);
// Emit hardware instruction words for every instruction of the program.
// Resets code->pos_end and code->length, calls compiler->SetHwInputOutput(),
// then walks the circular instruction list until it is back at the list head.
// Each handled opcode is encoded by one of the ei_* helpers into the four
// words starting at code->body.d + code->length, after which length grows
// by 4. Reaching VSF_MAX_FRAGMENT_LENGTH or meeting an unhandled opcode is
// reported through rc_error(); compiler->Base.Error is checked afterwards.
// NOTE(review): the statements that follow the valid_dst(), length and Error
// checks, and the `default:` label, are not visible in this copy.
334 static void translate_vertex_program(struct r300_vertex_program_compiler
* compiler
)
336 struct rc_instruction
*rci
;
338 compiler
->code
->pos_end
= 0; /* Not supported yet */
339 compiler
->code
->length
= 0;
341 compiler
->SetHwInputOutput(compiler
);
343 for(rci
= compiler
->Base
.Program
.Instructions
.Next
; rci
!= &compiler
->Base
.Program
.Instructions
; rci
= rci
->Next
) {
344 struct rc_sub_instruction
*vpi
= &rci
->U
.I
;
345 unsigned int *inst
= compiler
->code
->body
.d
+ compiler
->code
->length
;
347 /* Skip instructions writing to non-existing destination */
348 if (!valid_dst(compiler
->code
, &vpi
->DstReg
))
351 if (compiler
->code
->length
>= VSF_MAX_FRAGMENT_LENGTH
) {
352 rc_error(&compiler
->Base
, "Vertex program has too many instructions\n");
356 switch (vpi
->Opcode
) {
357 case RC_OPCODE_ADD
: ei_vector2(compiler
->code
, VE_ADD
, vpi
, inst
); break;
358 case RC_OPCODE_ARL
: ei_vector1(compiler
->code
, VE_FLT2FIX_DX
, vpi
, inst
); break;
359 case RC_OPCODE_DP4
: ei_vector2(compiler
->code
, VE_DOT_PRODUCT
, vpi
, inst
); break;
360 case RC_OPCODE_DST
: ei_vector2(compiler
->code
, VE_DISTANCE_VECTOR
, vpi
, inst
); break;
361 case RC_OPCODE_EX2
: ei_math1(compiler
->code
, ME_EXP_BASE2_FULL_DX
, vpi
, inst
); break;
362 case RC_OPCODE_EXP
: ei_math1(compiler
->code
, ME_EXP_BASE2_DX
, vpi
, inst
); break;
363 case RC_OPCODE_FRC
: ei_vector1(compiler
->code
, VE_FRACTION
, vpi
, inst
); break;
364 case RC_OPCODE_LG2
: ei_math1(compiler
->code
, ME_LOG_BASE2_FULL_DX
, vpi
, inst
); break;
365 case RC_OPCODE_LIT
: ei_lit(compiler
->code
, vpi
, inst
); break;
366 case RC_OPCODE_LOG
: ei_math1(compiler
->code
, ME_LOG_BASE2_DX
, vpi
, inst
); break;
367 case RC_OPCODE_MAD
: ei_mad(compiler
->code
, vpi
, inst
); break;
368 case RC_OPCODE_MAX
: ei_vector2(compiler
->code
, VE_MAXIMUM
, vpi
, inst
); break;
369 case RC_OPCODE_MIN
: ei_vector2(compiler
->code
, VE_MINIMUM
, vpi
, inst
); break;
// MOV is emitted as VE_ADD through the one-source helper, whose remaining
// source words are the __CONST(..., RC_SWIZZLE_ZERO) operands.
370 case RC_OPCODE_MOV
: ei_vector1(compiler
->code
, VE_ADD
, vpi
, inst
); break;
371 case RC_OPCODE_MUL
: ei_vector2(compiler
->code
, VE_MULTIPLY
, vpi
, inst
); break;
372 case RC_OPCODE_POW
: ei_pow(compiler
->code
, vpi
, inst
); break;
373 case RC_OPCODE_RCP
: ei_math1(compiler
->code
, ME_RECIP_DX
, vpi
, inst
); break;
374 case RC_OPCODE_RSQ
: ei_math1(compiler
->code
, ME_RECIP_SQRT_DX
, vpi
, inst
); break;
375 case RC_OPCODE_SGE
: ei_vector2(compiler
->code
, VE_SET_GREATER_THAN_EQUAL
, vpi
, inst
); break;
376 case RC_OPCODE_SLT
: ei_vector2(compiler
->code
, VE_SET_LESS_THAN
, vpi
, inst
); break;
378 rc_error(&compiler
->Base
, "Unknown opcode %i\n", vpi
->Opcode
);
382 compiler
->code
->length
+= 4;
384 if (compiler
->Base
.Error
)
/**
 * Bookkeeping for one original (pre-allocation) temporary register,
 * as used by allocate_temporary_registers().
 */
struct temporary_allocation {
	/** Set to 1 once a hardware temporary has been assigned. */
	unsigned int Allocated : 1;

	/** Hardware temporary that replaces the original register index. */
	unsigned int HwTemp : 15;

	/** Last instruction that reads this temporary as a source. */
	struct rc_instruction *LastRead;
};
// Remap the program's temporary registers onto hardware temporaries.
//   Pass 1 finds the highest temporary index used as a source or destination
//          and sizes the zero-initialised `ta` array (from the compiler's
//          memory pool) to one entry per original temporary.
//   Pass 2 records, per original temporary, the last instruction reading it.
//   Pass 3 rewrites source and destination indices to ta[].HwTemp, clears the
//          hwtemps[] slot when an allocated temporary reaches its last read,
//          and raises code->num_temporaries to the highest slot used plus 1.
// Running out of slots prints "Out of hw temporaries" to stderr.
// NOTE(review): the declarations of `i` and `j`, and the lines that pick a
// free hwtemps[] slot and store it in HwTemp, are not visible in this copy.
395 static void allocate_temporary_registers(struct r300_vertex_program_compiler
* compiler
)
397 struct rc_instruction
*inst
;
398 unsigned int num_orig_temps
= 0;
399 char hwtemps
[VSF_MAX_FRAGMENT_TEMPS
];
400 struct temporary_allocation
* ta
;
403 compiler
->code
->num_temporaries
= 0;
404 memset(hwtemps
, 0, sizeof(hwtemps
));
406 /* Pass 1: Count original temporaries and allocate structures */
407 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
408 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
410 for (i
= 0; i
< opcode
->NumSrcRegs
; ++i
) {
411 if (inst
->U
.I
.SrcReg
[i
].File
== RC_FILE_TEMPORARY
) {
412 if (inst
->U
.I
.SrcReg
[i
].Index
>= num_orig_temps
)
413 num_orig_temps
= inst
->U
.I
.SrcReg
[i
].Index
+ 1;
417 if (opcode
->HasDstReg
) {
418 if (inst
->U
.I
.DstReg
.File
== RC_FILE_TEMPORARY
) {
419 if (inst
->U
.I
.DstReg
.Index
>= num_orig_temps
)
420 num_orig_temps
= inst
->U
.I
.DstReg
.Index
+ 1;
425 ta
= (struct temporary_allocation
*)memory_pool_malloc(&compiler
->Base
.Pool
,
426 sizeof(struct temporary_allocation
) * num_orig_temps
);
427 memset(ta
, 0, sizeof(struct temporary_allocation
) * num_orig_temps
);
429 /* Pass 2: Determine original temporary lifetimes */
430 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
431 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
433 for (i
= 0; i
< opcode
->NumSrcRegs
; ++i
) {
434 if (inst
->U
.I
.SrcReg
[i
].File
== RC_FILE_TEMPORARY
)
435 ta
[inst
->U
.I
.SrcReg
[i
].Index
].LastRead
= inst
;
439 /* Pass 3: Register allocation */
440 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
441 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
443 for (i
= 0; i
< opcode
->NumSrcRegs
; ++i
) {
444 if (inst
->U
.I
.SrcReg
[i
].File
== RC_FILE_TEMPORARY
) {
445 unsigned int orig
= inst
->U
.I
.SrcReg
[i
].Index
;
446 inst
->U
.I
.SrcReg
[i
].Index
= ta
[orig
].HwTemp
;
// Sources are processed before the destination, so a slot released at the
// last read can be handed to this same instruction's destination.
448 if (ta
[orig
].Allocated
&& inst
== ta
[orig
].LastRead
)
449 hwtemps
[ta
[orig
].HwTemp
] = 0;
453 if (opcode
->HasDstReg
) {
454 if (inst
->U
.I
.DstReg
.File
== RC_FILE_TEMPORARY
) {
455 unsigned int orig
= inst
->U
.I
.DstReg
.Index
;
457 if (!ta
[orig
].Allocated
) {
458 for(j
= 0; j
< VSF_MAX_FRAGMENT_TEMPS
; ++j
) {
462 if (j
>= VSF_MAX_FRAGMENT_TEMPS
) {
463 fprintf(stderr
, "Out of hw temporaries\n");
465 ta
[orig
].Allocated
= 1;
469 if (j
>= compiler
->code
->num_temporaries
)
470 compiler
->code
->num_temporaries
= j
+ 1;
474 inst
->U
.I
.DstReg
.Index
= ta
[orig
].HwTemp
;
482 * Vertex engine cannot read two inputs or two constants at the same time.
483 * Introduce intermediate MOVs to temporary registers to account for this.
// Local transformation that resolves source-operand conflicts.
// For three-source opcodes: if source 2 conflicts with source 1 or with
// source 0 (per t_src_conflict), a MOV copying source 2 into a free temporary
// is created with rc_insert_new_instruction(c, inst->Prev), and source 2 is
// reset and redirected to that temporary.
// For opcodes with at least two sources: the same is done for source 1 when
// it conflicts with source 0.
// NOTE(review): the last parameter and the return statement are not visible
// in this copy.
485 static int transform_source_conflicts(
486 struct radeon_compiler
*c
,
487 struct rc_instruction
* inst
,
490 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
492 if (opcode
->NumSrcRegs
== 3) {
493 if (t_src_conflict(inst
->U
.I
.SrcReg
[1], inst
->U
.I
.SrcReg
[2])
494 || t_src_conflict(inst
->U
.I
.SrcReg
[0], inst
->U
.I
.SrcReg
[2])) {
495 int tmpreg
= rc_find_free_temporary(c
);
496 struct rc_instruction
* inst_mov
= rc_insert_new_instruction(c
, inst
->Prev
);
497 inst_mov
->U
.I
.Opcode
= RC_OPCODE_MOV
;
498 inst_mov
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
499 inst_mov
->U
.I
.DstReg
.Index
= tmpreg
;
500 inst_mov
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
502 reset_srcreg(&inst
->U
.I
.SrcReg
[2]);
503 inst
->U
.I
.SrcReg
[2].File
= RC_FILE_TEMPORARY
;
504 inst
->U
.I
.SrcReg
[2].Index
= tmpreg
;
// Second stage: runs for two- and three-source opcodes alike.
508 if (opcode
->NumSrcRegs
>= 2) {
509 if (t_src_conflict(inst
->U
.I
.SrcReg
[1], inst
->U
.I
.SrcReg
[0])) {
510 int tmpreg
= rc_find_free_temporary(c
);
511 struct rc_instruction
* inst_mov
= rc_insert_new_instruction(c
, inst
->Prev
);
512 inst_mov
->U
.I
.Opcode
= RC_OPCODE_MOV
;
513 inst_mov
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
514 inst_mov
->U
.I
.DstReg
.Index
= tmpreg
;
515 inst_mov
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
517 reset_srcreg(&inst
->U
.I
.SrcReg
[1]);
518 inst
->U
.I
.SrcReg
[1].File
= RC_FILE_TEMPORARY
;
519 inst
->U
.I
.SrcReg
[1].Index
= tmpreg
;
526 static void addArtificialOutputs(struct r300_vertex_program_compiler
* compiler
)
530 for(i
= 0; i
< 32; ++i
) {
531 if ((compiler
->RequiredOutputs
& (1 << i
)) &&
532 !(compiler
->Base
.Program
.OutputsWritten
& (1 << i
))) {
533 struct rc_instruction
* inst
= rc_insert_new_instruction(&compiler
->Base
, compiler
->Base
.Program
.Instructions
.Prev
);
534 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
536 inst
->U
.I
.DstReg
.File
= RC_FILE_OUTPUT
;
537 inst
->U
.I
.DstReg
.Index
= i
;
538 inst
->U
.I
.DstReg
.WriteMask
= RC_MASK_XYZW
;
540 inst
->U
.I
.SrcReg
[0].File
= RC_FILE_CONSTANT
;
541 inst
->U
.I
.SrcReg
[0].Index
= 0;
542 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_XYZW
;
544 compiler
->Base
.Program
.OutputsWritten
|= 1 << i
;
549 static void dataflow_outputs_mark_used(void * userdata
, void * data
,
550 void (*callback
)(void *, unsigned int, unsigned int))
552 struct r300_vertex_program_compiler
* c
= userdata
;
555 for(i
= 0; i
< 32; ++i
) {
556 if (c
->RequiredOutputs
& (1 << i
))
557 callback(data
, i
, RC_MASK_XYZW
);
// IsNative callback installed in r300_vertprog_swizzle_caps.
// NOTE(review): the function body is not visible in this copy, so which
// opcode/swizzle combinations it accepts cannot be stated from here.
561 static int swizzle_is_native(rc_opcode opcode
, struct rc_src_register reg
)
// Debug helper: writes a "Vertex Program: <where>" header to stderr and then
// prints the current program with rc_print_program().
// `where` names the compilation stage that has just run.
// NOTE(review): any condition guarding the output is not visible in this copy.
569 static void debug_program_log(struct r300_vertex_program_compiler
* c
, const char * where
)
572 fprintf(stderr
, "Vertex Program: %s\n", where
);
573 rc_print_program(&c
->Base
.Program
);
578 static struct rc_swizzle_caps r300_vertprog_swizzle_caps
= {
579 .IsNative
= &swizzle_is_native
,
580 .Split
= 0 /* should never be called */
// Entry point: compile compiler->Base.Program into compiler->code.
// Stages, in the order visible below:
//   1. install the vertex-program swizzle capabilities
//   2. addArtificialOutputs()
//   3. rc_emulate_branches()
//   4. local transform r300_transform_vertex_alu (native rewrite)
//   5. local transform transform_source_conflicts (separate pass)
//   6. rc_dataflow_deadcode() with dataflow_outputs_mark_used()
//   7. rc_dataflow_swizzles()
//   8. allocate_temporary_registers()
//   9. translate_vertex_program()
//  10. copy constants and the InputsRead/OutputsWritten masks into code
// debug_program_log() is called between stages, and the final code is dumped
// with r300_vertex_program_dump() when compiler->Base.Debug is set.
584 void r3xx_compile_vertex_program(struct r300_vertex_program_compiler
* compiler
)
586 compiler
->Base
.SwizzleCaps
= &r300_vertprog_swizzle_caps
;
588 addArtificialOutputs(compiler
);
590 debug_program_log(compiler
, "before compilation");
592 /* XXX Ideally this should be done only for r3xx, but since
593 * we don't have branching support for r5xx, we use the emulation
594 * on all chipsets. */
595 rc_emulate_branches(&compiler
->Base
);
597 debug_program_log(compiler
, "after emulate branches");
600 struct radeon_program_transformation transformations
[] = {
601 { &r300_transform_vertex_alu
, 0 },
603 radeonLocalTransform(&compiler
->Base
, 1, transformations
);
606 debug_program_log(compiler
, "after native rewrite");
609 /* Note: This pass has to be done separately from ALU rewrite,
610 * otherwise non-native ALU instructions with source conflicts
611 * will not be treated properly.
613 struct radeon_program_transformation transformations
[] = {
614 { &transform_source_conflicts
, 0 },
616 radeonLocalTransform(&compiler
->Base
, 1, transformations
);
619 debug_program_log(compiler
, "after source conflict resolve");
621 rc_dataflow_deadcode(&compiler
->Base
, &dataflow_outputs_mark_used
, compiler
);
623 debug_program_log(compiler
, "after deadcode");
625 rc_dataflow_swizzles(&compiler
->Base
);
627 allocate_temporary_registers(compiler
);
629 debug_program_log(compiler
, "after dataflow");
631 translate_vertex_program(compiler
);
633 rc_constants_copy(&compiler
->code
->constants
, &compiler
->Base
.Program
.Constants
);
635 compiler
->code
->InputsRead
= compiler
->Base
.Program
.InputsRead
;
636 compiler
->code
->OutputsWritten
= compiler
->Base
.Program
.OutputsWritten
;
638 if (compiler
->Base
.Debug
) {
639 fprintf(stderr
, "Final vertex program code:\n");
640 r300_vertex_program_dump(compiler
->code
);