/*
 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "radeon_compiler.h"

#include <assert.h>
#include <stdio.h>
#include <string.h>

#include "../r300_reg.h"

#include "radeon_dataflow.h"
#include "radeon_program_alu.h"
#include "radeon_swizzle.h"
/*
 * Take an already-setup and valid source then swizzle it appropriately to
 * obtain a constant ZERO or ONE source.
 *
 * x: index into vpi->SrcReg[] of the source whose register index, register
 *    class and relative-addressing bit are reused.
 * y: swizzle selector (e.g. RC_SWIZZLE_ZERO) applied to all four components.
 *
 * The macro expands in place and expects `vp` and `vpi` to be in scope at
 * the point of use.
 *
 * NOTE(review): the four t_swizzle(y) arguments were reconstructed (they
 * were missing from the reviewed copy); they follow from the 7-argument
 * PVS_SRC_OPERAND calls elsewhere in this file and from `y` being otherwise
 * unused.
 */
#define __CONST(x, y)	\
	(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]),	\
			 t_swizzle(y),	\
			 t_swizzle(y),	\
			 t_swizzle(y),	\
			 t_swizzle(y),	\
			 t_src_class(vpi->SrcReg[x].File), \
			 RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
48 static unsigned long t_dst_mask(unsigned int mask
)
50 /* RC_MASK_* is equivalent to VSF_FLAG_* */
51 return mask
& RC_MASK_XYZW
;
54 static unsigned long t_dst_class(rc_register_file file
)
58 fprintf(stderr
, "%s: Bad register file %i\n", __FUNCTION__
, file
);
60 case RC_FILE_TEMPORARY
:
61 return PVS_DST_REG_TEMPORARY
;
63 return PVS_DST_REG_OUT
;
65 return PVS_DST_REG_A0
;
69 static unsigned long t_dst_index(struct r300_vertex_program_code
*vp
,
70 struct rc_dst_register
*dst
)
72 if (dst
->File
== RC_FILE_OUTPUT
)
73 return vp
->outputs
[dst
->Index
];
78 static unsigned long t_src_class(rc_register_file file
)
82 fprintf(stderr
, "%s: Bad register file %i\n", __FUNCTION__
, file
);
84 case RC_FILE_TEMPORARY
:
85 return PVS_SRC_REG_TEMPORARY
;
87 return PVS_SRC_REG_INPUT
;
88 case RC_FILE_CONSTANT
:
89 return PVS_SRC_REG_CONSTANT
;
93 static int t_src_conflict(struct rc_src_register a
, struct rc_src_register b
)
95 unsigned long aclass
= t_src_class(a
.File
);
96 unsigned long bclass
= t_src_class(b
.File
);
100 if (aclass
== PVS_SRC_REG_TEMPORARY
)
103 if (a
.RelAddr
|| b
.RelAddr
)
105 if (a
.Index
!= b
.Index
)
/**
 * Translate a compiler swizzle selector into the hardware encoding.
 *
 * \return the selector unchanged.
 */
static inline unsigned long t_swizzle(unsigned int swizzle)
{
	/* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
	return swizzle;
}
117 static unsigned long t_src_index(struct r300_vertex_program_code
*vp
,
118 struct rc_src_register
*src
)
120 if (src
->File
== RC_FILE_INPUT
) {
121 assert(vp
->inputs
[src
->Index
] != -1);
122 return vp
->inputs
[src
->Index
];
124 if (src
->Index
< 0) {
126 "negative offsets for indirect addressing do not work.\n");
133 /* these two functions should probably be merged... */
135 static unsigned long t_src(struct r300_vertex_program_code
*vp
,
136 struct rc_src_register
*src
)
138 /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
139 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
141 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
142 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
143 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
144 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
145 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
146 t_src_class(src
->File
),
147 src
->Negate
) | (src
->RelAddr
<< 4);
150 static unsigned long t_src_scalar(struct r300_vertex_program_code
*vp
,
151 struct rc_src_register
*src
)
153 /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
154 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
156 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
157 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
158 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
159 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
160 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
161 t_src_class(src
->File
),
162 src
->Negate
? RC_MASK_XYZW
: RC_MASK_NONE
) |
166 static int valid_dst(struct r300_vertex_program_code
*vp
,
167 struct rc_dst_register
*dst
)
169 if (dst
->File
== RC_FILE_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
171 } else if (dst
->File
== RC_FILE_ADDRESS
) {
172 assert(dst
->Index
== 0);
178 static void ei_vector1(struct r300_vertex_program_code
*vp
,
179 unsigned int hw_opcode
,
180 struct rc_sub_instruction
*vpi
,
183 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
186 t_dst_index(vp
, &vpi
->DstReg
),
187 t_dst_mask(vpi
->DstReg
.WriteMask
),
188 t_dst_class(vpi
->DstReg
.File
));
189 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
190 inst
[2] = __CONST(0, RC_SWIZZLE_ZERO
);
191 inst
[3] = __CONST(0, RC_SWIZZLE_ZERO
);
194 static void ei_vector2(struct r300_vertex_program_code
*vp
,
195 unsigned int hw_opcode
,
196 struct rc_sub_instruction
*vpi
,
199 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
202 t_dst_index(vp
, &vpi
->DstReg
),
203 t_dst_mask(vpi
->DstReg
.WriteMask
),
204 t_dst_class(vpi
->DstReg
.File
));
205 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
206 inst
[2] = t_src(vp
, &vpi
->SrcReg
[1]);
207 inst
[3] = __CONST(1, RC_SWIZZLE_ZERO
);
210 static void ei_math1(struct r300_vertex_program_code
*vp
,
211 unsigned int hw_opcode
,
212 struct rc_sub_instruction
*vpi
,
215 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
218 t_dst_index(vp
, &vpi
->DstReg
),
219 t_dst_mask(vpi
->DstReg
.WriteMask
),
220 t_dst_class(vpi
->DstReg
.File
));
221 inst
[1] = t_src_scalar(vp
, &vpi
->SrcReg
[0]);
222 inst
[2] = __CONST(0, RC_SWIZZLE_ZERO
);
223 inst
[3] = __CONST(0, RC_SWIZZLE_ZERO
);
226 static void ei_lit(struct r300_vertex_program_code
*vp
,
227 struct rc_sub_instruction
*vpi
,
230 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
232 inst
[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX
,
235 t_dst_index(vp
, &vpi
->DstReg
),
236 t_dst_mask(vpi
->DstReg
.WriteMask
),
237 t_dst_class(vpi
->DstReg
.File
));
238 /* NOTE: Users swizzling might not work. */
239 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
240 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
241 PVS_SRC_SELECT_FORCE_0
, // Z
242 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
243 t_src_class(vpi
->SrcReg
[0].File
),
244 vpi
->SrcReg
[0].Negate
? RC_MASK_XYZW
: RC_MASK_NONE
) |
245 (vpi
->SrcReg
[0].RelAddr
<< 4);
246 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
247 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
248 PVS_SRC_SELECT_FORCE_0
, // Z
249 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
250 t_src_class(vpi
->SrcReg
[0].File
),
251 vpi
->SrcReg
[0].Negate
? RC_MASK_XYZW
: RC_MASK_NONE
) |
252 (vpi
->SrcReg
[0].RelAddr
<< 4);
253 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
254 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
255 PVS_SRC_SELECT_FORCE_0
, // Z
256 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
257 t_src_class(vpi
->SrcReg
[0].File
),
258 vpi
->SrcReg
[0].Negate
? RC_MASK_XYZW
: RC_MASK_NONE
) |
259 (vpi
->SrcReg
[0].RelAddr
<< 4);
262 static void ei_mad(struct r300_vertex_program_code
*vp
,
263 struct rc_sub_instruction
*vpi
,
266 /* Remarks about hardware limitations of MAD
267 * (please preserve this comment, as this information is _NOT_
268 * in the documentation provided by AMD).
270 * As described in the documentation, MAD with three unique temporary
271 * source registers requires the use of the macro version.
273 * However (and this is not mentioned in the documentation), apparently
274 * the macro version is _NOT_ a full superset of the normal version.
275 * In particular, the macro version does not always work when relative
276 * addressing is used in the source operands.
278 * This limitation caused incorrect rendering in Sauerbraten's OpenGL
279 * assembly shader path when using medium quality animations
280 * (i.e. animations with matrix blending instead of quaternion blending).
282 * Unfortunately, I (nha) have been unable to extract a Piglit regression
283 * test for this issue - for some reason, it is possible to have vertex
284 * programs whose prefix is *exactly* the same as the prefix of the
285 * offending program in Sauerbraten up to the offending instruction
286 * without causing any trouble.
288 * Bottom line: Only use the macro version only when really necessary;
289 * according to AMD docs, this should improve performance by one clock
290 * as a nice side bonus.
292 if (vpi
->SrcReg
[0].File
== RC_FILE_TEMPORARY
&&
293 vpi
->SrcReg
[1].File
== RC_FILE_TEMPORARY
&&
294 vpi
->SrcReg
[2].File
== RC_FILE_TEMPORARY
&&
295 vpi
->SrcReg
[0].Index
!= vpi
->SrcReg
[1].Index
&&
296 vpi
->SrcReg
[0].Index
!= vpi
->SrcReg
[2].Index
&&
297 vpi
->SrcReg
[1].Index
!= vpi
->SrcReg
[2].Index
) {
298 inst
[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD
,
301 t_dst_index(vp
, &vpi
->DstReg
),
302 t_dst_mask(vpi
->DstReg
.WriteMask
),
303 t_dst_class(vpi
->DstReg
.File
));
305 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
308 t_dst_index(vp
, &vpi
->DstReg
),
309 t_dst_mask(vpi
->DstReg
.WriteMask
),
310 t_dst_class(vpi
->DstReg
.File
));
312 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
313 inst
[2] = t_src(vp
, &vpi
->SrcReg
[1]);
314 inst
[3] = t_src(vp
, &vpi
->SrcReg
[2]);
317 static void ei_pow(struct r300_vertex_program_code
*vp
,
318 struct rc_sub_instruction
*vpi
,
321 inst
[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF
,
324 t_dst_index(vp
, &vpi
->DstReg
),
325 t_dst_mask(vpi
->DstReg
.WriteMask
),
326 t_dst_class(vpi
->DstReg
.File
));
327 inst
[1] = t_src_scalar(vp
, &vpi
->SrcReg
[0]);
328 inst
[2] = __CONST(0, RC_SWIZZLE_ZERO
);
329 inst
[3] = t_src_scalar(vp
, &vpi
->SrcReg
[1]);
333 static void translate_vertex_program(struct r300_vertex_program_compiler
* compiler
)
335 struct rc_instruction
*rci
;
337 compiler
->code
->pos_end
= 0; /* Not supported yet */
338 compiler
->code
->length
= 0;
340 compiler
->SetHwInputOutput(compiler
);
342 for(rci
= compiler
->Base
.Program
.Instructions
.Next
; rci
!= &compiler
->Base
.Program
.Instructions
; rci
= rci
->Next
) {
343 struct rc_sub_instruction
*vpi
= &rci
->I
;
344 unsigned int *inst
= compiler
->code
->body
.d
+ compiler
->code
->length
;
346 /* Skip instructions writing to non-existing destination */
347 if (!valid_dst(compiler
->code
, &vpi
->DstReg
))
350 if (compiler
->code
->length
>= VSF_MAX_FRAGMENT_LENGTH
) {
351 rc_error(&compiler
->Base
, "Vertex program has too many instructions\n");
355 switch (vpi
->Opcode
) {
356 case RC_OPCODE_ADD
: ei_vector2(compiler
->code
, VE_ADD
, vpi
, inst
); break;
357 case RC_OPCODE_ARL
: ei_vector1(compiler
->code
, VE_FLT2FIX_DX
, vpi
, inst
); break;
358 case RC_OPCODE_DP4
: ei_vector2(compiler
->code
, VE_DOT_PRODUCT
, vpi
, inst
); break;
359 case RC_OPCODE_DST
: ei_vector2(compiler
->code
, VE_DISTANCE_VECTOR
, vpi
, inst
); break;
360 case RC_OPCODE_EX2
: ei_math1(compiler
->code
, ME_EXP_BASE2_FULL_DX
, vpi
, inst
); break;
361 case RC_OPCODE_EXP
: ei_math1(compiler
->code
, ME_EXP_BASE2_DX
, vpi
, inst
); break;
362 case RC_OPCODE_FRC
: ei_vector1(compiler
->code
, VE_FRACTION
, vpi
, inst
); break;
363 case RC_OPCODE_LG2
: ei_math1(compiler
->code
, ME_LOG_BASE2_FULL_DX
, vpi
, inst
); break;
364 case RC_OPCODE_LIT
: ei_lit(compiler
->code
, vpi
, inst
); break;
365 case RC_OPCODE_LOG
: ei_math1(compiler
->code
, ME_LOG_BASE2_DX
, vpi
, inst
); break;
366 case RC_OPCODE_MAD
: ei_mad(compiler
->code
, vpi
, inst
); break;
367 case RC_OPCODE_MAX
: ei_vector2(compiler
->code
, VE_MAXIMUM
, vpi
, inst
); break;
368 case RC_OPCODE_MIN
: ei_vector2(compiler
->code
, VE_MINIMUM
, vpi
, inst
); break;
369 case RC_OPCODE_MOV
: ei_vector1(compiler
->code
, VE_ADD
, vpi
, inst
); break;
370 case RC_OPCODE_MUL
: ei_vector2(compiler
->code
, VE_MULTIPLY
, vpi
, inst
); break;
371 case RC_OPCODE_POW
: ei_pow(compiler
->code
, vpi
, inst
); break;
372 case RC_OPCODE_RCP
: ei_math1(compiler
->code
, ME_RECIP_DX
, vpi
, inst
); break;
373 case RC_OPCODE_RSQ
: ei_math1(compiler
->code
, ME_RECIP_SQRT_DX
, vpi
, inst
); break;
374 case RC_OPCODE_SGE
: ei_vector2(compiler
->code
, VE_SET_GREATER_THAN_EQUAL
, vpi
, inst
); break;
375 case RC_OPCODE_SLT
: ei_vector2(compiler
->code
, VE_SET_LESS_THAN
, vpi
, inst
); break;
377 rc_error(&compiler
->Base
, "Unknown opcode %i\n", vpi
->Opcode
);
381 compiler
->code
->length
+= 4;
383 if (compiler
->Base
.Error
)
/**
 * Per-original-temporary bookkeeping used by allocate_temporary_registers().
 */
struct temporary_allocation {
	unsigned int Allocated:1;		/* set once HwTemp has been assigned */
	unsigned int HwTemp:15;			/* hardware temporary backing this register */
	struct rc_instruction * LastRead;	/* last instruction reading it; stays NULL if never read */
};
394 static void allocate_temporary_registers(struct r300_vertex_program_compiler
* compiler
)
396 struct rc_instruction
*inst
;
397 unsigned int num_orig_temps
= 0;
398 char hwtemps
[VSF_MAX_FRAGMENT_TEMPS
];
399 struct temporary_allocation
* ta
;
402 compiler
->code
->num_temporaries
= 0;
403 memset(hwtemps
, 0, sizeof(hwtemps
));
405 /* Pass 1: Count original temporaries and allocate structures */
406 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
407 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->I
.Opcode
);
409 for (i
= 0; i
< opcode
->NumSrcRegs
; ++i
) {
410 if (inst
->I
.SrcReg
[i
].File
== RC_FILE_TEMPORARY
) {
411 if (inst
->I
.SrcReg
[i
].Index
>= num_orig_temps
)
412 num_orig_temps
= inst
->I
.SrcReg
[i
].Index
+ 1;
416 if (opcode
->HasDstReg
) {
417 if (inst
->I
.DstReg
.File
== RC_FILE_TEMPORARY
) {
418 if (inst
->I
.DstReg
.Index
>= num_orig_temps
)
419 num_orig_temps
= inst
->I
.DstReg
.Index
+ 1;
424 ta
= (struct temporary_allocation
*)memory_pool_malloc(&compiler
->Base
.Pool
,
425 sizeof(struct temporary_allocation
) * num_orig_temps
);
426 memset(ta
, 0, sizeof(struct temporary_allocation
) * num_orig_temps
);
428 /* Pass 2: Determine original temporary lifetimes */
429 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
430 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->I
.Opcode
);
432 for (i
= 0; i
< opcode
->NumSrcRegs
; ++i
) {
433 if (inst
->I
.SrcReg
[i
].File
== RC_FILE_TEMPORARY
)
434 ta
[inst
->I
.SrcReg
[i
].Index
].LastRead
= inst
;
438 /* Pass 3: Register allocation */
439 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
440 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->I
.Opcode
);
442 for (i
= 0; i
< opcode
->NumSrcRegs
; ++i
) {
443 if (inst
->I
.SrcReg
[i
].File
== RC_FILE_TEMPORARY
) {
444 unsigned int orig
= inst
->I
.SrcReg
[i
].Index
;
445 inst
->I
.SrcReg
[i
].Index
= ta
[orig
].HwTemp
;
447 if (ta
[orig
].Allocated
&& inst
== ta
[orig
].LastRead
)
448 hwtemps
[ta
[orig
].HwTemp
] = 0;
452 if (opcode
->HasDstReg
) {
453 if (inst
->I
.DstReg
.File
== RC_FILE_TEMPORARY
) {
454 unsigned int orig
= inst
->I
.DstReg
.Index
;
456 if (!ta
[orig
].Allocated
) {
457 for(j
= 0; j
< VSF_MAX_FRAGMENT_TEMPS
; ++j
) {
461 if (j
>= VSF_MAX_FRAGMENT_TEMPS
) {
462 fprintf(stderr
, "Out of hw temporaries\n");
464 ta
[orig
].Allocated
= 1;
468 if (j
>= compiler
->code
->num_temporaries
)
469 compiler
->code
->num_temporaries
= j
+ 1;
473 inst
->I
.DstReg
.Index
= ta
[orig
].HwTemp
;
481 * Vertex engine cannot read two inputs or two constants at the same time.
482 * Introduce intermediate MOVs to temporary registers to account for this.
484 static int transform_source_conflicts(
485 struct radeon_compiler
*c
,
486 struct rc_instruction
* inst
,
489 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->I
.Opcode
);
491 if (opcode
->NumSrcRegs
== 3) {
492 if (t_src_conflict(inst
->I
.SrcReg
[1], inst
->I
.SrcReg
[2])
493 || t_src_conflict(inst
->I
.SrcReg
[0], inst
->I
.SrcReg
[2])) {
494 int tmpreg
= rc_find_free_temporary(c
);
495 struct rc_instruction
* inst_mov
= rc_insert_new_instruction(c
, inst
->Prev
);
496 inst_mov
->I
.Opcode
= RC_OPCODE_MOV
;
497 inst_mov
->I
.DstReg
.File
= RC_FILE_TEMPORARY
;
498 inst_mov
->I
.DstReg
.Index
= tmpreg
;
499 inst_mov
->I
.SrcReg
[0] = inst
->I
.SrcReg
[2];
501 reset_srcreg(&inst
->I
.SrcReg
[2]);
502 inst
->I
.SrcReg
[2].File
= RC_FILE_TEMPORARY
;
503 inst
->I
.SrcReg
[2].Index
= tmpreg
;
507 if (opcode
->NumSrcRegs
>= 2) {
508 if (t_src_conflict(inst
->I
.SrcReg
[1], inst
->I
.SrcReg
[0])) {
509 int tmpreg
= rc_find_free_temporary(c
);
510 struct rc_instruction
* inst_mov
= rc_insert_new_instruction(c
, inst
->Prev
);
511 inst_mov
->I
.Opcode
= RC_OPCODE_MOV
;
512 inst_mov
->I
.DstReg
.File
= RC_FILE_TEMPORARY
;
513 inst_mov
->I
.DstReg
.Index
= tmpreg
;
514 inst_mov
->I
.SrcReg
[0] = inst
->I
.SrcReg
[1];
516 reset_srcreg(&inst
->I
.SrcReg
[1]);
517 inst
->I
.SrcReg
[1].File
= RC_FILE_TEMPORARY
;
518 inst
->I
.SrcReg
[1].Index
= tmpreg
;
525 static void addArtificialOutputs(struct r300_vertex_program_compiler
* compiler
)
529 for(i
= 0; i
< 32; ++i
) {
530 if ((compiler
->RequiredOutputs
& (1 << i
)) &&
531 !(compiler
->Base
.Program
.OutputsWritten
& (1 << i
))) {
532 struct rc_instruction
* inst
= rc_insert_new_instruction(&compiler
->Base
, compiler
->Base
.Program
.Instructions
.Prev
);
533 inst
->I
.Opcode
= RC_OPCODE_MOV
;
535 inst
->I
.DstReg
.File
= RC_FILE_OUTPUT
;
536 inst
->I
.DstReg
.Index
= i
;
537 inst
->I
.DstReg
.WriteMask
= RC_MASK_XYZW
;
539 inst
->I
.SrcReg
[0].File
= RC_FILE_CONSTANT
;
540 inst
->I
.SrcReg
[0].Index
= 0;
541 inst
->I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_XYZW
;
543 compiler
->Base
.Program
.OutputsWritten
|= 1 << i
;
548 static void dataflow_outputs_mark_used(void * userdata
, void * data
,
549 void (*callback
)(void *, unsigned int, unsigned int))
551 struct r300_vertex_program_compiler
* c
= userdata
;
554 for(i
= 0; i
< 32; ++i
) {
555 if (c
->RequiredOutputs
& (1 << i
))
556 callback(data
, i
, RC_MASK_XYZW
);
560 static int swizzle_is_native(rc_opcode opcode
, struct rc_src_register reg
)
569 static struct rc_swizzle_caps r300_vertprog_swizzle_caps
= {
570 .IsNative
= &swizzle_is_native
,
571 .Split
= 0 /* should never be called */
575 void r3xx_compile_vertex_program(struct r300_vertex_program_compiler
* compiler
)
577 compiler
->Base
.SwizzleCaps
= &r300_vertprog_swizzle_caps
;
579 addArtificialOutputs(compiler
);
582 struct radeon_program_transformation transformations
[] = {
583 { &r300_transform_vertex_alu
, 0 },
585 radeonLocalTransform(&compiler
->Base
, 1, transformations
);
588 if (compiler
->Base
.Debug
) {
589 fprintf(stderr
, "Vertex program after native rewrite:\n");
590 rc_print_program(&compiler
->Base
.Program
, 0);
595 /* Note: This pass has to be done seperately from ALU rewrite,
596 * otherwise non-native ALU instructions with source conflits
597 * will not be treated properly.
599 struct radeon_program_transformation transformations
[] = {
600 { &transform_source_conflicts
, 0 },
602 radeonLocalTransform(&compiler
->Base
, 1, transformations
);
605 if (compiler
->Base
.Debug
) {
606 fprintf(stderr
, "Vertex program after source conflict resolve:\n");
607 rc_print_program(&compiler
->Base
.Program
, 0);
611 rc_dataflow_annotate(&compiler
->Base
, &dataflow_outputs_mark_used
, compiler
);
612 rc_dataflow_dealias(&compiler
->Base
);
613 rc_dataflow_swizzles(&compiler
->Base
);
615 /* This invalidates dataflow annotations and should be replaced
616 * by a future generic register allocation pass. */
617 allocate_temporary_registers(compiler
);
619 if (compiler
->Base
.Debug
) {
620 fprintf(stderr
, "Vertex program after dataflow:\n");
621 rc_print_program(&compiler
->Base
.Program
, 0);
625 translate_vertex_program(compiler
);
627 rc_constants_copy(&compiler
->code
->constants
, &compiler
->Base
.Program
.Constants
);
629 compiler
->code
->InputsRead
= compiler
->Base
.Program
.InputsRead
;
630 compiler
->code
->OutputsWritten
= compiler
->Base
.Program
.OutputsWritten
;
632 if (compiler
->Base
.Debug
) {
633 fprintf(stderr
, "Final vertex program code:\n");
634 r300_vertex_program_dump(compiler
->code
);