/*
 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 #include "radeon_compiler.h"
25 #include "../r300_reg.h"
27 #include "radeon_nqssadce.h"
28 #include "radeon_program.h"
29 #include "radeon_program_alu.h"
31 #include "shader/prog_print.h"
/*
 * Take an already-setup and valid source then swizzle it appropriately to
 * obtain a constant ZERO or ONE source.
 */
38 #define __CONST(x, y) \
39 (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \
44 t_src_class(vpi->SrcReg[x].File), \
45 NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4))
48 static unsigned long t_dst_mask(GLuint mask
)
50 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
51 return mask
& WRITEMASK_XYZW
;
54 static unsigned long t_dst_class(gl_register_file file
)
58 case PROGRAM_TEMPORARY
:
59 return PVS_DST_REG_TEMPORARY
;
61 return PVS_DST_REG_OUT
;
63 return PVS_DST_REG_A0
;
66 case PROGRAM_LOCAL_PARAM:
67 case PROGRAM_ENV_PARAM:
68 case PROGRAM_NAMED_PARAM:
69 case PROGRAM_STATE_VAR:
70 case PROGRAM_WRITE_ONLY:
74 fprintf(stderr
, "problem in %s", __FUNCTION__
);
80 static unsigned long t_dst_index(struct r300_vertex_program_code
*vp
,
81 struct prog_dst_register
*dst
)
83 if (dst
->File
== PROGRAM_OUTPUT
)
84 return vp
->outputs
[dst
->Index
];
89 static unsigned long t_src_class(gl_register_file file
)
92 case PROGRAM_TEMPORARY
:
93 return PVS_SRC_REG_TEMPORARY
;
95 return PVS_SRC_REG_INPUT
;
96 case PROGRAM_LOCAL_PARAM
:
97 case PROGRAM_ENV_PARAM
:
98 case PROGRAM_NAMED_PARAM
:
99 case PROGRAM_CONSTANT
:
100 case PROGRAM_STATE_VAR
:
101 return PVS_SRC_REG_CONSTANT
;
104 case PROGRAM_WRITE_ONLY:
105 case PROGRAM_ADDRESS:
108 fprintf(stderr
, "problem in %s", __FUNCTION__
);
114 static GLboolean
t_src_conflict(struct prog_src_register a
, struct prog_src_register b
)
116 unsigned long aclass
= t_src_class(a
.File
);
117 unsigned long bclass
= t_src_class(b
.File
);
119 if (aclass
!= bclass
)
121 if (aclass
== PVS_SRC_REG_TEMPORARY
)
124 if (a
.RelAddr
|| b
.RelAddr
)
126 if (a
.Index
!= b
.Index
)
132 static INLINE
unsigned long t_swizzle(GLubyte swizzle
)
134 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
138 static unsigned long t_src_index(struct r300_vertex_program_code
*vp
,
139 struct prog_src_register
*src
)
141 if (src
->File
== PROGRAM_INPUT
) {
142 assert(vp
->inputs
[src
->Index
] != -1);
143 return vp
->inputs
[src
->Index
];
145 if (src
->Index
< 0) {
147 "negative offsets for indirect addressing do not work.\n");
154 /* these two functions should probably be merged... */
156 static unsigned long t_src(struct r300_vertex_program_code
*vp
,
157 struct prog_src_register
*src
)
159 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
160 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
162 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
163 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
164 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
165 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
166 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
167 t_src_class(src
->File
),
168 src
->Negate
) | (src
->RelAddr
<< 4);
171 static unsigned long t_src_scalar(struct r300_vertex_program_code
*vp
,
172 struct prog_src_register
*src
)
174 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
175 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
177 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
178 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
179 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
180 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
181 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
182 t_src_class(src
->File
),
183 src
->Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
187 static GLboolean
valid_dst(struct r300_vertex_program_code
*vp
,
188 struct prog_dst_register
*dst
)
190 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
192 } else if (dst
->File
== PROGRAM_ADDRESS
) {
193 assert(dst
->Index
== 0);
199 static void ei_vector1(struct r300_vertex_program_code
*vp
,
201 struct prog_instruction
*vpi
,
204 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
207 t_dst_index(vp
, &vpi
->DstReg
),
208 t_dst_mask(vpi
->DstReg
.WriteMask
),
209 t_dst_class(vpi
->DstReg
.File
));
210 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
211 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
212 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
215 static void ei_vector2(struct r300_vertex_program_code
*vp
,
217 struct prog_instruction
*vpi
,
220 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
223 t_dst_index(vp
, &vpi
->DstReg
),
224 t_dst_mask(vpi
->DstReg
.WriteMask
),
225 t_dst_class(vpi
->DstReg
.File
));
226 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
227 inst
[2] = t_src(vp
, &vpi
->SrcReg
[1]);
228 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
231 static void ei_math1(struct r300_vertex_program_code
*vp
,
233 struct prog_instruction
*vpi
,
236 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
239 t_dst_index(vp
, &vpi
->DstReg
),
240 t_dst_mask(vpi
->DstReg
.WriteMask
),
241 t_dst_class(vpi
->DstReg
.File
));
242 inst
[1] = t_src_scalar(vp
, &vpi
->SrcReg
[0]);
243 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
244 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
247 static void ei_lit(struct r300_vertex_program_code
*vp
,
248 struct prog_instruction
*vpi
,
251 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
253 inst
[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX
,
256 t_dst_index(vp
, &vpi
->DstReg
),
257 t_dst_mask(vpi
->DstReg
.WriteMask
),
258 t_dst_class(vpi
->DstReg
.File
));
259 /* NOTE: Users swizzling might not work. */
260 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
261 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
262 PVS_SRC_SELECT_FORCE_0
, // Z
263 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
264 t_src_class(vpi
->SrcReg
[0].File
),
265 vpi
->SrcReg
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
266 (vpi
->SrcReg
[0].RelAddr
<< 4);
267 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
268 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
269 PVS_SRC_SELECT_FORCE_0
, // Z
270 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
271 t_src_class(vpi
->SrcReg
[0].File
),
272 vpi
->SrcReg
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
273 (vpi
->SrcReg
[0].RelAddr
<< 4);
274 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
275 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
276 PVS_SRC_SELECT_FORCE_0
, // Z
277 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
278 t_src_class(vpi
->SrcReg
[0].File
),
279 vpi
->SrcReg
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
280 (vpi
->SrcReg
[0].RelAddr
<< 4);
283 static void ei_mad(struct r300_vertex_program_code
*vp
,
284 struct prog_instruction
*vpi
,
287 inst
[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD
,
290 t_dst_index(vp
, &vpi
->DstReg
),
291 t_dst_mask(vpi
->DstReg
.WriteMask
),
292 t_dst_class(vpi
->DstReg
.File
));
293 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
294 inst
[2] = t_src(vp
, &vpi
->SrcReg
[1]);
295 inst
[3] = t_src(vp
, &vpi
->SrcReg
[2]);
298 static void ei_pow(struct r300_vertex_program_code
*vp
,
299 struct prog_instruction
*vpi
,
302 inst
[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF
,
305 t_dst_index(vp
, &vpi
->DstReg
),
306 t_dst_mask(vpi
->DstReg
.WriteMask
),
307 t_dst_class(vpi
->DstReg
.File
));
308 inst
[1] = t_src_scalar(vp
, &vpi
->SrcReg
[0]);
309 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
310 inst
[3] = t_src_scalar(vp
, &vpi
->SrcReg
[1]);
313 static void t_inputs_outputs(struct r300_vertex_program_compiler
* c
)
317 GLuint OutputsWritten
, InputsRead
;
319 OutputsWritten
= c
->Base
.Program
.OutputsWritten
;
320 InputsRead
= c
->Base
.Program
.InputsRead
;
323 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
324 if (InputsRead
& (1 << i
))
325 c
->code
->inputs
[i
] = ++cur_reg
;
327 c
->code
->inputs
[i
] = -1;
331 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
332 c
->code
->outputs
[i
] = -1;
334 assert(OutputsWritten
& (1 << VERT_RESULT_HPOS
));
336 if (OutputsWritten
& (1 << VERT_RESULT_HPOS
)) {
337 c
->code
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
340 if (OutputsWritten
& (1 << VERT_RESULT_PSIZ
)) {
341 c
->code
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
344 /* If we're writing back facing colors we need to send
345 * four colors to make front/back face colors selection work.
346 * If the vertex program doesn't write all 4 colors, lets
347 * pretend it does by skipping output index reg so the colors
348 * get written into appropriate output vectors.
350 if (OutputsWritten
& (1 << VERT_RESULT_COL0
)) {
351 c
->code
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
352 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
353 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
357 if (OutputsWritten
& (1 << VERT_RESULT_COL1
)) {
358 c
->code
->outputs
[VERT_RESULT_COL1
] = cur_reg
++;
359 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
360 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
364 if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
365 c
->code
->outputs
[VERT_RESULT_BFC0
] = cur_reg
++;
366 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
370 if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
371 c
->code
->outputs
[VERT_RESULT_BFC1
] = cur_reg
++;
372 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
376 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++) {
377 if (OutputsWritten
& (1 << i
)) {
378 c
->code
->outputs
[i
] = cur_reg
++;
382 if (OutputsWritten
& (1 << VERT_RESULT_FOGC
)) {
383 c
->code
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
387 static void translate_vertex_program(struct r300_vertex_program_compiler
* compiler
)
389 struct rc_instruction
*rci
;
391 compiler
->code
->pos_end
= 0; /* Not supported yet */
392 compiler
->code
->length
= 0;
394 t_inputs_outputs(compiler
);
396 for(rci
= compiler
->Base
.Program
.Instructions
.Next
; rci
!= &compiler
->Base
.Program
.Instructions
; rci
= rci
->Next
) {
397 struct prog_instruction
*vpi
= &rci
->I
;
398 GLuint
*inst
= compiler
->code
->body
.d
+ compiler
->code
->length
;
400 /* Skip instructions writing to non-existing destination */
401 if (!valid_dst(compiler
->code
, &vpi
->DstReg
))
404 if (compiler
->code
->length
>= VSF_MAX_FRAGMENT_LENGTH
) {
405 rc_error(&compiler
->Base
, "Vertex program has too many instructions\n");
409 switch (vpi
->Opcode
) {
410 case OPCODE_ADD
: ei_vector2(compiler
->code
, VE_ADD
, vpi
, inst
); break;
411 case OPCODE_ARL
: ei_vector1(compiler
->code
, VE_FLT2FIX_DX
, vpi
, inst
); break;
412 case OPCODE_DP4
: ei_vector2(compiler
->code
, VE_DOT_PRODUCT
, vpi
, inst
); break;
413 case OPCODE_DST
: ei_vector2(compiler
->code
, VE_DISTANCE_VECTOR
, vpi
, inst
); break;
414 case OPCODE_EX2
: ei_math1(compiler
->code
, ME_EXP_BASE2_FULL_DX
, vpi
, inst
); break;
415 case OPCODE_EXP
: ei_math1(compiler
->code
, ME_EXP_BASE2_DX
, vpi
, inst
); break;
416 case OPCODE_FRC
: ei_vector1(compiler
->code
, VE_FRACTION
, vpi
, inst
); break;
417 case OPCODE_LG2
: ei_math1(compiler
->code
, ME_LOG_BASE2_FULL_DX
, vpi
, inst
); break;
418 case OPCODE_LIT
: ei_lit(compiler
->code
, vpi
, inst
); break;
419 case OPCODE_LOG
: ei_math1(compiler
->code
, ME_LOG_BASE2_DX
, vpi
, inst
); break;
420 case OPCODE_MAD
: ei_mad(compiler
->code
, vpi
, inst
); break;
421 case OPCODE_MAX
: ei_vector2(compiler
->code
, VE_MAXIMUM
, vpi
, inst
); break;
422 case OPCODE_MIN
: ei_vector2(compiler
->code
, VE_MINIMUM
, vpi
, inst
); break;
423 case OPCODE_MOV
: ei_vector1(compiler
->code
, VE_ADD
, vpi
, inst
); break;
424 case OPCODE_MUL
: ei_vector2(compiler
->code
, VE_MULTIPLY
, vpi
, inst
); break;
425 case OPCODE_POW
: ei_pow(compiler
->code
, vpi
, inst
); break;
426 case OPCODE_RCP
: ei_math1(compiler
->code
, ME_RECIP_DX
, vpi
, inst
); break;
427 case OPCODE_RSQ
: ei_math1(compiler
->code
, ME_RECIP_SQRT_DX
, vpi
, inst
); break;
428 case OPCODE_SGE
: ei_vector2(compiler
->code
, VE_SET_GREATER_THAN_EQUAL
, vpi
, inst
); break;
429 case OPCODE_SLT
: ei_vector2(compiler
->code
, VE_SET_LESS_THAN
, vpi
, inst
); break;
431 rc_error(&compiler
->Base
, "Unknown opcode %i\n", vpi
->Opcode
);
435 compiler
->code
->length
+= 4;
437 if (compiler
->Base
.Error
)
442 struct temporary_allocation
{
445 struct rc_instruction
* LastRead
;
448 static void allocate_temporary_registers(struct r300_vertex_program_compiler
* compiler
)
450 struct rc_instruction
*inst
;
451 GLuint num_orig_temps
= 0;
452 GLboolean hwtemps
[VSF_MAX_FRAGMENT_TEMPS
];
453 struct temporary_allocation
* ta
;
456 compiler
->code
->num_temporaries
= 0;
457 memset(hwtemps
, 0, sizeof(hwtemps
));
459 /* Pass 1: Count original temporaries and allocate structures */
460 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
461 GLuint numsrcs
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
462 GLuint numdsts
= _mesa_num_inst_dst_regs(inst
->I
.Opcode
);
464 for (i
= 0; i
< numsrcs
; ++i
) {
465 if (inst
->I
.SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
466 if (inst
->I
.SrcReg
[i
].Index
>= num_orig_temps
)
467 num_orig_temps
= inst
->I
.SrcReg
[i
].Index
+ 1;
472 if (inst
->I
.DstReg
.File
== PROGRAM_TEMPORARY
) {
473 if (inst
->I
.DstReg
.Index
>= num_orig_temps
)
474 num_orig_temps
= inst
->I
.DstReg
.Index
+ 1;
479 ta
= (struct temporary_allocation
*)memory_pool_malloc(&compiler
->Base
.Pool
,
480 sizeof(struct temporary_allocation
) * num_orig_temps
);
481 memset(ta
, 0, sizeof(struct temporary_allocation
) * num_orig_temps
);
483 /* Pass 2: Determine original temporary lifetimes */
484 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
485 GLuint numsrcs
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
487 for (i
= 0; i
< numsrcs
; ++i
) {
488 if (inst
->I
.SrcReg
[i
].File
== PROGRAM_TEMPORARY
)
489 ta
[inst
->I
.SrcReg
[i
].Index
].LastRead
= inst
;
493 /* Pass 3: Register allocation */
494 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
495 GLuint numsrcs
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
496 GLuint numdsts
= _mesa_num_inst_dst_regs(inst
->I
.Opcode
);
498 for (i
= 0; i
< numsrcs
; ++i
) {
499 if (inst
->I
.SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
500 GLuint orig
= inst
->I
.SrcReg
[i
].Index
;
501 inst
->I
.SrcReg
[i
].Index
= ta
[orig
].HwTemp
;
503 if (ta
[orig
].Allocated
&& inst
== ta
[orig
].LastRead
)
504 hwtemps
[ta
[orig
].HwTemp
] = GL_FALSE
;
509 if (inst
->I
.DstReg
.File
== PROGRAM_TEMPORARY
) {
510 GLuint orig
= inst
->I
.DstReg
.Index
;
512 if (!ta
[orig
].Allocated
) {
513 for(j
= 0; j
< VSF_MAX_FRAGMENT_TEMPS
; ++j
) {
517 if (j
>= VSF_MAX_FRAGMENT_TEMPS
) {
518 fprintf(stderr
, "Out of hw temporaries\n");
520 ta
[orig
].Allocated
= GL_TRUE
;
522 hwtemps
[j
] = GL_TRUE
;
524 if (j
>= compiler
->code
->num_temporaries
)
525 compiler
->code
->num_temporaries
= j
+ 1;
529 inst
->I
.DstReg
.Index
= ta
[orig
].HwTemp
;
/**
 * The vertex engine cannot read two inputs or two constants at the same time.
 * Introduce intermediate MOVs to temporary registers to account for this.
 */
540 static GLboolean
transform_source_conflicts(
541 struct radeon_compiler
*c
,
542 struct rc_instruction
* inst
,
545 GLuint num_operands
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
547 if (num_operands
== 3) {
548 if (t_src_conflict(inst
->I
.SrcReg
[1], inst
->I
.SrcReg
[2])
549 || t_src_conflict(inst
->I
.SrcReg
[0], inst
->I
.SrcReg
[2])) {
550 int tmpreg
= rc_find_free_temporary(c
);
551 struct rc_instruction
* inst_mov
= rc_insert_new_instruction(c
, inst
->Prev
);
552 inst_mov
->I
.Opcode
= OPCODE_MOV
;
553 inst_mov
->I
.DstReg
.File
= PROGRAM_TEMPORARY
;
554 inst_mov
->I
.DstReg
.Index
= tmpreg
;
555 inst_mov
->I
.SrcReg
[0] = inst
->I
.SrcReg
[2];
557 reset_srcreg(&inst
->I
.SrcReg
[2]);
558 inst
->I
.SrcReg
[2].File
= PROGRAM_TEMPORARY
;
559 inst
->I
.SrcReg
[2].Index
= tmpreg
;
563 if (num_operands
>= 2) {
564 if (t_src_conflict(inst
->I
.SrcReg
[1], inst
->I
.SrcReg
[0])) {
565 int tmpreg
= rc_find_free_temporary(c
);
566 struct rc_instruction
* inst_mov
= rc_insert_new_instruction(c
, inst
->Prev
);
567 inst_mov
->I
.Opcode
= OPCODE_MOV
;
568 inst_mov
->I
.DstReg
.File
= PROGRAM_TEMPORARY
;
569 inst_mov
->I
.DstReg
.Index
= tmpreg
;
570 inst_mov
->I
.SrcReg
[0] = inst
->I
.SrcReg
[1];
572 reset_srcreg(&inst
->I
.SrcReg
[1]);
573 inst
->I
.SrcReg
[1].File
= PROGRAM_TEMPORARY
;
574 inst
->I
.SrcReg
[1].Index
= tmpreg
;
581 static void insert_wpos(struct gl_program
*prog
, GLuint temp_index
, int tex_id
)
583 struct prog_instruction
*vpi
;
585 _mesa_insert_instructions(prog
, prog
->NumInstructions
- 1, 2);
587 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 3];
589 vpi
->Opcode
= OPCODE_MOV
;
591 vpi
->DstReg
.File
= PROGRAM_OUTPUT
;
592 vpi
->DstReg
.Index
= VERT_RESULT_HPOS
;
593 vpi
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
594 vpi
->DstReg
.CondMask
= COND_TR
;
596 vpi
->SrcReg
[0].File
= PROGRAM_TEMPORARY
;
597 vpi
->SrcReg
[0].Index
= temp_index
;
598 vpi
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
602 vpi
->Opcode
= OPCODE_MOV
;
604 vpi
->DstReg
.File
= PROGRAM_OUTPUT
;
605 vpi
->DstReg
.Index
= VERT_RESULT_TEX0
+ tex_id
;
606 vpi
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
607 vpi
->DstReg
.CondMask
= COND_TR
;
609 vpi
->SrcReg
[0].File
= PROGRAM_TEMPORARY
;
610 vpi
->SrcReg
[0].Index
= temp_index
;
611 vpi
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
615 vpi
->Opcode
= OPCODE_END
;
618 static void pos_as_texcoord(struct gl_program
*prog
, int tex_id
)
620 struct prog_instruction
*vpi
;
621 GLuint tempregi
= prog
->NumTemporaries
;
623 prog
->NumTemporaries
++;
625 for (vpi
= prog
->Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++) {
626 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&& vpi
->DstReg
.Index
== VERT_RESULT_HPOS
) {
627 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
628 vpi
->DstReg
.Index
= tempregi
;
632 insert_wpos(prog
, tempregi
, tex_id
);
634 prog
->OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ tex_id
);
/**
 * The fogcoord attribute is special in that only the first component
 * is relevant, and the remaining components are always fixed (when read
 * from by the fragment program) to yield an X001 pattern.
 *
 * We need to enforce this either in the vertex program or in the fragment
 * program, and this code chooses not to enforce it in the vertex program.
 * This is slightly cheaper, as long as the fragment program does not use
 * weird swizzles.
 *
 * And it seems that usually, weird swizzles are not used, so...
 *
 * See also the counterpart rewriting for fragment programs.
 */
651 static void fog_as_texcoord(struct gl_program
*prog
, int tex_id
)
653 struct prog_instruction
*vpi
;
655 vpi
= prog
->Instructions
;
656 while (vpi
->Opcode
!= OPCODE_END
) {
657 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&& vpi
->DstReg
.Index
== VERT_RESULT_FOGC
) {
658 vpi
->DstReg
.Index
= VERT_RESULT_TEX0
+ tex_id
;
659 vpi
->DstReg
.WriteMask
= WRITEMASK_X
;
665 prog
->OutputsWritten
&= ~(1 << VERT_RESULT_FOGC
);
666 prog
->OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ tex_id
);
670 static void addArtificialOutputs(struct r300_vertex_program_compiler
* compiler
)
674 for(i
= 0; i
< 32; ++i
) {
675 if ((compiler
->RequiredOutputs
& (1 << i
)) &&
676 !(compiler
->Base
.Program
.OutputsWritten
& (1 << i
))) {
677 struct rc_instruction
* inst
= rc_insert_new_instruction(&compiler
->Base
, compiler
->Base
.Program
.Instructions
.Prev
);
678 inst
->I
.Opcode
= OPCODE_MOV
;
680 inst
->I
.DstReg
.File
= PROGRAM_OUTPUT
;
681 inst
->I
.DstReg
.Index
= i
;
682 inst
->I
.DstReg
.WriteMask
= WRITEMASK_XYZW
;
684 inst
->I
.SrcReg
[0].File
= PROGRAM_CONSTANT
;
685 inst
->I
.SrcReg
[0].Index
= 0;
686 inst
->I
.SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
688 compiler
->Base
.Program
.OutputsWritten
|= 1 << i
;
693 static void nqssadceInit(struct nqssadce_state
* s
)
695 struct r300_vertex_program_compiler
* compiler
= s
->UserData
;
698 for(i
= 0; i
< VERT_RESULT_MAX
; ++i
) {
699 if (compiler
->RequiredOutputs
& (1 << i
)) {
700 if (i
!= VERT_RESULT_PSIZ
)
701 s
->Outputs
[i
].Sourced
= WRITEMASK_XYZW
;
703 s
->Outputs
[i
].Sourced
= WRITEMASK_X
; /* ugly hack! */
708 static GLboolean
swizzleIsNative(GLuint opcode
, struct prog_src_register reg
)
718 void r3xx_compile_vertex_program(struct r300_vertex_program_compiler
* compiler
)
720 if (compiler
->state
.WPosAttr
!= FRAG_ATTRIB_MAX
) {
721 pos_as_texcoord(compiler
->program
, compiler
->state
.WPosAttr
- FRAG_ATTRIB_TEX0
);
724 if (compiler
->state
.FogAttr
!= FRAG_ATTRIB_MAX
) {
725 fog_as_texcoord(compiler
->program
, compiler
->state
.FogAttr
- FRAG_ATTRIB_TEX0
);
728 rc_mesa_to_rc_program(&compiler
->Base
, compiler
->program
);
730 addArtificialOutputs(compiler
);
733 struct radeon_program_transformation transformations
[] = {
734 { &r300_transform_vertex_alu
, 0 },
736 radeonLocalTransform(&compiler
->Base
, 1, transformations
);
739 if (compiler
->Base
.Debug
) {
740 fprintf(stderr
, "Vertex program after native rewrite:\n");
741 rc_print_program(&compiler
->Base
.Program
);
/* Note: This pass has to be done separately from ALU rewrite,
 * otherwise non-native ALU instructions with source conflicts
 * will not be treated properly.
 */
750 struct radeon_program_transformation transformations
[] = {
751 { &transform_source_conflicts
, 0 },
753 radeonLocalTransform(&compiler
->Base
, 1, transformations
);
756 if (compiler
->Base
.Debug
) {
757 fprintf(stderr
, "Vertex program after source conflict resolve:\n");
758 rc_print_program(&compiler
->Base
.Program
);
763 struct radeon_nqssadce_descr nqssadce
= {
764 .Init
= &nqssadceInit
,
765 .IsNativeSwizzle
= &swizzleIsNative
,
768 radeonNqssaDce(&compiler
->Base
, &nqssadce
, compiler
);
770 /* We need this step for reusing temporary registers */
771 allocate_temporary_registers(compiler
);
773 if (compiler
->Base
.Debug
) {
774 fprintf(stderr
, "Vertex program after NQSSADCE:\n");
775 rc_print_program(&compiler
->Base
.Program
);
780 translate_vertex_program(compiler
);
782 rc_constants_copy(&compiler
->code
->constants
, &compiler
->Base
.Program
.Constants
);
784 compiler
->code
->InputsRead
= compiler
->Base
.Program
.InputsRead
;
785 compiler
->code
->OutputsWritten
= compiler
->Base
.Program
.OutputsWritten
;