2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 #include "radeon_compiler.h"
25 #include "../r300_reg.h"
27 #include "radeon_nqssadce.h"
28 #include "radeon_program.h"
29 #include "radeon_program_alu.h"
31 #include "shader/prog_print.h"
/**
 * Build a source operand that reads the constant selected by swizzle `y`
 * (e.g. SWIZZLE_ZERO) in all four channels.
 *
 * The register index, register class and relative-addressing bit are taken
 * from source operand `x` of the current instruction, which must already be
 * set up and valid.  The expansion site must have `vp` and `vpi` in scope.
 */
#define __CONST(x, y)	\
	(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]),	\
			 t_swizzle(y),	\
			 t_swizzle(y),	\
			 t_swizzle(y),	\
			 t_swizzle(y),	\
			 t_src_class(vpi->SrcReg[x].File),	\
			 NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4))
48 static unsigned long t_dst_mask(GLuint mask
)
50 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
51 return mask
& WRITEMASK_XYZW
;
54 static unsigned long t_dst_class(gl_register_file file
)
58 case PROGRAM_TEMPORARY
:
59 return PVS_DST_REG_TEMPORARY
;
61 return PVS_DST_REG_OUT
;
63 return PVS_DST_REG_A0
;
66 case PROGRAM_LOCAL_PARAM:
67 case PROGRAM_ENV_PARAM:
68 case PROGRAM_NAMED_PARAM:
69 case PROGRAM_STATE_VAR:
70 case PROGRAM_WRITE_ONLY:
74 fprintf(stderr
, "problem in %s", __FUNCTION__
);
80 static unsigned long t_dst_index(struct r300_vertex_program_code
*vp
,
81 struct prog_dst_register
*dst
)
83 if (dst
->File
== PROGRAM_OUTPUT
)
84 return vp
->outputs
[dst
->Index
];
89 static unsigned long t_src_class(gl_register_file file
)
92 case PROGRAM_TEMPORARY
:
93 return PVS_SRC_REG_TEMPORARY
;
95 return PVS_SRC_REG_INPUT
;
96 case PROGRAM_LOCAL_PARAM
:
97 case PROGRAM_ENV_PARAM
:
98 case PROGRAM_NAMED_PARAM
:
99 case PROGRAM_CONSTANT
:
100 case PROGRAM_STATE_VAR
:
101 return PVS_SRC_REG_CONSTANT
;
104 case PROGRAM_WRITE_ONLY:
105 case PROGRAM_ADDRESS:
108 fprintf(stderr
, "problem in %s", __FUNCTION__
);
114 static GLboolean
t_src_conflict(struct prog_src_register a
, struct prog_src_register b
)
116 unsigned long aclass
= t_src_class(a
.File
);
117 unsigned long bclass
= t_src_class(b
.File
);
119 if (aclass
!= bclass
)
121 if (aclass
== PVS_SRC_REG_TEMPORARY
)
124 if (a
.RelAddr
|| b
.RelAddr
)
126 if (a
.Index
!= b
.Index
)
132 static INLINE
unsigned long t_swizzle(GLubyte swizzle
)
134 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
138 static unsigned long t_src_index(struct r300_vertex_program_code
*vp
,
139 struct prog_src_register
*src
)
141 if (src
->File
== PROGRAM_INPUT
) {
142 assert(vp
->inputs
[src
->Index
] != -1);
143 return vp
->inputs
[src
->Index
];
145 if (src
->Index
< 0) {
147 "negative offsets for indirect addressing do not work.\n");
/* t_src() and t_src_scalar() below should probably be merged... */
156 static unsigned long t_src(struct r300_vertex_program_code
*vp
,
157 struct prog_src_register
*src
)
159 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
160 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
162 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
163 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
164 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
165 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
166 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
167 t_src_class(src
->File
),
168 src
->Negate
) | (src
->RelAddr
<< 4);
171 static unsigned long t_src_scalar(struct r300_vertex_program_code
*vp
,
172 struct prog_src_register
*src
)
174 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
175 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
177 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
178 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
179 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
180 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
181 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
182 t_src_class(src
->File
),
183 src
->Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
187 static GLboolean
valid_dst(struct r300_vertex_program_code
*vp
,
188 struct prog_dst_register
*dst
)
190 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
192 } else if (dst
->File
== PROGRAM_ADDRESS
) {
193 assert(dst
->Index
== 0);
199 static void ei_vector1(struct r300_vertex_program_code
*vp
,
201 struct prog_instruction
*vpi
,
204 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
207 t_dst_index(vp
, &vpi
->DstReg
),
208 t_dst_mask(vpi
->DstReg
.WriteMask
),
209 t_dst_class(vpi
->DstReg
.File
));
210 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
211 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
212 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
215 static void ei_vector2(struct r300_vertex_program_code
*vp
,
217 struct prog_instruction
*vpi
,
220 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
223 t_dst_index(vp
, &vpi
->DstReg
),
224 t_dst_mask(vpi
->DstReg
.WriteMask
),
225 t_dst_class(vpi
->DstReg
.File
));
226 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
227 inst
[2] = t_src(vp
, &vpi
->SrcReg
[1]);
228 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
231 static void ei_math1(struct r300_vertex_program_code
*vp
,
233 struct prog_instruction
*vpi
,
236 inst
[0] = PVS_OP_DST_OPERAND(hw_opcode
,
239 t_dst_index(vp
, &vpi
->DstReg
),
240 t_dst_mask(vpi
->DstReg
.WriteMask
),
241 t_dst_class(vpi
->DstReg
.File
));
242 inst
[1] = t_src_scalar(vp
, &vpi
->SrcReg
[0]);
243 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
244 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
247 static void ei_lit(struct r300_vertex_program_code
*vp
,
248 struct prog_instruction
*vpi
,
251 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
253 inst
[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX
,
256 t_dst_index(vp
, &vpi
->DstReg
),
257 t_dst_mask(vpi
->DstReg
.WriteMask
),
258 t_dst_class(vpi
->DstReg
.File
));
259 /* NOTE: Users swizzling might not work. */
260 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
261 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
262 PVS_SRC_SELECT_FORCE_0
, // Z
263 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
264 t_src_class(vpi
->SrcReg
[0].File
),
265 vpi
->SrcReg
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
266 (vpi
->SrcReg
[0].RelAddr
<< 4);
267 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
268 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
269 PVS_SRC_SELECT_FORCE_0
, // Z
270 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
271 t_src_class(vpi
->SrcReg
[0].File
),
272 vpi
->SrcReg
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
273 (vpi
->SrcReg
[0].RelAddr
<< 4);
274 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &vpi
->SrcReg
[0]), t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 1)), // Y
275 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 0)), // X
276 PVS_SRC_SELECT_FORCE_0
, // Z
277 t_swizzle(GET_SWZ(vpi
->SrcReg
[0].Swizzle
, 3)), // W
278 t_src_class(vpi
->SrcReg
[0].File
),
279 vpi
->SrcReg
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
280 (vpi
->SrcReg
[0].RelAddr
<< 4);
283 static void ei_mad(struct r300_vertex_program_code
*vp
,
284 struct prog_instruction
*vpi
,
287 inst
[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD
,
290 t_dst_index(vp
, &vpi
->DstReg
),
291 t_dst_mask(vpi
->DstReg
.WriteMask
),
292 t_dst_class(vpi
->DstReg
.File
));
293 inst
[1] = t_src(vp
, &vpi
->SrcReg
[0]);
294 inst
[2] = t_src(vp
, &vpi
->SrcReg
[1]);
295 inst
[3] = t_src(vp
, &vpi
->SrcReg
[2]);
298 static void ei_pow(struct r300_vertex_program_code
*vp
,
299 struct prog_instruction
*vpi
,
302 inst
[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF
,
305 t_dst_index(vp
, &vpi
->DstReg
),
306 t_dst_mask(vpi
->DstReg
.WriteMask
),
307 t_dst_class(vpi
->DstReg
.File
));
308 inst
[1] = t_src_scalar(vp
, &vpi
->SrcReg
[0]);
309 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
310 inst
[3] = t_src_scalar(vp
, &vpi
->SrcReg
[1]);
313 static void t_inputs_outputs(struct r300_vertex_program_compiler
* c
)
317 GLuint OutputsWritten
, InputsRead
;
319 OutputsWritten
= c
->Base
.Program
.OutputsWritten
;
320 InputsRead
= c
->Base
.Program
.InputsRead
;
323 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
324 if (InputsRead
& (1 << i
))
325 c
->code
->inputs
[i
] = ++cur_reg
;
327 c
->code
->inputs
[i
] = -1;
331 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
332 c
->code
->outputs
[i
] = -1;
334 assert(OutputsWritten
& (1 << VERT_RESULT_HPOS
));
336 if (OutputsWritten
& (1 << VERT_RESULT_HPOS
)) {
337 c
->code
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
340 if (OutputsWritten
& (1 << VERT_RESULT_PSIZ
)) {
341 c
->code
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
344 /* If we're writing back facing colors we need to send
345 * four colors to make front/back face colors selection work.
346 * If the vertex program doesn't write all 4 colors, lets
347 * pretend it does by skipping output index reg so the colors
348 * get written into appropriate output vectors.
350 if (OutputsWritten
& (1 << VERT_RESULT_COL0
)) {
351 c
->code
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
352 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
353 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
357 if (OutputsWritten
& (1 << VERT_RESULT_COL1
)) {
358 c
->code
->outputs
[VERT_RESULT_COL1
] = cur_reg
++;
359 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
360 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
364 if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
365 c
->code
->outputs
[VERT_RESULT_BFC0
] = cur_reg
++;
366 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
370 if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
371 c
->code
->outputs
[VERT_RESULT_BFC1
] = cur_reg
++;
372 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
376 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++) {
377 if (OutputsWritten
& (1 << i
)) {
378 c
->code
->outputs
[i
] = cur_reg
++;
382 if (OutputsWritten
& (1 << VERT_RESULT_FOGC
)) {
383 c
->code
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
387 static void translate_vertex_program(struct r300_vertex_program_compiler
* compiler
)
389 struct rc_instruction
*rci
;
391 compiler
->code
->pos_end
= 0; /* Not supported yet */
392 compiler
->code
->length
= 0;
394 t_inputs_outputs(compiler
);
396 for(rci
= compiler
->Base
.Program
.Instructions
.Next
; rci
!= &compiler
->Base
.Program
.Instructions
; rci
= rci
->Next
) {
397 struct prog_instruction
*vpi
= &rci
->I
;
398 GLuint
*inst
= compiler
->code
->body
.d
+ compiler
->code
->length
;
400 /* Skip instructions writing to non-existing destination */
401 if (!valid_dst(compiler
->code
, &vpi
->DstReg
))
404 if (compiler
->code
->length
>= VSF_MAX_FRAGMENT_LENGTH
) {
405 rc_error(&compiler
->Base
, "Vertex program has too many instructions\n");
409 switch (vpi
->Opcode
) {
410 case OPCODE_ADD
: ei_vector2(compiler
->code
, VE_ADD
, vpi
, inst
); break;
411 case OPCODE_ARL
: ei_vector1(compiler
->code
, VE_FLT2FIX_DX
, vpi
, inst
); break;
412 case OPCODE_DP4
: ei_vector2(compiler
->code
, VE_DOT_PRODUCT
, vpi
, inst
); break;
413 case OPCODE_DST
: ei_vector2(compiler
->code
, VE_DISTANCE_VECTOR
, vpi
, inst
); break;
414 case OPCODE_EX2
: ei_math1(compiler
->code
, ME_EXP_BASE2_FULL_DX
, vpi
, inst
); break;
415 case OPCODE_EXP
: ei_math1(compiler
->code
, ME_EXP_BASE2_DX
, vpi
, inst
); break;
416 case OPCODE_FRC
: ei_vector1(compiler
->code
, VE_FRACTION
, vpi
, inst
); break;
417 case OPCODE_LG2
: ei_math1(compiler
->code
, ME_LOG_BASE2_FULL_DX
, vpi
, inst
); break;
418 case OPCODE_LIT
: ei_lit(compiler
->code
, vpi
, inst
); break;
419 case OPCODE_LOG
: ei_math1(compiler
->code
, ME_LOG_BASE2_DX
, vpi
, inst
); break;
420 case OPCODE_MAD
: ei_mad(compiler
->code
, vpi
, inst
); break;
421 case OPCODE_MAX
: ei_vector2(compiler
->code
, VE_MAXIMUM
, vpi
, inst
); break;
422 case OPCODE_MIN
: ei_vector2(compiler
->code
, VE_MINIMUM
, vpi
, inst
); break;
423 case OPCODE_MOV
: ei_vector1(compiler
->code
, VE_ADD
, vpi
, inst
); break;
424 case OPCODE_MUL
: ei_vector2(compiler
->code
, VE_MULTIPLY
, vpi
, inst
); break;
425 case OPCODE_POW
: ei_pow(compiler
->code
, vpi
, inst
); break;
426 case OPCODE_RCP
: ei_math1(compiler
->code
, ME_RECIP_DX
, vpi
, inst
); break;
427 case OPCODE_RSQ
: ei_math1(compiler
->code
, ME_RECIP_SQRT_DX
, vpi
, inst
); break;
428 case OPCODE_SGE
: ei_vector2(compiler
->code
, VE_SET_GREATER_THAN_EQUAL
, vpi
, inst
); break;
429 case OPCODE_SLT
: ei_vector2(compiler
->code
, VE_SET_LESS_THAN
, vpi
, inst
); break;
431 rc_error(&compiler
->Base
, "Unknown opcode %i\n", vpi
->Opcode
);
435 compiler
->code
->length
+= 4;
437 if (compiler
->Base
.Error
)
442 struct temporary_allocation
{
445 struct rc_instruction
* LastRead
;
448 static void allocate_temporary_registers(struct r300_vertex_program_compiler
* compiler
)
450 struct rc_instruction
*inst
;
451 GLuint num_orig_temps
= 0;
452 GLboolean hwtemps
[VSF_MAX_FRAGMENT_TEMPS
];
453 struct temporary_allocation
* ta
;
456 compiler
->code
->num_temporaries
= 0;
457 memset(hwtemps
, 0, sizeof(hwtemps
));
459 /* Pass 1: Count original temporaries and allocate structures */
460 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
461 GLuint numsrcs
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
462 GLuint numdsts
= _mesa_num_inst_dst_regs(inst
->I
.Opcode
);
464 for (i
= 0; i
< numsrcs
; ++i
) {
465 if (inst
->I
.SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
466 if (inst
->I
.SrcReg
[i
].Index
>= num_orig_temps
)
467 num_orig_temps
= inst
->I
.SrcReg
[i
].Index
+ 1;
472 if (inst
->I
.DstReg
.File
== PROGRAM_TEMPORARY
) {
473 if (inst
->I
.DstReg
.Index
>= num_orig_temps
)
474 num_orig_temps
= inst
->I
.DstReg
.Index
+ 1;
479 ta
= (struct temporary_allocation
*)memory_pool_malloc(&compiler
->Base
.Pool
,
480 sizeof(struct temporary_allocation
) * num_orig_temps
);
481 memset(ta
, 0, sizeof(struct temporary_allocation
) * num_orig_temps
);
483 /* Pass 2: Determine original temporary lifetimes */
484 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
485 GLuint numsrcs
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
487 for (i
= 0; i
< numsrcs
; ++i
) {
488 if (inst
->I
.SrcReg
[i
].File
== PROGRAM_TEMPORARY
)
489 ta
[inst
->I
.SrcReg
[i
].Index
].LastRead
= inst
;
493 /* Pass 3: Register allocation */
494 for(inst
= compiler
->Base
.Program
.Instructions
.Next
; inst
!= &compiler
->Base
.Program
.Instructions
; inst
= inst
->Next
) {
495 GLuint numsrcs
= _mesa_num_inst_src_regs(inst
->I
.Opcode
);
496 GLuint numdsts
= _mesa_num_inst_dst_regs(inst
->I
.Opcode
);
498 for (i
= 0; i
< numsrcs
; ++i
) {
499 if (inst
->I
.SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
500 GLuint orig
= inst
->I
.SrcReg
[i
].Index
;
501 inst
->I
.SrcReg
[i
].Index
= ta
[orig
].HwTemp
;
503 if (ta
[orig
].Allocated
&& inst
== ta
[orig
].LastRead
)
504 hwtemps
[ta
[orig
].HwTemp
] = GL_FALSE
;
509 if (inst
->I
.DstReg
.File
== PROGRAM_TEMPORARY
) {
510 GLuint orig
= inst
->I
.DstReg
.Index
;
512 if (!ta
[orig
].Allocated
) {
513 for(j
= 0; j
< VSF_MAX_FRAGMENT_TEMPS
; ++j
) {
517 if (j
>= VSF_MAX_FRAGMENT_TEMPS
) {
518 fprintf(stderr
, "Out of hw temporaries\n");
520 ta
[orig
].Allocated
= GL_TRUE
;
522 hwtemps
[j
] = GL_TRUE
;
524 if (j
>= compiler
->code
->num_temporaries
)
525 compiler
->code
->num_temporaries
= j
+ 1;
529 inst
->I
.DstReg
.Index
= ta
[orig
].HwTemp
;
537 * Vertex engine cannot read two inputs or two constants at the same time.
538 * Introduce intermediate MOVs to temporary registers to account for this.
540 static GLboolean
transform_source_conflicts(
541 struct radeon_transform_context
*t
,
542 struct prog_instruction
* orig_inst
,
545 struct prog_instruction inst
= *orig_inst
;
546 struct prog_instruction
* dst
;
547 GLuint num_operands
= _mesa_num_inst_src_regs(inst
.Opcode
);
549 if (num_operands
== 3) {
550 if (t_src_conflict(inst
.SrcReg
[1], inst
.SrcReg
[2])
551 || t_src_conflict(inst
.SrcReg
[0], inst
.SrcReg
[2])) {
552 int tmpreg
= radeonFindFreeTemporary(t
);
553 struct prog_instruction
* inst_mov
= radeonAppendInstructions(t
->Program
, 1);
554 inst_mov
->Opcode
= OPCODE_MOV
;
555 inst_mov
->DstReg
.File
= PROGRAM_TEMPORARY
;
556 inst_mov
->DstReg
.Index
= tmpreg
;
557 inst_mov
->SrcReg
[0] = inst
.SrcReg
[2];
559 reset_srcreg(&inst
.SrcReg
[2]);
560 inst
.SrcReg
[2].File
= PROGRAM_TEMPORARY
;
561 inst
.SrcReg
[2].Index
= tmpreg
;
565 if (num_operands
>= 2) {
566 if (t_src_conflict(inst
.SrcReg
[1], inst
.SrcReg
[0])) {
567 int tmpreg
= radeonFindFreeTemporary(t
);
568 struct prog_instruction
* inst_mov
= radeonAppendInstructions(t
->Program
, 1);
569 inst_mov
->Opcode
= OPCODE_MOV
;
570 inst_mov
->DstReg
.File
= PROGRAM_TEMPORARY
;
571 inst_mov
->DstReg
.Index
= tmpreg
;
572 inst_mov
->SrcReg
[0] = inst
.SrcReg
[1];
574 reset_srcreg(&inst
.SrcReg
[1]);
575 inst
.SrcReg
[1].File
= PROGRAM_TEMPORARY
;
576 inst
.SrcReg
[1].Index
= tmpreg
;
580 dst
= radeonAppendInstructions(t
->Program
, 1);
585 static void insert_wpos(struct gl_program
*prog
, GLuint temp_index
, int tex_id
)
587 struct prog_instruction
*vpi
;
589 _mesa_insert_instructions(prog
, prog
->NumInstructions
- 1, 2);
591 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 3];
593 vpi
->Opcode
= OPCODE_MOV
;
595 vpi
->DstReg
.File
= PROGRAM_OUTPUT
;
596 vpi
->DstReg
.Index
= VERT_RESULT_HPOS
;
597 vpi
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
598 vpi
->DstReg
.CondMask
= COND_TR
;
600 vpi
->SrcReg
[0].File
= PROGRAM_TEMPORARY
;
601 vpi
->SrcReg
[0].Index
= temp_index
;
602 vpi
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
606 vpi
->Opcode
= OPCODE_MOV
;
608 vpi
->DstReg
.File
= PROGRAM_OUTPUT
;
609 vpi
->DstReg
.Index
= VERT_RESULT_TEX0
+ tex_id
;
610 vpi
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
611 vpi
->DstReg
.CondMask
= COND_TR
;
613 vpi
->SrcReg
[0].File
= PROGRAM_TEMPORARY
;
614 vpi
->SrcReg
[0].Index
= temp_index
;
615 vpi
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
619 vpi
->Opcode
= OPCODE_END
;
622 static void pos_as_texcoord(struct gl_program
*prog
, int tex_id
)
624 struct prog_instruction
*vpi
;
625 GLuint tempregi
= prog
->NumTemporaries
;
627 prog
->NumTemporaries
++;
629 for (vpi
= prog
->Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++) {
630 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&& vpi
->DstReg
.Index
== VERT_RESULT_HPOS
) {
631 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
632 vpi
->DstReg
.Index
= tempregi
;
636 insert_wpos(prog
, tempregi
, tex_id
);
638 prog
->OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ tex_id
);
642 * The fogcoord attribute is special in that only the first component
643 * is relevant, and the remaining components are always fixed (when read
644 * from by the fragment program) to yield an X001 pattern.
646 * We need to enforce this either in the vertex program or in the fragment
647 * program, and this code chooses not to enforce it in the vertex program.
648 * This is slightly cheaper, as long as the fragment program does not use
651 * And it seems that usually, weird swizzles are not used, so...
653 * See also the counterpart rewriting for fragment programs.
655 static void fog_as_texcoord(struct gl_program
*prog
, int tex_id
)
657 struct prog_instruction
*vpi
;
659 vpi
= prog
->Instructions
;
660 while (vpi
->Opcode
!= OPCODE_END
) {
661 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&& vpi
->DstReg
.Index
== VERT_RESULT_FOGC
) {
662 vpi
->DstReg
.Index
= VERT_RESULT_TEX0
+ tex_id
;
663 vpi
->DstReg
.WriteMask
= WRITEMASK_X
;
669 prog
->OutputsWritten
&= ~(1 << VERT_RESULT_FOGC
);
670 prog
->OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ tex_id
);
674 #define ADD_OUTPUT(fp_attr, vp_result) \
676 if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \
677 OutputsAdded |= 1 << (vp_result); \
682 static void addArtificialOutputs(struct r300_vertex_program_compiler
* compiler
)
684 GLuint OutputsAdded
, FpReads
;
689 FpReads
= compiler
->state
.FpReads
;
691 ADD_OUTPUT(FRAG_ATTRIB_COL0
, VERT_RESULT_COL0
);
692 ADD_OUTPUT(FRAG_ATTRIB_COL1
, VERT_RESULT_COL1
);
694 for (i
= 0; i
< 7; ++i
) {
695 ADD_OUTPUT(FRAG_ATTRIB_TEX0
+ i
, VERT_RESULT_TEX0
+ i
);
698 /* Some outputs may be artificially added, to match the inputs of the fragment program.
699 * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
700 * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
703 struct prog_instruction
*inst
;
705 _mesa_insert_instructions(compiler
->program
, compiler
->program
->NumInstructions
- 1, count
);
706 inst
= &compiler
->program
->Instructions
[compiler
->program
->NumInstructions
- 1 - count
];
708 for (i
= 0; i
< VERT_RESULT_MAX
; ++i
) {
709 if (OutputsAdded
& (1 << i
)) {
710 inst
->Opcode
= OPCODE_MOV
;
712 inst
->DstReg
.File
= PROGRAM_OUTPUT
;
713 inst
->DstReg
.Index
= i
;
714 inst
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
715 inst
->DstReg
.CondMask
= COND_TR
;
717 inst
->SrcReg
[0].File
= PROGRAM_CONSTANT
;
718 inst
->SrcReg
[0].Index
= 0;
719 inst
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
725 compiler
->program
->OutputsWritten
|= OutputsAdded
;
731 static void nqssadceInit(struct nqssadce_state
* s
)
733 struct r300_vertex_program_compiler
* compiler
= s
->UserData
;
736 fp_reads
= compiler
->state
.FpReads
;
738 if (fp_reads
& FRAG_BIT_COL0
) {
739 s
->Outputs
[VERT_RESULT_COL0
].Sourced
= WRITEMASK_XYZW
;
740 s
->Outputs
[VERT_RESULT_BFC0
].Sourced
= WRITEMASK_XYZW
;
743 if (fp_reads
& FRAG_BIT_COL1
) {
744 s
->Outputs
[VERT_RESULT_COL1
].Sourced
= WRITEMASK_XYZW
;
745 s
->Outputs
[VERT_RESULT_BFC1
].Sourced
= WRITEMASK_XYZW
;
751 for (i
= 0; i
< 8; ++i
) {
752 if (fp_reads
& FRAG_BIT_TEX(i
)) {
753 s
->Outputs
[VERT_RESULT_TEX0
+ i
].Sourced
= WRITEMASK_XYZW
;
758 s
->Outputs
[VERT_RESULT_HPOS
].Sourced
= WRITEMASK_XYZW
;
759 if (s
->Compiler
->Program
.OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
760 s
->Outputs
[VERT_RESULT_PSIZ
].Sourced
= WRITEMASK_X
;
763 static GLboolean
swizzleIsNative(GLuint opcode
, struct prog_src_register reg
)
773 void r3xx_compile_vertex_program(struct r300_vertex_program_compiler
* compiler
)
775 if (compiler
->state
.WPosAttr
!= FRAG_ATTRIB_MAX
) {
776 pos_as_texcoord(compiler
->program
, compiler
->state
.WPosAttr
- FRAG_ATTRIB_TEX0
);
779 if (compiler
->state
.FogAttr
!= FRAG_ATTRIB_MAX
) {
780 fog_as_texcoord(compiler
->program
, compiler
->state
.FogAttr
- FRAG_ATTRIB_TEX0
);
783 addArtificialOutputs(compiler
);
786 struct radeon_program_transformation transformations
[] = {
787 { &r300_transform_vertex_alu
, 0 },
789 radeonLocalTransform(compiler
->program
, 1, transformations
);
792 if (compiler
->Base
.Debug
) {
793 fprintf(stderr
, "Vertex program after native rewrite:\n");
794 _mesa_print_program(compiler
->program
);
799 /* Note: This pass has to be done seperately from ALU rewrite,
800 * otherwise non-native ALU instructions with source conflits
801 * will not be treated properly.
803 struct radeon_program_transformation transformations
[] = {
804 { &transform_source_conflicts
, 0 },
806 radeonLocalTransform(compiler
->program
, 1, transformations
);
809 if (compiler
->Base
.Debug
) {
810 fprintf(stderr
, "Vertex program after source conflict resolve:\n");
811 _mesa_print_program(compiler
->program
);
815 rc_mesa_to_rc_program(&compiler
->Base
, compiler
->program
);
818 struct radeon_nqssadce_descr nqssadce
= {
819 .Init
= &nqssadceInit
,
820 .IsNativeSwizzle
= &swizzleIsNative
,
823 radeonNqssaDce(&compiler
->Base
, &nqssadce
, compiler
);
825 /* We need this step for reusing temporary registers */
826 allocate_temporary_registers(compiler
);
828 if (compiler
->Base
.Debug
) {
829 fprintf(stderr
, "Vertex program after NQSSADCE:\n");
830 rc_print_program(&compiler
->Base
.Program
);
835 translate_vertex_program(compiler
);
837 compiler
->code
->InputsRead
= compiler
->Base
.Program
.InputsRead
;
838 compiler
->code
->OutputsWritten
= compiler
->Base
.Program
.OutputsWritten
;