2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 #include "radeon_compiler.h"
25 #include "../r300_reg.h"
27 #include "radeon_nqssadce.h"
28 #include "radeon_program.h"
29 #include "radeon_program_alu.h"
31 #include "shader/prog_optimize.h"
32 #include "shader/prog_print.h"
/* CMP_SRCS(a, b): evaluates to true when the two source operands differ in
 * RelAddr, or when their Index values differ while both register files map
 * (through t_src_class) to the constant class or both map to the input class.
 * The arguments are expanded without parentheses and appear several times in
 * the expansion, so they should be plain lvalues without side effects. */
35 /* TODO: Get rid of t_src_class call */
36 #define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
37 ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \
38 t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \
39 (t_src_class(a.File) == PVS_SRC_REG_INPUT && \
40 t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \
43 * Take an already-setup and valid source then swizzle it appropriately to
44 * obtain a constant ZERO or ONE source.
/* __CONST(x, y): builds a PVS_SRC_OPERAND word from src[x] -- register index
 * from t_src_index(), register class from t_src_class(), NEGATE_NONE -- and
 * ORs in src[x].RelAddr << 4.  It relies on `vp` and `src` being in scope
 * where it is expanded.  The swizzle arguments (and therefore the use of y)
 * are not visible in this excerpt. */
46 #define __CONST(x, y) \
47 (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
52 t_src_class(src[x].File), \
53 NEGATE_NONE) | (src[x].RelAddr << 4))
58 static unsigned long t_dst_mask(GLuint mask
)
60 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
61 return mask
& WRITEMASK_XYZW
;
/* Translate a Mesa register file into a PVS_DST_REG_* destination class.
 * Visible results are PVS_DST_REG_TEMPORARY (for PROGRAM_TEMPORARY),
 * PVS_DST_REG_OUT and PVS_DST_REG_A0; the case labels for the latter two are
 * not visible in this excerpt.  The error path prints "problem in <function>"
 * to stderr. */
64 static unsigned long t_dst_class(gl_register_file file
)
68 case PROGRAM_TEMPORARY
:
69 return PVS_DST_REG_TEMPORARY
;
71 return PVS_DST_REG_OUT
;
73 return PVS_DST_REG_A0
;
76 case PROGRAM_LOCAL_PARAM:
77 case PROGRAM_ENV_PARAM:
78 case PROGRAM_NAMED_PARAM:
79 case PROGRAM_STATE_VAR:
80 case PROGRAM_WRITE_ONLY:
84 fprintf(stderr
, "problem in %s", __FUNCTION__
);
/* Hardware index of a destination register.  For PROGRAM_OUTPUT the index is
 * looked up in vp->outputs[] using dst->Index.  The result for other register
 * files is not visible in this excerpt. */
90 static unsigned long t_dst_index(struct r300_vertex_program_code
*vp
,
91 struct prog_dst_register
*dst
)
93 if (dst
->File
== PROGRAM_OUTPUT
)
94 return vp
->outputs
[dst
->Index
];
/* Translate a Mesa register file into a PVS_SRC_REG_* source class.
 * PROGRAM_TEMPORARY maps to PVS_SRC_REG_TEMPORARY; the LOCAL_PARAM, ENV_PARAM,
 * NAMED_PARAM, CONSTANT and STATE_VAR files all map to PVS_SRC_REG_CONSTANT.
 * PVS_SRC_REG_INPUT is also returned, for a case label that is not visible in
 * this excerpt.  The error path prints "problem in <function>" to stderr. */
99 static unsigned long t_src_class(gl_register_file file
)
102 case PROGRAM_TEMPORARY
:
103 return PVS_SRC_REG_TEMPORARY
;
105 return PVS_SRC_REG_INPUT
;
106 case PROGRAM_LOCAL_PARAM
:
107 case PROGRAM_ENV_PARAM
:
108 case PROGRAM_NAMED_PARAM
:
109 case PROGRAM_CONSTANT
:
110 case PROGRAM_STATE_VAR
:
111 return PVS_SRC_REG_CONSTANT
;
114 case PROGRAM_WRITE_ONLY:
115 case PROGRAM_ADDRESS:
118 fprintf(stderr
, "problem in %s", __FUNCTION__
);
/* Convert one Mesa swizzle selector to the hardware encoding.  According to
 * the comment below the two encodings are identical; the body itself is not
 * visible in this excerpt. */
124 static INLINE
unsigned long t_swizzle(GLubyte swizzle
)
126 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
/* Hardware index of a source register.  For PROGRAM_INPUT the slot stored in
 * vp->inputs[src->Index] is returned, after asserting that the slot is not -1.
 * A negative src->Index leads to a diagnostic about indirect addressing; only
 * its message text is visible in this excerpt, and the remaining return paths
 * are not visible either. */
130 static unsigned long t_src_index(struct r300_vertex_program_code
*vp
,
131 struct prog_src_register
*src
)
133 if (src
->File
== PROGRAM_INPUT
) {
134 assert(vp
->inputs
[src
->Index
] != -1);
135 return vp
->inputs
[src
->Index
];
137 if (src
->Index
< 0) {
139 "negative offsets for indirect addressing do not work.\n");
146 /* these two functions should probably be merged... */
/* Encode a full vector source operand: the register index from t_src_index(),
 * the four swizzle components 0..3 each passed through t_swizzle(), the
 * register class from t_src_class() and src->Negate are packed by
 * PVS_SRC_OPERAND, and src->RelAddr << 4 is ORed into the result. */
148 static unsigned long t_src(struct r300_vertex_program_code
*vp
,
149 struct prog_src_register
*src
)
151 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
152 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
154 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
155 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
156 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
157 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
158 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
159 t_src_class(src
->File
),
160 src
->Negate
) | (src
->RelAddr
<< 4);
/* Encode a scalar source operand: swizzle component 0 is replicated into all
 * four lanes, and any non-zero src->Negate becomes NEGATE_XYZW (otherwise
 * NEGATE_NONE).  The operand that follows the trailing '|' is not visible in
 * this excerpt. */
163 static unsigned long t_src_scalar(struct r300_vertex_program_code
*vp
,
164 struct prog_src_register
*src
)
166 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
167 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
169 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
170 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
171 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
172 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
173 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
174 t_src_class(src
->File
),
175 src
->Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
/* Check a destination register.  Two conditions are visible: a PROGRAM_OUTPUT
 * whose slot in vp->outputs[] is -1, and a PROGRAM_ADDRESS destination, for
 * which dst->Index is asserted to be 0.  The values returned on each path are
 * not visible in this excerpt. */
179 static GLboolean
valid_dst(struct r300_vertex_program_code
*vp
,
180 struct prog_dst_register
*dst
)
182 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
184 } else if (dst
->File
== PROGRAM_ADDRESS
) {
185 assert(dst
->Index
== 0);
/* Emit the instruction words for ADD: inst[0] is the VE_ADD opcode and
 * destination word, inst[1] and inst[2] encode src[0] and src[1] via t_src(),
 * and inst[3] is filled with __CONST(1, SWIZZLE_ZERO). */
191 static GLuint
*r300TranslateOpcodeADD(struct r300_vertex_program_code
*vp
,
192 struct prog_instruction
*vpi
,
194 struct prog_src_register src
[3])
196 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
199 t_dst_index(vp
, &vpi
->DstReg
),
200 t_dst_mask(vpi
->DstReg
.WriteMask
),
201 t_dst_class(vpi
->DstReg
.File
));
202 inst
[1] = t_src(vp
, &src
[0]);
203 inst
[2] = t_src(vp
, &src
[1]);
204 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/* Emit the instruction words for ARL: inst[0] is the VE_FLT2FIX_DX opcode and
 * destination word, inst[1] encodes src[0] via t_src(), and inst[2] and
 * inst[3] are filled with __CONST(0, SWIZZLE_ZERO). */
209 static GLuint
*r300TranslateOpcodeARL(struct r300_vertex_program_code
*vp
,
210 struct prog_instruction
*vpi
,
212 struct prog_src_register src
[3])
214 inst
[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX
,
217 t_dst_index(vp
, &vpi
->DstReg
),
218 t_dst_mask(vpi
->DstReg
.WriteMask
),
219 t_dst_class(vpi
->DstReg
.File
));
220 inst
[1] = t_src(vp
, &src
[0]);
221 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
222 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/* Emit the instruction words for DP4: inst[0] is the VE_DOT_PRODUCT opcode and
 * destination word, inst[1] and inst[2] encode src[0] and src[1] via t_src(),
 * and inst[3] is filled with __CONST(1, SWIZZLE_ZERO). */
227 static GLuint
*r300TranslateOpcodeDP4(struct r300_vertex_program_code
*vp
,
228 struct prog_instruction
*vpi
,
230 struct prog_src_register src
[3])
232 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
235 t_dst_index(vp
, &vpi
->DstReg
),
236 t_dst_mask(vpi
->DstReg
.WriteMask
),
237 t_dst_class(vpi
->DstReg
.File
));
238 inst
[1] = t_src(vp
, &src
[0]);
239 inst
[2] = t_src(vp
, &src
[1]);
240 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/* Emit the instruction words for DST: inst[0] is the VE_DISTANCE_VECTOR opcode
 * and destination word, inst[1] and inst[2] encode src[0] and src[1] via
 * t_src(), and inst[3] is filled with __CONST(1, SWIZZLE_ZERO). */
245 static GLuint
*r300TranslateOpcodeDST(struct r300_vertex_program_code
*vp
,
246 struct prog_instruction
*vpi
,
248 struct prog_src_register src
[3])
250 inst
[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR
,
253 t_dst_index(vp
, &vpi
->DstReg
),
254 t_dst_mask(vpi
->DstReg
.WriteMask
),
255 t_dst_class(vpi
->DstReg
.File
));
256 inst
[1] = t_src(vp
, &src
[0]);
257 inst
[2] = t_src(vp
, &src
[1]);
258 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/* Emit the instruction words for EX2: inst[0] is the ME_EXP_BASE2_FULL_DX
 * opcode and destination word, inst[1] encodes src[0] as a scalar via
 * t_src_scalar(), and inst[2] and inst[3] are filled with
 * __CONST(0, SWIZZLE_ZERO). */
263 static GLuint
*r300TranslateOpcodeEX2(struct r300_vertex_program_code
*vp
,
264 struct prog_instruction
*vpi
,
266 struct prog_src_register src
[3])
268 inst
[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX
,
271 t_dst_index(vp
, &vpi
->DstReg
),
272 t_dst_mask(vpi
->DstReg
.WriteMask
),
273 t_dst_class(vpi
->DstReg
.File
));
274 inst
[1] = t_src_scalar(vp
, &src
[0]);
275 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
276 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/* Emit the instruction words for EXP: inst[0] is the ME_EXP_BASE2_DX opcode
 * and destination word, inst[1] encodes src[0] as a scalar via
 * t_src_scalar(), and inst[2] and inst[3] are filled with
 * __CONST(0, SWIZZLE_ZERO). */
281 static GLuint
*r300TranslateOpcodeEXP(struct r300_vertex_program_code
*vp
,
282 struct prog_instruction
*vpi
,
284 struct prog_src_register src
[3])
286 inst
[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX
,
289 t_dst_index(vp
, &vpi
->DstReg
),
290 t_dst_mask(vpi
->DstReg
.WriteMask
),
291 t_dst_class(vpi
->DstReg
.File
));
292 inst
[1] = t_src_scalar(vp
, &src
[0]);
293 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
294 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/* Emit the instruction words for FRC: inst[0] is the VE_FRACTION opcode and
 * destination word, inst[1] encodes src[0] via t_src(), and inst[2] and
 * inst[3] are filled with __CONST(0, SWIZZLE_ZERO). */
299 static GLuint
*r300TranslateOpcodeFRC(struct r300_vertex_program_code
*vp
,
300 struct prog_instruction
*vpi
,
302 struct prog_src_register src
[3])
304 inst
[0] = PVS_OP_DST_OPERAND(VE_FRACTION
,
307 t_dst_index(vp
, &vpi
->DstReg
),
308 t_dst_mask(vpi
->DstReg
.WriteMask
),
309 t_dst_class(vpi
->DstReg
.File
));
310 inst
[1] = t_src(vp
, &src
[0]);
311 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
312 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/* Emit the instruction words for LG2: inst[0] is the ME_LOG_BASE2_FULL_DX
 * opcode and destination word.  inst[1] is built inline from src[0] with
 * swizzle component 0 replicated into all four lanes, the negate flag
 * collapsed to NEGATE_XYZW/NEGATE_NONE, and RelAddr << 4 ORed in.  inst[2]
 * and inst[3] are filled with __CONST(0, SWIZZLE_ZERO). */
317 static GLuint
*r300TranslateOpcodeLG2(struct r300_vertex_program_code
*vp
,
318 struct prog_instruction
*vpi
,
320 struct prog_src_register src
[3])
322 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
324 inst
[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX
,
327 t_dst_index(vp
, &vpi
->DstReg
),
328 t_dst_mask(vpi
->DstReg
.WriteMask
),
329 t_dst_class(vpi
->DstReg
.File
));
330 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
331 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
332 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
333 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
334 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
335 t_src_class(src
[0].File
),
336 src
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
337 (src
[0].RelAddr
<< 4);
338 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
339 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/* Emit the instruction words for LIT: inst[0] is the ME_LIGHT_COEFF_DX opcode
 * and destination word.  All three operand words are built from src[0], each
 * with PVS_SRC_SELECT_FORCE_0 in the third lane and a different arrangement
 * of the source's swizzle components 0, 1 and 3 in the other lanes:
 *   inst[1]: components 0, 3, forced zero, 1
 *   inst[2]: components 1, 3, forced zero, 0
 *   inst[3]: components 1, 0, forced zero, 3
 * Any non-zero Negate becomes NEGATE_XYZW, and RelAddr << 4 is ORed in. */
344 static GLuint
*r300TranslateOpcodeLIT(struct r300_vertex_program_code
*vp
,
345 struct prog_instruction
*vpi
,
347 struct prog_src_register src
[3])
349 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
351 inst
[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX
,
354 t_dst_index(vp
, &vpi
->DstReg
),
355 t_dst_mask(vpi
->DstReg
.WriteMask
),
356 t_dst_class(vpi
->DstReg
.File
));
357 /* NOTE: Users swizzling might not work. */
358 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
359 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
360 PVS_SRC_SELECT_FORCE_0
, // Z
361 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
362 t_src_class(src
[0].File
),
363 src
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
364 (src
[0].RelAddr
<< 4);
365 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
366 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
367 PVS_SRC_SELECT_FORCE_0
, // Z
368 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
369 t_src_class(src
[0].File
),
370 src
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
371 (src
[0].RelAddr
<< 4);
372 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
373 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
374 PVS_SRC_SELECT_FORCE_0
, // Z
375 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
376 t_src_class(src
[0].File
),
377 src
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
378 (src
[0].RelAddr
<< 4);
/* Emit the instruction words for LOG: inst[0] is the ME_LOG_BASE2_DX opcode
 * and destination word, inst[1] encodes src[0] as a scalar via
 * t_src_scalar(), and inst[2] and inst[3] are filled with
 * __CONST(0, SWIZZLE_ZERO). */
383 static GLuint
*r300TranslateOpcodeLOG(struct r300_vertex_program_code
*vp
,
384 struct prog_instruction
*vpi
,
386 struct prog_src_register src
[3])
388 inst
[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX
,
391 t_dst_index(vp
, &vpi
->DstReg
),
392 t_dst_mask(vpi
->DstReg
.WriteMask
),
393 t_dst_class(vpi
->DstReg
.File
));
394 inst
[1] = t_src_scalar(vp
, &src
[0]);
395 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
396 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/* Emit the instruction words for MAD: inst[0] is the PVS_MACRO_OP_2CLK_MADD
 * opcode and destination word, and inst[1], inst[2] and inst[3] encode
 * src[0], src[1] and src[2] via t_src(). */
401 static GLuint
*r300TranslateOpcodeMAD(struct r300_vertex_program_code
*vp
,
402 struct prog_instruction
*vpi
,
404 struct prog_src_register src
[3])
406 inst
[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD
,
409 t_dst_index(vp
, &vpi
->DstReg
),
410 t_dst_mask(vpi
->DstReg
.WriteMask
),
411 t_dst_class(vpi
->DstReg
.File
));
412 inst
[1] = t_src(vp
, &src
[0]);
413 inst
[2] = t_src(vp
, &src
[1]);
414 inst
[3] = t_src(vp
, &src
[2]);
/* Emit the instruction words for MAX: inst[0] is the VE_MAXIMUM opcode and
 * destination word, inst[1] and inst[2] encode src[0] and src[1] via t_src(),
 * and inst[3] is filled with __CONST(1, SWIZZLE_ZERO). */
419 static GLuint
*r300TranslateOpcodeMAX(struct r300_vertex_program_code
*vp
,
420 struct prog_instruction
*vpi
,
422 struct prog_src_register src
[3])
424 inst
[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM
,
427 t_dst_index(vp
, &vpi
->DstReg
),
428 t_dst_mask(vpi
->DstReg
.WriteMask
),
429 t_dst_class(vpi
->DstReg
.File
));
430 inst
[1] = t_src(vp
, &src
[0]);
431 inst
[2] = t_src(vp
, &src
[1]);
432 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/* Emit the instruction words for MIN: inst[0] is the VE_MINIMUM opcode and
 * destination word, inst[1] and inst[2] encode src[0] and src[1] via t_src(),
 * and inst[3] is filled with __CONST(1, SWIZZLE_ZERO). */
437 static GLuint
*r300TranslateOpcodeMIN(struct r300_vertex_program_code
*vp
,
438 struct prog_instruction
*vpi
,
440 struct prog_src_register src
[3])
442 inst
[0] = PVS_OP_DST_OPERAND(VE_MINIMUM
,
445 t_dst_index(vp
, &vpi
->DstReg
),
446 t_dst_mask(vpi
->DstReg
.WriteMask
),
447 t_dst_class(vpi
->DstReg
.File
));
448 inst
[1] = t_src(vp
, &src
[0]);
449 inst
[2] = t_src(vp
, &src
[1]);
450 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/* Emit the instruction words for MOV, expressed as a VE_ADD: inst[0] is the
 * VE_ADD opcode and destination word, inst[1] encodes src[0] via t_src(), and
 * inst[2] and inst[3] are filled with __CONST(0, SWIZZLE_ZERO). */
455 static GLuint
*r300TranslateOpcodeMOV(struct r300_vertex_program_code
*vp
,
456 struct prog_instruction
*vpi
,
458 struct prog_src_register src
[3])
460 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
462 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
465 t_dst_index(vp
, &vpi
->DstReg
),
466 t_dst_mask(vpi
->DstReg
.WriteMask
),
467 t_dst_class(vpi
->DstReg
.File
));
468 inst
[1] = t_src(vp
, &src
[0]);
469 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
470 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/* Emit the instruction words for MUL: inst[0] is the VE_MULTIPLY opcode and
 * destination word, inst[1] and inst[2] encode src[0] and src[1] via t_src(),
 * and inst[3] is filled with __CONST(1, SWIZZLE_ZERO). */
475 static GLuint
*r300TranslateOpcodeMUL(struct r300_vertex_program_code
*vp
,
476 struct prog_instruction
*vpi
,
478 struct prog_src_register src
[3])
480 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY
,
483 t_dst_index(vp
, &vpi
->DstReg
),
484 t_dst_mask(vpi
->DstReg
.WriteMask
),
485 t_dst_class(vpi
->DstReg
.File
));
486 inst
[1] = t_src(vp
, &src
[0]);
487 inst
[2] = t_src(vp
, &src
[1]);
488 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/* Emit the instruction words for POW: inst[0] is the ME_POWER_FUNC_FF opcode
 * and destination word, inst[1] encodes src[0] as a scalar, inst[2] is filled
 * with __CONST(0, SWIZZLE_ZERO), and inst[3] encodes src[1] as a scalar.
 * Note that the second source goes into the third operand slot. */
493 static GLuint
*r300TranslateOpcodePOW(struct r300_vertex_program_code
*vp
,
494 struct prog_instruction
*vpi
,
496 struct prog_src_register src
[3])
498 inst
[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF
,
501 t_dst_index(vp
, &vpi
->DstReg
),
502 t_dst_mask(vpi
->DstReg
.WriteMask
),
503 t_dst_class(vpi
->DstReg
.File
));
504 inst
[1] = t_src_scalar(vp
, &src
[0]);
505 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
506 inst
[3] = t_src_scalar(vp
, &src
[1]);
/* Emit the instruction words for RCP: inst[0] is the ME_RECIP_DX opcode and
 * destination word, inst[1] encodes src[0] as a scalar via t_src_scalar(),
 * and inst[2] and inst[3] are filled with __CONST(0, SWIZZLE_ZERO). */
511 static GLuint
*r300TranslateOpcodeRCP(struct r300_vertex_program_code
*vp
,
512 struct prog_instruction
*vpi
,
514 struct prog_src_register src
[3])
516 inst
[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX
,
519 t_dst_index(vp
, &vpi
->DstReg
),
520 t_dst_mask(vpi
->DstReg
.WriteMask
),
521 t_dst_class(vpi
->DstReg
.File
));
522 inst
[1] = t_src_scalar(vp
, &src
[0]);
523 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
524 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/* Emit the instruction words for RSQ: inst[0] is the ME_RECIP_SQRT_DX opcode
 * and destination word, inst[1] encodes src[0] as a scalar via
 * t_src_scalar(), and inst[2] and inst[3] are filled with
 * __CONST(0, SWIZZLE_ZERO). */
529 static GLuint
*r300TranslateOpcodeRSQ(struct r300_vertex_program_code
*vp
,
530 struct prog_instruction
*vpi
,
532 struct prog_src_register src
[3])
534 inst
[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX
,
537 t_dst_index(vp
, &vpi
->DstReg
),
538 t_dst_mask(vpi
->DstReg
.WriteMask
),
539 t_dst_class(vpi
->DstReg
.File
));
540 inst
[1] = t_src_scalar(vp
, &src
[0]);
541 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
542 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/* Emit the instruction words for SGE: inst[0] is the
 * VE_SET_GREATER_THAN_EQUAL opcode and destination word, inst[1] and inst[2]
 * encode src[0] and src[1] via t_src(), and inst[3] is filled with
 * __CONST(1, SWIZZLE_ZERO). */
547 static GLuint
*r300TranslateOpcodeSGE(struct r300_vertex_program_code
*vp
,
548 struct prog_instruction
*vpi
,
550 struct prog_src_register src
[3])
552 inst
[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL
,
555 t_dst_index(vp
, &vpi
->DstReg
),
556 t_dst_mask(vpi
->DstReg
.WriteMask
),
557 t_dst_class(vpi
->DstReg
.File
));
558 inst
[1] = t_src(vp
, &src
[0]);
559 inst
[2] = t_src(vp
, &src
[1]);
560 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/* Emit the instruction words for SLT: inst[0] is the VE_SET_LESS_THAN opcode
 * and destination word, inst[1] and inst[2] encode src[0] and src[1] via
 * t_src(), and inst[3] is filled with __CONST(1, SWIZZLE_ZERO). */
565 static GLuint
*r300TranslateOpcodeSLT(struct r300_vertex_program_code
*vp
,
566 struct prog_instruction
*vpi
,
568 struct prog_src_register src
[3])
570 inst
[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN
,
573 t_dst_index(vp
, &vpi
->DstReg
),
574 t_dst_mask(vpi
->DstReg
.WriteMask
),
575 t_dst_class(vpi
->DstReg
.File
));
576 inst
[1] = t_src(vp
, &src
[0]);
577 inst
[2] = t_src(vp
, &src
[1]);
578 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/* Assign hardware register slots for the program's inputs and outputs.
 *
 * Each vertex attribute whose bit is set in glvp->InputsRead receives a slot
 * in vp->inputs[] from a pre-incremented cur_reg.  Outputs whose bit is set
 * in glvp->OutputsWritten receive slots in vp->outputs[] from a
 * post-incremented cur_reg, visited in this order: HPOS, PSIZ, COL0, COL1,
 * BFC0, BFC1, TEX0..TEX7, FOGC.  HPOS is asserted to be written.
 *
 * The declarations and (re)initialisation of i and cur_reg, the body of the
 * first VERT_RESULT_MAX loop, and the bodies of the else-if branches in the
 * colour section are not visible in this excerpt. */
583 static void t_inputs_outputs(struct r300_vertex_program_code
*vp
, struct gl_program
* glvp
)
587 GLuint OutputsWritten
, InputsRead
;
589 OutputsWritten
= glvp
->OutputsWritten
;
590 InputsRead
= glvp
->InputsRead
;
593 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
594 if (InputsRead
& (1 << i
))
595 vp
->inputs
[i
] = ++cur_reg
;
601 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
604 assert(OutputsWritten
& (1 << VERT_RESULT_HPOS
));
606 if (OutputsWritten
& (1 << VERT_RESULT_HPOS
)) {
607 vp
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
610 if (OutputsWritten
& (1 << VERT_RESULT_PSIZ
)) {
611 vp
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
614 /* If we're writing back facing colors we need to send
615 * four colors to make front/back face colors selection work.
616 * If the vertex program doesn't write all 4 colors, lets
617 * pretend it does by skipping output index reg so the colors
618 * get written into appropriate output vectors.
620 if (OutputsWritten
& (1 << VERT_RESULT_COL0
)) {
621 vp
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
622 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
623 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
627 if (OutputsWritten
& (1 << VERT_RESULT_COL1
)) {
628 vp
->outputs
[VERT_RESULT_COL1
] = cur_reg
++;
629 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
630 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
634 if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
635 vp
->outputs
[VERT_RESULT_BFC0
] = cur_reg
++;
636 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
640 if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
641 vp
->outputs
[VERT_RESULT_BFC1
] = cur_reg
++;
642 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
646 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++) {
647 if (OutputsWritten
& (1 << i
)) {
648 vp
->outputs
[i
] = cur_reg
++;
652 if (OutputsWritten
& (1 << VERT_RESULT_FOGC
)) {
653 vp
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
/* Translate compiler->program into hardware instruction words stored in
 * compiler->code->body.d.
 *
 * Visible steps:
 *  - pos_end and length are reset and t_inputs_outputs() assigns the slots;
 *  - instructions are walked until OPCODE_END;
 *  - the number of scratch temporaries in use is derived from u_temp_i and
 *    checked, together with num_temporaries, against VSF_MAX_FRAGMENT_TEMPS
 *    (a message is printed to stderr when the budget is exceeded);
 *  - a destination rejected by valid_dst() is redirected to the scratch
 *    temporary u_temp_i;
 *  - the Mesa sources are copied into the local src[] array;
 *  - when CMP_SRCS reports a conflict (src[2] against src[1]/src[0] for
 *    three-operand instructions, src[0] against src[1] for two or more
 *    operands), a VE_ADD into a temporary is emitted and the affected source
 *    is rewritten to PROGRAM_TEMPORARY / u_temp_i;
 *  - the opcode is dispatched to the matching r300TranslateOpcode*() helper;
 *  - code->length is set to the number of words written (inst - body.d) and
 *    compared against VSF_MAX_FRAGMENT_LENGTH.
 *
 * The declarations of i and inst, the loop increment, the case labels of the
 * switch, several macro arguments and all return statements are not visible
 * in this excerpt. */
657 static GLboolean
translate_vertex_program(struct r300_vertex_program_compiler
* compiler
)
659 struct prog_instruction
*vpi
= compiler
->program
->Instructions
;
662 unsigned long num_operands
;
663 /* Initial value should be last tmp reg that hw supports.
664 Strangely enough r300 doesnt mind even though these would be out of range.
665 Smart enough to realize that it doesnt need it? */
666 int u_temp_i
= VSF_MAX_FRAGMENT_TEMPS
- 1;
667 struct prog_src_register src
[3];
668 struct r300_vertex_program_code
* vp
= compiler
->code
;
670 compiler
->code
->pos_end
= 0; /* Not supported yet */
671 compiler
->code
->length
= 0;
673 t_inputs_outputs(compiler
->code
, compiler
->program
);
675 for (inst
= compiler
->code
->body
.d
; vpi
->Opcode
!= OPCODE_END
;
679 int u_temp_used
= (VSF_MAX_FRAGMENT_TEMPS
- 1) - u_temp_i
;
680 if((compiler
->code
->num_temporaries
+ u_temp_used
) > VSF_MAX_FRAGMENT_TEMPS
) {
681 fprintf(stderr
, "Ran out of temps, num temps %d, us %d\n", compiler
->code
->num_temporaries
, u_temp_used
);
684 u_temp_i
=VSF_MAX_FRAGMENT_TEMPS
-1;
687 if (!valid_dst(compiler
->code
, &vpi
->DstReg
)) {
688 /* redirect result to unused temp */
689 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
690 vpi
->DstReg
.Index
= u_temp_i
;
693 num_operands
= _mesa_num_inst_src_regs(vpi
->Opcode
);
695 /* copy the sources (src) from mesa into a local variable... is this needed? */
696 for (i
= 0; i
< num_operands
; i
++) {
697 src
[i
] = vpi
->SrcReg
[i
];
700 if (num_operands
== 3) { /* TODO: scalars */
701 if (CMP_SRCS(src
[1], src
[2])
702 || CMP_SRCS(src
[0], src
[2])) {
703 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
708 PVS_DST_REG_TEMPORARY
);
710 PVS_SRC_OPERAND(t_src_index(compiler
->code
, &src
[2]),
715 t_src_class(src
[2].File
),
716 NEGATE_NONE
) | (src
[2].
719 inst
[2] = __CONST(2, SWIZZLE_ZERO
);
720 inst
[3] = __CONST(2, SWIZZLE_ZERO
);
723 src
[2].File
= PROGRAM_TEMPORARY
;
724 src
[2].Index
= u_temp_i
;
730 if (num_operands
>= 2) {
731 if (CMP_SRCS(src
[1], src
[0])) {
732 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
737 PVS_DST_REG_TEMPORARY
);
739 PVS_SRC_OPERAND(t_src_index(compiler
->code
, &src
[0]),
744 t_src_class(src
[0].File
),
745 NEGATE_NONE
) | (src
[0].
748 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
749 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
752 src
[0].File
= PROGRAM_TEMPORARY
;
753 src
[0].Index
= u_temp_i
;
759 switch (vpi
->Opcode
) {
761 inst
= r300TranslateOpcodeADD(compiler
->code
, vpi
, inst
, src
);
764 inst
= r300TranslateOpcodeARL(compiler
->code
, vpi
, inst
, src
);
767 inst
= r300TranslateOpcodeDP4(compiler
->code
, vpi
, inst
, src
);
770 inst
= r300TranslateOpcodeDST(compiler
->code
, vpi
, inst
, src
);
773 inst
= r300TranslateOpcodeEX2(compiler
->code
, vpi
, inst
, src
);
776 inst
= r300TranslateOpcodeEXP(compiler
->code
, vpi
, inst
, src
);
779 inst
= r300TranslateOpcodeFRC(compiler
->code
, vpi
, inst
, src
);
782 inst
= r300TranslateOpcodeLG2(compiler
->code
, vpi
, inst
, src
);
785 inst
= r300TranslateOpcodeLIT(compiler
->code
, vpi
, inst
, src
);
788 inst
= r300TranslateOpcodeLOG(compiler
->code
, vpi
, inst
, src
);
791 inst
= r300TranslateOpcodeMAD(compiler
->code
, vpi
, inst
, src
);
794 inst
= r300TranslateOpcodeMAX(compiler
->code
, vpi
, inst
, src
);
797 inst
= r300TranslateOpcodeMIN(compiler
->code
, vpi
, inst
, src
);
800 inst
= r300TranslateOpcodeMOV(compiler
->code
, vpi
, inst
, src
);
803 inst
= r300TranslateOpcodeMUL(compiler
->code
, vpi
, inst
, src
);
806 inst
= r300TranslateOpcodePOW(compiler
->code
, vpi
, inst
, src
);
809 inst
= r300TranslateOpcodeRCP(compiler
->code
, vpi
, inst
, src
);
812 inst
= r300TranslateOpcodeRSQ(compiler
->code
, vpi
, inst
, src
);
815 inst
= r300TranslateOpcodeSGE(compiler
->code
, vpi
, inst
, src
);
818 inst
= r300TranslateOpcodeSLT(compiler
->code
, vpi
, inst
, src
);
825 compiler
->code
->length
= (inst
- compiler
->code
->body
.d
);
826 if (compiler
->code
->length
>= VSF_MAX_FRAGMENT_LENGTH
) {
/* Insert two instruction slots before the program's last instruction and
 * fill them in, starting at index NumInstructions - 3 (after the insertion):
 *   MOV output[VERT_RESULT_HPOS]          <- temporary[temp_index]
 *   MOV output[VERT_RESULT_TEX0 + tex_id] <- temporary[temp_index]
 * followed by an OPCODE_END.  Both moves use a full XYZW write mask and
 * identity swizzle.  The statements that advance vpi between the three groups
 * are not visible in this excerpt. */
833 static void insert_wpos(struct gl_program
*prog
, GLuint temp_index
, int tex_id
)
835 struct prog_instruction
*vpi
;
837 _mesa_insert_instructions(prog
, prog
->NumInstructions
- 1, 2);
839 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 3];
841 vpi
->Opcode
= OPCODE_MOV
;
843 vpi
->DstReg
.File
= PROGRAM_OUTPUT
;
844 vpi
->DstReg
.Index
= VERT_RESULT_HPOS
;
845 vpi
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
846 vpi
->DstReg
.CondMask
= COND_TR
;
848 vpi
->SrcReg
[0].File
= PROGRAM_TEMPORARY
;
849 vpi
->SrcReg
[0].Index
= temp_index
;
850 vpi
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
854 vpi
->Opcode
= OPCODE_MOV
;
856 vpi
->DstReg
.File
= PROGRAM_OUTPUT
;
857 vpi
->DstReg
.Index
= VERT_RESULT_TEX0
+ tex_id
;
858 vpi
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
859 vpi
->DstReg
.CondMask
= COND_TR
;
861 vpi
->SrcReg
[0].File
= PROGRAM_TEMPORARY
;
862 vpi
->SrcReg
[0].Index
= temp_index
;
863 vpi
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
867 vpi
->Opcode
= OPCODE_END
;
870 static void pos_as_texcoord(struct gl_program
*prog
, int tex_id
)
872 struct prog_instruction
*vpi
;
873 GLuint tempregi
= prog
->NumTemporaries
;
875 prog
->NumTemporaries
++;
877 for (vpi
= prog
->Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++) {
878 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&& vpi
->DstReg
.Index
== VERT_RESULT_HPOS
) {
879 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
880 vpi
->DstReg
.Index
= tempregi
;
884 insert_wpos(prog
, tempregi
, tex_id
);
886 prog
->OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ tex_id
);
890 * The fogcoord attribute is special in that only the first component
891 * is relevant, and the remaining components are always fixed (when read
892 * from by the fragment program) to yield an X001 pattern.
894 * We need to enforce this either in the vertex program or in the fragment
895 * program, and this code chooses not to enforce it in the vertex program.
896 * This is slightly cheaper, as long as the fragment program does not use
899 * And it seems that usually, weird swizzles are not used, so...
901 * See also the counterpart rewriting for fragment programs.
/* Route the fog coordinate through texture coordinate tex_id: every
 * instruction that writes output VERT_RESULT_FOGC is retargeted to
 * VERT_RESULT_TEX0 + tex_id with its write mask reduced to X, after which the
 * FOGC bit is cleared and the texcoord bit is set in prog->OutputsWritten.
 * The statement that advances vpi inside the loop is not visible in this
 * excerpt. */
903 static void fog_as_texcoord(struct gl_program
*prog
, int tex_id
)
905 struct prog_instruction
*vpi
;
907 vpi
= prog
->Instructions
;
908 while (vpi
->Opcode
!= OPCODE_END
) {
909 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&& vpi
->DstReg
.Index
== VERT_RESULT_FOGC
) {
910 vpi
->DstReg
.Index
= VERT_RESULT_TEX0
+ tex_id
;
911 vpi
->DstReg
.WriteMask
= WRITEMASK_X
;
917 prog
->OutputsWritten
&= ~(1 << VERT_RESULT_FOGC
);
918 prog
->OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ tex_id
);
/* ADD_OUTPUT(fp_attr, vp_result): when bit fp_attr is set in FpReads and bit
 * vp_result is not set in compiler->program->OutputsWritten, the macro sets
 * bit vp_result in OutputsAdded.  FpReads, OutputsAdded and compiler are taken
 * from the scope where the macro is expanded.  The remaining lines of the
 * macro are not visible in this excerpt. */
922 #define ADD_OUTPUT(fp_attr, vp_result) \
924 if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \
925 OutputsAdded |= 1 << (vp_result); \
/* Add vertex outputs that the fragment program reads (per
 * compiler->state.FpReads) but the vertex program does not write.
 *
 * ADD_OUTPUT collects the missing outputs in OutputsAdded for COL0, COL1 and
 * the texcoords.  `count` instructions are then inserted before the program's
 * last instruction, and for every bit set in OutputsAdded a
 * MOV output[i] <- constant[0] (XYZW mask, identity swizzle) is written.
 * Finally OutputsAdded is merged into the program's OutputsWritten.
 *
 * The declarations of i and count, how count is computed, and the statement
 * advancing inst are not visible in this excerpt.
 *
 * NOTE(review): the texcoord loop below stops at i < 7, i.e. it covers
 * TEX0..TEX6, whereas nqssadceInit() iterates over 8 texcoords and
 * t_inputs_outputs() covers TEX0..TEX7 -- confirm whether leaving out TEX7 is
 * intentional. */
930 static void addArtificialOutputs(struct r300_vertex_program_compiler
* compiler
)
932 GLuint OutputsAdded
, FpReads
;
937 FpReads
= compiler
->state
.FpReads
;
939 ADD_OUTPUT(FRAG_ATTRIB_COL0
, VERT_RESULT_COL0
);
940 ADD_OUTPUT(FRAG_ATTRIB_COL1
, VERT_RESULT_COL1
);
942 for (i
= 0; i
< 7; ++i
) {
943 ADD_OUTPUT(FRAG_ATTRIB_TEX0
+ i
, VERT_RESULT_TEX0
+ i
);
946 /* Some outputs may be artificially added, to match the inputs of the fragment program.
947 * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
948 * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
951 struct prog_instruction
*inst
;
953 _mesa_insert_instructions(compiler
->program
, compiler
->program
->NumInstructions
- 1, count
);
954 inst
= &compiler
->program
->Instructions
[compiler
->program
->NumInstructions
- 1 - count
];
956 for (i
= 0; i
< VERT_RESULT_MAX
; ++i
) {
957 if (OutputsAdded
& (1 << i
)) {
958 inst
->Opcode
= OPCODE_MOV
;
960 inst
->DstReg
.File
= PROGRAM_OUTPUT
;
961 inst
->DstReg
.Index
= i
;
962 inst
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
963 inst
->DstReg
.CondMask
= COND_TR
;
965 inst
->SrcReg
[0].File
= PROGRAM_CONSTANT
;
966 inst
->SrcReg
[0].Index
= 0;
967 inst
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
973 compiler
->program
->OutputsWritten
|= OutputsAdded
;
/* Init callback for the nqssadce pass: fills in the Sourced masks of
 * s->Outputs[] from the fragment program's reads.  s->UserData is the vertex
 * program compiler.
 *   - FRAG_BIT_COL0 read: COL0 and BFC0 are marked XYZW;
 *   - FRAG_BIT_COL1 read: COL1 and BFC1 are marked XYZW;
 *   - FRAG_BIT_TEX(i) read, for i in 0..7: TEX0 + i is marked XYZW;
 *   - HPOS is always marked XYZW;
 *   - PSIZ is marked X when the program writes it.
 * The declarations of i and fp_reads are not visible in this excerpt. */
979 static void nqssadceInit(struct nqssadce_state
* s
)
981 struct r300_vertex_program_compiler
* compiler
= s
->UserData
;
984 fp_reads
= compiler
->state
.FpReads
;
986 if (fp_reads
& FRAG_BIT_COL0
) {
987 s
->Outputs
[VERT_RESULT_COL0
].Sourced
= WRITEMASK_XYZW
;
988 s
->Outputs
[VERT_RESULT_BFC0
].Sourced
= WRITEMASK_XYZW
;
991 if (fp_reads
& FRAG_BIT_COL1
) {
992 s
->Outputs
[VERT_RESULT_COL1
].Sourced
= WRITEMASK_XYZW
;
993 s
->Outputs
[VERT_RESULT_BFC1
].Sourced
= WRITEMASK_XYZW
;
999 for (i
= 0; i
< 8; ++i
) {
1000 if (fp_reads
& FRAG_BIT_TEX(i
)) {
1001 s
->Outputs
[VERT_RESULT_TEX0
+ i
].Sourced
= WRITEMASK_XYZW
;
1006 s
->Outputs
[VERT_RESULT_HPOS
].Sourced
= WRITEMASK_XYZW
;
1007 if (s
->Program
->OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
1008 s
->Outputs
[VERT_RESULT_PSIZ
].Sourced
= WRITEMASK_X
;
/* Swizzle predicate installed as the .IsNativeSwizzle callback of the
 * nqssadce descriptor in r3xx_compile_vertex_program().
 * NOTE(review): the body is not visible in this excerpt, so the conditions
 * under which it accepts a swizzle cannot be stated here. */
1011 static GLboolean
swizzleIsNative(GLuint opcode
, struct prog_src_register reg
)
1021 GLboolean
r3xx_compile_vertex_program(struct r300_vertex_program_compiler
* compiler
, GLcontext
* ctx
)
1025 if (compiler
->state
.WPosAttr
!= FRAG_ATTRIB_MAX
) {
1026 pos_as_texcoord(compiler
->program
, compiler
->state
.WPosAttr
- FRAG_ATTRIB_TEX0
);
1029 if (compiler
->state
.FogAttr
!= FRAG_ATTRIB_MAX
) {
1030 fog_as_texcoord(compiler
->program
, compiler
->state
.FogAttr
- FRAG_ATTRIB_TEX0
);
1033 addArtificialOutputs(compiler
);
1036 struct radeon_program_transformation transformations
[] = {
1037 { &r300_transform_vertex_alu
, 0 },
1039 radeonLocalTransform(compiler
->program
, 1, transformations
);
1042 if (compiler
->Base
.Debug
) {
1043 fprintf(stderr
, "Vertex program after native rewrite:\n");
1044 _mesa_print_program(compiler
->program
);
1049 struct radeon_nqssadce_descr nqssadce
= {
1050 .Init
= &nqssadceInit
,
1051 .IsNativeSwizzle
= &swizzleIsNative
,
1052 .BuildSwizzle
= NULL
1054 radeonNqssaDce(compiler
->program
, &nqssadce
, compiler
);
1056 /* We need this step for reusing temporary registers */
1057 _mesa_optimize_program(ctx
, compiler
->program
);
1059 if (compiler
->Base
.Debug
) {
1060 fprintf(stderr
, "Vertex program after NQSSADCE:\n");
1061 _mesa_print_program(compiler
->program
);
1066 assert(compiler
->program
->NumInstructions
);
1068 struct prog_instruction
*inst
;
1071 inst
= compiler
->program
->Instructions
;
1073 while (inst
->Opcode
!= OPCODE_END
) {
1074 tmp
= _mesa_num_inst_src_regs(inst
->Opcode
);
1075 for (i
= 0; i
< tmp
; ++i
) {
1076 if (inst
->SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
1077 if ((int) inst
->SrcReg
[i
].Index
> max
) {
1078 max
= inst
->SrcReg
[i
].Index
;
1083 if (_mesa_num_inst_dst_regs(inst
->Opcode
)) {
1084 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
1085 if ((int) inst
->DstReg
.Index
> max
) {
1086 max
= inst
->DstReg
.Index
;
1093 /* We actually want highest index of used temporary register,
1094 * not the number of temporaries used.
1095 * These values aren't always the same.
1097 compiler
->code
->num_temporaries
= max
+ 1;
1100 success
= translate_vertex_program(compiler
);
1102 compiler
->code
->InputsRead
= compiler
->program
->InputsRead
;
1103 compiler
->code
->OutputsWritten
= compiler
->program
->OutputsWritten
;