2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 #include "radeon_compiler.h"
25 #include "../r300_reg.h"
27 #include "radeon_nqssadce.h"
29 #include "shader/prog_optimize.h"
30 #include "shader/prog_print.h"
/* TODO: Get rid of t_src_class call */
/*
 * Evaluates to non-zero when sources a and b differ in their RelAddr flag, or
 * have different indices while both belong to the constant class or both
 * belong to the input class (as reported by t_src_class()).
 *
 * Each argument is expanded several times, so pass side-effect-free
 * expressions only.
 */
#define CMP_SRCS(a, b) (((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index && \
	((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
	  t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
	 (t_src_class((a).File) == PVS_SRC_REG_INPUT && \
	  t_src_class((b).File) == PVS_SRC_REG_INPUT))))
/**
 * Take an already-setup and valid source then swizzle it appropriately to
 * obtain a constant ZERO or ONE source.
 */
44 #define __CONST(x, y) \
45 (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
50 t_src_class(src[x].File), \
51 NEGATE_NONE) | (src[x].RelAddr << 4))
56 static unsigned long t_dst_mask(GLuint mask
)
58 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
59 return mask
& WRITEMASK_XYZW
;
62 static unsigned long t_dst_class(gl_register_file file
)
66 case PROGRAM_TEMPORARY
:
67 return PVS_DST_REG_TEMPORARY
;
69 return PVS_DST_REG_OUT
;
71 return PVS_DST_REG_A0
;
74 case PROGRAM_LOCAL_PARAM:
75 case PROGRAM_ENV_PARAM:
76 case PROGRAM_NAMED_PARAM:
77 case PROGRAM_STATE_VAR:
78 case PROGRAM_WRITE_ONLY:
82 fprintf(stderr
, "problem in %s", __FUNCTION__
);
88 static unsigned long t_dst_index(struct r300_vertex_program_code
*vp
,
89 struct prog_dst_register
*dst
)
91 if (dst
->File
== PROGRAM_OUTPUT
)
92 return vp
->outputs
[dst
->Index
];
97 static unsigned long t_src_class(gl_register_file file
)
100 case PROGRAM_TEMPORARY
:
101 return PVS_SRC_REG_TEMPORARY
;
103 return PVS_SRC_REG_INPUT
;
104 case PROGRAM_LOCAL_PARAM
:
105 case PROGRAM_ENV_PARAM
:
106 case PROGRAM_NAMED_PARAM
:
107 case PROGRAM_CONSTANT
:
108 case PROGRAM_STATE_VAR
:
109 return PVS_SRC_REG_CONSTANT
;
112 case PROGRAM_WRITE_ONLY:
113 case PROGRAM_ADDRESS:
116 fprintf(stderr
, "problem in %s", __FUNCTION__
);
122 static INLINE
unsigned long t_swizzle(GLubyte swizzle
)
124 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
128 static unsigned long t_src_index(struct r300_vertex_program_code
*vp
,
129 struct prog_src_register
*src
)
131 if (src
->File
== PROGRAM_INPUT
) {
132 assert(vp
->inputs
[src
->Index
] != -1);
133 return vp
->inputs
[src
->Index
];
135 if (src
->Index
< 0) {
137 "negative offsets for indirect addressing do not work.\n");
144 /* these two functions should probably be merged... */
146 static unsigned long t_src(struct r300_vertex_program_code
*vp
,
147 struct prog_src_register
*src
)
149 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
150 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
152 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
153 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
154 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
155 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
156 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
157 t_src_class(src
->File
),
158 src
->Negate
) | (src
->RelAddr
<< 4);
161 static unsigned long t_src_scalar(struct r300_vertex_program_code
*vp
,
162 struct prog_src_register
*src
)
164 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
165 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
167 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
168 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
169 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
170 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
171 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
172 t_src_class(src
->File
),
173 src
->Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
177 static GLboolean
valid_dst(struct r300_vertex_program_code
*vp
,
178 struct prog_dst_register
*dst
)
180 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
182 } else if (dst
->File
== PROGRAM_ADDRESS
) {
183 assert(dst
->Index
== 0);
189 static GLuint
*r300TranslateOpcodeABS(struct r300_vertex_program_code
*vp
,
190 struct prog_instruction
*vpi
,
192 struct prog_src_register src
[3])
194 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
196 inst
[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM
,
199 t_dst_index(vp
, &vpi
->DstReg
),
200 t_dst_mask(vpi
->DstReg
.WriteMask
),
201 t_dst_class(vpi
->DstReg
.File
));
202 inst
[1] = t_src(vp
, &src
[0]);
203 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
204 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
205 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
206 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
207 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)),
208 t_src_class(src
[0].File
),
210 Negate
) ? NEGATE_XYZW
: NEGATE_NONE
) |
211 (src
[0].RelAddr
<< 4);
217 static GLuint
*r300TranslateOpcodeADD(struct r300_vertex_program_code
*vp
,
218 struct prog_instruction
*vpi
,
220 struct prog_src_register src
[3])
222 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
225 t_dst_index(vp
, &vpi
->DstReg
),
226 t_dst_mask(vpi
->DstReg
.WriteMask
),
227 t_dst_class(vpi
->DstReg
.File
));
228 inst
[1] = t_src(vp
, &src
[0]);
229 inst
[2] = t_src(vp
, &src
[1]);
230 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
235 static GLuint
*r300TranslateOpcodeARL(struct r300_vertex_program_code
*vp
,
236 struct prog_instruction
*vpi
,
238 struct prog_src_register src
[3])
240 inst
[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX
,
243 t_dst_index(vp
, &vpi
->DstReg
),
244 t_dst_mask(vpi
->DstReg
.WriteMask
),
245 t_dst_class(vpi
->DstReg
.File
));
246 inst
[1] = t_src(vp
, &src
[0]);
247 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
248 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
253 static GLuint
*r300TranslateOpcodeDP3(struct r300_vertex_program_code
*vp
,
254 struct prog_instruction
*vpi
,
256 struct prog_src_register src
[3])
258 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
260 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
263 t_dst_index(vp
, &vpi
->DstReg
),
264 t_dst_mask(vpi
->DstReg
.WriteMask
),
265 t_dst_class(vpi
->DstReg
.File
));
266 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
267 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
268 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
269 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
271 t_src_class(src
[0].File
),
272 src
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
273 (src
[0].RelAddr
<< 4);
275 PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
276 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
277 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
278 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), SWIZZLE_ZERO
,
279 t_src_class(src
[1].File
),
280 src
[1].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
281 (src
[1].RelAddr
<< 4);
282 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
287 static GLuint
*r300TranslateOpcodeDP4(struct r300_vertex_program_code
*vp
,
288 struct prog_instruction
*vpi
,
290 struct prog_src_register src
[3])
292 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
295 t_dst_index(vp
, &vpi
->DstReg
),
296 t_dst_mask(vpi
->DstReg
.WriteMask
),
297 t_dst_class(vpi
->DstReg
.File
));
298 inst
[1] = t_src(vp
, &src
[0]);
299 inst
[2] = t_src(vp
, &src
[1]);
300 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
305 static GLuint
*r300TranslateOpcodeDPH(struct r300_vertex_program_code
*vp
,
306 struct prog_instruction
*vpi
,
308 struct prog_src_register src
[3])
310 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
311 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
314 t_dst_index(vp
, &vpi
->DstReg
),
315 t_dst_mask(vpi
->DstReg
.WriteMask
),
316 t_dst_class(vpi
->DstReg
.File
));
317 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
318 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
319 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
320 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
321 PVS_SRC_SELECT_FORCE_1
,
322 t_src_class(src
[0].File
),
323 src
[0].Negate
? NEGATE_XYZ
: NEGATE_NONE
) |
324 (src
[0].RelAddr
<< 4);
325 inst
[2] = t_src(vp
, &src
[1]);
326 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
331 static GLuint
*r300TranslateOpcodeDST(struct r300_vertex_program_code
*vp
,
332 struct prog_instruction
*vpi
,
334 struct prog_src_register src
[3])
336 inst
[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR
,
339 t_dst_index(vp
, &vpi
->DstReg
),
340 t_dst_mask(vpi
->DstReg
.WriteMask
),
341 t_dst_class(vpi
->DstReg
.File
));
342 inst
[1] = t_src(vp
, &src
[0]);
343 inst
[2] = t_src(vp
, &src
[1]);
344 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
349 static GLuint
*r300TranslateOpcodeEX2(struct r300_vertex_program_code
*vp
,
350 struct prog_instruction
*vpi
,
352 struct prog_src_register src
[3])
354 inst
[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX
,
357 t_dst_index(vp
, &vpi
->DstReg
),
358 t_dst_mask(vpi
->DstReg
.WriteMask
),
359 t_dst_class(vpi
->DstReg
.File
));
360 inst
[1] = t_src_scalar(vp
, &src
[0]);
361 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
362 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
367 static GLuint
*r300TranslateOpcodeEXP(struct r300_vertex_program_code
*vp
,
368 struct prog_instruction
*vpi
,
370 struct prog_src_register src
[3])
372 inst
[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX
,
375 t_dst_index(vp
, &vpi
->DstReg
),
376 t_dst_mask(vpi
->DstReg
.WriteMask
),
377 t_dst_class(vpi
->DstReg
.File
));
378 inst
[1] = t_src_scalar(vp
, &src
[0]);
379 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
380 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
385 static GLuint
*r300TranslateOpcodeFLR(struct r300_vertex_program_code
*vp
,
386 struct prog_instruction
*vpi
,
388 struct prog_src_register src
[3],
391 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
392 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
394 inst
[0] = PVS_OP_DST_OPERAND(VE_FRACTION
,
398 t_dst_mask(vpi
->DstReg
.WriteMask
),
399 PVS_DST_REG_TEMPORARY
);
400 inst
[1] = t_src(vp
, &src
[0]);
401 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
402 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
405 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
408 t_dst_index(vp
, &vpi
->DstReg
),
409 t_dst_mask(vpi
->DstReg
.WriteMask
),
410 t_dst_class(vpi
->DstReg
.File
));
411 inst
[1] = t_src(vp
, &src
[0]);
412 inst
[2] = PVS_SRC_OPERAND(*u_temp_i
,
416 PVS_SRC_SELECT_W
, PVS_SRC_REG_TEMPORARY
,
417 /* Not 100% sure about this */
419 Negate
) ? NEGATE_XYZW
: NEGATE_NONE
);
420 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
426 static GLuint
*r300TranslateOpcodeFRC(struct r300_vertex_program_code
*vp
,
427 struct prog_instruction
*vpi
,
429 struct prog_src_register src
[3])
431 inst
[0] = PVS_OP_DST_OPERAND(VE_FRACTION
,
434 t_dst_index(vp
, &vpi
->DstReg
),
435 t_dst_mask(vpi
->DstReg
.WriteMask
),
436 t_dst_class(vpi
->DstReg
.File
));
437 inst
[1] = t_src(vp
, &src
[0]);
438 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
439 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
444 static GLuint
*r300TranslateOpcodeLG2(struct r300_vertex_program_code
*vp
,
445 struct prog_instruction
*vpi
,
447 struct prog_src_register src
[3])
449 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
451 inst
[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX
,
454 t_dst_index(vp
, &vpi
->DstReg
),
455 t_dst_mask(vpi
->DstReg
.WriteMask
),
456 t_dst_class(vpi
->DstReg
.File
));
457 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
458 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
459 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
460 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
461 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
462 t_src_class(src
[0].File
),
463 src
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
464 (src
[0].RelAddr
<< 4);
465 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
466 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
471 static GLuint
*r300TranslateOpcodeLIT(struct r300_vertex_program_code
*vp
,
472 struct prog_instruction
*vpi
,
474 struct prog_src_register src
[3])
476 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
478 inst
[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX
,
481 t_dst_index(vp
, &vpi
->DstReg
),
482 t_dst_mask(vpi
->DstReg
.WriteMask
),
483 t_dst_class(vpi
->DstReg
.File
));
484 /* NOTE: Users swizzling might not work. */
485 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
486 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
487 PVS_SRC_SELECT_FORCE_0
, // Z
488 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
489 t_src_class(src
[0].File
),
490 src
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
491 (src
[0].RelAddr
<< 4);
492 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
493 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
494 PVS_SRC_SELECT_FORCE_0
, // Z
495 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
496 t_src_class(src
[0].File
),
497 src
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
498 (src
[0].RelAddr
<< 4);
499 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
500 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
501 PVS_SRC_SELECT_FORCE_0
, // Z
502 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
503 t_src_class(src
[0].File
),
504 src
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
505 (src
[0].RelAddr
<< 4);
510 static GLuint
*r300TranslateOpcodeLOG(struct r300_vertex_program_code
*vp
,
511 struct prog_instruction
*vpi
,
513 struct prog_src_register src
[3])
515 inst
[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX
,
518 t_dst_index(vp
, &vpi
->DstReg
),
519 t_dst_mask(vpi
->DstReg
.WriteMask
),
520 t_dst_class(vpi
->DstReg
.File
));
521 inst
[1] = t_src_scalar(vp
, &src
[0]);
522 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
523 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
528 static GLuint
*r300TranslateOpcodeMAD(struct r300_vertex_program_code
*vp
,
529 struct prog_instruction
*vpi
,
531 struct prog_src_register src
[3])
533 inst
[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD
,
536 t_dst_index(vp
, &vpi
->DstReg
),
537 t_dst_mask(vpi
->DstReg
.WriteMask
),
538 t_dst_class(vpi
->DstReg
.File
));
539 inst
[1] = t_src(vp
, &src
[0]);
540 inst
[2] = t_src(vp
, &src
[1]);
541 inst
[3] = t_src(vp
, &src
[2]);
546 static GLuint
*r300TranslateOpcodeMAX(struct r300_vertex_program_code
*vp
,
547 struct prog_instruction
*vpi
,
549 struct prog_src_register src
[3])
551 inst
[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM
,
554 t_dst_index(vp
, &vpi
->DstReg
),
555 t_dst_mask(vpi
->DstReg
.WriteMask
),
556 t_dst_class(vpi
->DstReg
.File
));
557 inst
[1] = t_src(vp
, &src
[0]);
558 inst
[2] = t_src(vp
, &src
[1]);
559 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
564 static GLuint
*r300TranslateOpcodeMIN(struct r300_vertex_program_code
*vp
,
565 struct prog_instruction
*vpi
,
567 struct prog_src_register src
[3])
569 inst
[0] = PVS_OP_DST_OPERAND(VE_MINIMUM
,
572 t_dst_index(vp
, &vpi
->DstReg
),
573 t_dst_mask(vpi
->DstReg
.WriteMask
),
574 t_dst_class(vpi
->DstReg
.File
));
575 inst
[1] = t_src(vp
, &src
[0]);
576 inst
[2] = t_src(vp
, &src
[1]);
577 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
582 static GLuint
*r300TranslateOpcodeMOV(struct r300_vertex_program_code
*vp
,
583 struct prog_instruction
*vpi
,
585 struct prog_src_register src
[3])
587 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
589 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
592 t_dst_index(vp
, &vpi
->DstReg
),
593 t_dst_mask(vpi
->DstReg
.WriteMask
),
594 t_dst_class(vpi
->DstReg
.File
));
595 inst
[1] = t_src(vp
, &src
[0]);
596 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
597 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
602 static GLuint
*r300TranslateOpcodeMUL(struct r300_vertex_program_code
*vp
,
603 struct prog_instruction
*vpi
,
605 struct prog_src_register src
[3])
607 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY
,
610 t_dst_index(vp
, &vpi
->DstReg
),
611 t_dst_mask(vpi
->DstReg
.WriteMask
),
612 t_dst_class(vpi
->DstReg
.File
));
613 inst
[1] = t_src(vp
, &src
[0]);
614 inst
[2] = t_src(vp
, &src
[1]);
615 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
620 static GLuint
*r300TranslateOpcodePOW(struct r300_vertex_program_code
*vp
,
621 struct prog_instruction
*vpi
,
623 struct prog_src_register src
[3])
625 inst
[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF
,
628 t_dst_index(vp
, &vpi
->DstReg
),
629 t_dst_mask(vpi
->DstReg
.WriteMask
),
630 t_dst_class(vpi
->DstReg
.File
));
631 inst
[1] = t_src_scalar(vp
, &src
[0]);
632 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
633 inst
[3] = t_src_scalar(vp
, &src
[1]);
638 static GLuint
*r300TranslateOpcodeRCP(struct r300_vertex_program_code
*vp
,
639 struct prog_instruction
*vpi
,
641 struct prog_src_register src
[3])
643 inst
[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX
,
646 t_dst_index(vp
, &vpi
->DstReg
),
647 t_dst_mask(vpi
->DstReg
.WriteMask
),
648 t_dst_class(vpi
->DstReg
.File
));
649 inst
[1] = t_src_scalar(vp
, &src
[0]);
650 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
651 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
656 static GLuint
*r300TranslateOpcodeRSQ(struct r300_vertex_program_code
*vp
,
657 struct prog_instruction
*vpi
,
659 struct prog_src_register src
[3])
661 inst
[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX
,
664 t_dst_index(vp
, &vpi
->DstReg
),
665 t_dst_mask(vpi
->DstReg
.WriteMask
),
666 t_dst_class(vpi
->DstReg
.File
));
667 inst
[1] = t_src_scalar(vp
, &src
[0]);
668 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
669 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
674 static GLuint
*r300TranslateOpcodeSGE(struct r300_vertex_program_code
*vp
,
675 struct prog_instruction
*vpi
,
677 struct prog_src_register src
[3])
679 inst
[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL
,
682 t_dst_index(vp
, &vpi
->DstReg
),
683 t_dst_mask(vpi
->DstReg
.WriteMask
),
684 t_dst_class(vpi
->DstReg
.File
));
685 inst
[1] = t_src(vp
, &src
[0]);
686 inst
[2] = t_src(vp
, &src
[1]);
687 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
692 static GLuint
*r300TranslateOpcodeSLT(struct r300_vertex_program_code
*vp
,
693 struct prog_instruction
*vpi
,
695 struct prog_src_register src
[3])
697 inst
[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN
,
700 t_dst_index(vp
, &vpi
->DstReg
),
701 t_dst_mask(vpi
->DstReg
.WriteMask
),
702 t_dst_class(vpi
->DstReg
.File
));
703 inst
[1] = t_src(vp
, &src
[0]);
704 inst
[2] = t_src(vp
, &src
[1]);
705 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
710 static GLuint
*r300TranslateOpcodeSUB(struct r300_vertex_program_code
*vp
,
711 struct prog_instruction
*vpi
,
713 struct prog_src_register src
[3])
715 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
718 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
721 t_dst_index(vp
, &vpi
->DstReg
),
722 t_dst_mask(vpi
->DstReg
.WriteMask
),
723 t_dst_class(vpi
->DstReg
.File
));
724 inst
[1] = t_src(vp
, &src
[0]);
725 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
726 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
727 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
728 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
729 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
730 t_src_class(src
[1].File
),
732 Negate
) ? NEGATE_XYZW
: NEGATE_NONE
) |
733 (src
[1].RelAddr
<< 4);
737 PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
740 t_dst_index(vp
, &vpi
->DstReg
),
741 t_dst_mask(vpi
->DstReg
.WriteMask
),
742 t_dst_class(vpi
->DstReg
.File
));
743 inst
[1] = t_src(vp
, &src
[0]);
744 inst
[2] = __CONST(0, SWIZZLE_ONE
);
745 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
746 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
747 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
748 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
749 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
750 t_src_class(src
[1].File
),
752 Negate
) ? NEGATE_XYZW
: NEGATE_NONE
) |
753 (src
[1].RelAddr
<< 4);
759 static GLuint
*r300TranslateOpcodeSWZ(struct r300_vertex_program_code
*vp
,
760 struct prog_instruction
*vpi
,
762 struct prog_src_register src
[3])
764 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
766 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
769 t_dst_index(vp
, &vpi
->DstReg
),
770 t_dst_mask(vpi
->DstReg
.WriteMask
),
771 t_dst_class(vpi
->DstReg
.File
));
772 inst
[1] = t_src(vp
, &src
[0]);
773 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
774 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
779 static GLuint
*r300TranslateOpcodeXPD(struct r300_vertex_program_code
*vp
,
780 struct prog_instruction
*vpi
,
782 struct prog_src_register src
[3],
785 /* mul r0, r1.yzxw, r2.zxyw
786 mad r0, -r2.yzxw, r1.zxyw, r0
789 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
793 t_dst_mask(vpi
->DstReg
.WriteMask
),
794 PVS_DST_REG_TEMPORARY
);
795 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
796 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // Z
797 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
798 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
799 t_src_class(src
[0].File
),
800 src
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
801 (src
[0].RelAddr
<< 4);
802 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // Z
803 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // X
804 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // Y
805 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // W
806 t_src_class(src
[1].File
),
807 src
[1].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
808 (src
[1].RelAddr
<< 4);
809 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
812 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
815 t_dst_index(vp
, &vpi
->DstReg
),
816 t_dst_mask(vpi
->DstReg
.WriteMask
),
817 t_dst_class(vpi
->DstReg
.File
));
818 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // Y
819 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // Z
820 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // X
821 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // W
822 t_src_class(src
[1].File
),
824 Negate
) ? NEGATE_XYZW
: NEGATE_NONE
) |
825 (src
[1].RelAddr
<< 4);
826 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // Z
827 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
828 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
829 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
830 t_src_class(src
[0].File
),
831 src
[0].Negate
? NEGATE_XYZW
: NEGATE_NONE
) |
832 (src
[0].RelAddr
<< 4);
834 PVS_SRC_OPERAND(*u_temp_i
, PVS_SRC_SELECT_X
, PVS_SRC_SELECT_Y
,
835 PVS_SRC_SELECT_Z
, PVS_SRC_SELECT_W
,
836 PVS_SRC_REG_TEMPORARY
, NEGATE_NONE
);
843 static void t_inputs_outputs(struct r300_vertex_program_code
*vp
, struct gl_program
* glvp
)
847 GLuint OutputsWritten
, InputsRead
;
849 OutputsWritten
= glvp
->OutputsWritten
;
850 InputsRead
= glvp
->InputsRead
;
853 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
854 if (InputsRead
& (1 << i
))
855 vp
->inputs
[i
] = ++cur_reg
;
861 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
864 assert(OutputsWritten
& (1 << VERT_RESULT_HPOS
));
866 if (OutputsWritten
& (1 << VERT_RESULT_HPOS
)) {
867 vp
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
870 if (OutputsWritten
& (1 << VERT_RESULT_PSIZ
)) {
871 vp
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
874 /* If we're writing back facing colors we need to send
875 * four colors to make front/back face colors selection work.
876 * If the vertex program doesn't write all 4 colors, lets
877 * pretend it does by skipping output index reg so the colors
878 * get written into appropriate output vectors.
880 if (OutputsWritten
& (1 << VERT_RESULT_COL0
)) {
881 vp
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
882 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
883 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
887 if (OutputsWritten
& (1 << VERT_RESULT_COL1
)) {
888 vp
->outputs
[VERT_RESULT_COL1
] = cur_reg
++;
889 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
890 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
894 if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
895 vp
->outputs
[VERT_RESULT_BFC0
] = cur_reg
++;
896 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
900 if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
901 vp
->outputs
[VERT_RESULT_BFC1
] = cur_reg
++;
902 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
906 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++) {
907 if (OutputsWritten
& (1 << i
)) {
908 vp
->outputs
[i
] = cur_reg
++;
912 if (OutputsWritten
& (1 << VERT_RESULT_FOGC
)) {
913 vp
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
917 static GLboolean
translate_vertex_program(struct r300_vertex_program_compiler
* compiler
)
919 struct prog_instruction
*vpi
= compiler
->program
->Instructions
;
922 unsigned long num_operands
;
923 /* Initial value should be last tmp reg that hw supports.
924 Strangely enough r300 doesnt mind even though these would be out of range.
925 Smart enough to realize that it doesnt need it? */
926 int u_temp_i
= VSF_MAX_FRAGMENT_TEMPS
- 1;
927 struct prog_src_register src
[3];
928 struct r300_vertex_program_code
* vp
= compiler
->code
;
930 compiler
->code
->pos_end
= 0; /* Not supported yet */
931 compiler
->code
->length
= 0;
933 t_inputs_outputs(compiler
->code
, compiler
->program
);
935 for (inst
= compiler
->code
->body
.d
; vpi
->Opcode
!= OPCODE_END
;
939 int u_temp_used
= (VSF_MAX_FRAGMENT_TEMPS
- 1) - u_temp_i
;
940 if((compiler
->code
->num_temporaries
+ u_temp_used
) > VSF_MAX_FRAGMENT_TEMPS
) {
941 fprintf(stderr
, "Ran out of temps, num temps %d, us %d\n", compiler
->code
->num_temporaries
, u_temp_used
);
944 u_temp_i
=VSF_MAX_FRAGMENT_TEMPS
-1;
947 if (!valid_dst(compiler
->code
, &vpi
->DstReg
)) {
948 /* redirect result to unused temp */
949 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
950 vpi
->DstReg
.Index
= u_temp_i
;
953 num_operands
= _mesa_num_inst_src_regs(vpi
->Opcode
);
955 /* copy the sources (src) from mesa into a local variable... is this needed? */
956 for (i
= 0; i
< num_operands
; i
++) {
957 src
[i
] = vpi
->SrcReg
[i
];
960 if (num_operands
== 3) { /* TODO: scalars */
961 if (CMP_SRCS(src
[1], src
[2])
962 || CMP_SRCS(src
[0], src
[2])) {
963 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
968 PVS_DST_REG_TEMPORARY
);
970 PVS_SRC_OPERAND(t_src_index(compiler
->code
, &src
[2]),
975 t_src_class(src
[2].File
),
976 NEGATE_NONE
) | (src
[2].
979 inst
[2] = __CONST(2, SWIZZLE_ZERO
);
980 inst
[3] = __CONST(2, SWIZZLE_ZERO
);
983 src
[2].File
= PROGRAM_TEMPORARY
;
984 src
[2].Index
= u_temp_i
;
990 if (num_operands
>= 2) {
991 if (CMP_SRCS(src
[1], src
[0])) {
992 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
997 PVS_DST_REG_TEMPORARY
);
999 PVS_SRC_OPERAND(t_src_index(compiler
->code
, &src
[0]),
1004 t_src_class(src
[0].File
),
1005 NEGATE_NONE
) | (src
[0].
1008 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
1009 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
1012 src
[0].File
= PROGRAM_TEMPORARY
;
1013 src
[0].Index
= u_temp_i
;
1019 switch (vpi
->Opcode
) {
1021 inst
= r300TranslateOpcodeABS(compiler
->code
, vpi
, inst
, src
);
1024 inst
= r300TranslateOpcodeADD(compiler
->code
, vpi
, inst
, src
);
1027 inst
= r300TranslateOpcodeARL(compiler
->code
, vpi
, inst
, src
);
1030 inst
= r300TranslateOpcodeDP3(compiler
->code
, vpi
, inst
, src
);
1033 inst
= r300TranslateOpcodeDP4(compiler
->code
, vpi
, inst
, src
);
1036 inst
= r300TranslateOpcodeDPH(compiler
->code
, vpi
, inst
, src
);
1039 inst
= r300TranslateOpcodeDST(compiler
->code
, vpi
, inst
, src
);
1042 inst
= r300TranslateOpcodeEX2(compiler
->code
, vpi
, inst
, src
);
1045 inst
= r300TranslateOpcodeEXP(compiler
->code
, vpi
, inst
, src
);
1048 inst
= r300TranslateOpcodeFLR(compiler
->code
, vpi
, inst
, src
, /* FIXME */
1052 inst
= r300TranslateOpcodeFRC(compiler
->code
, vpi
, inst
, src
);
1055 inst
= r300TranslateOpcodeLG2(compiler
->code
, vpi
, inst
, src
);
1058 inst
= r300TranslateOpcodeLIT(compiler
->code
, vpi
, inst
, src
);
1061 inst
= r300TranslateOpcodeLOG(compiler
->code
, vpi
, inst
, src
);
1064 inst
= r300TranslateOpcodeMAD(compiler
->code
, vpi
, inst
, src
);
1067 inst
= r300TranslateOpcodeMAX(compiler
->code
, vpi
, inst
, src
);
1070 inst
= r300TranslateOpcodeMIN(compiler
->code
, vpi
, inst
, src
);
1073 inst
= r300TranslateOpcodeMOV(compiler
->code
, vpi
, inst
, src
);
1076 inst
= r300TranslateOpcodeMUL(compiler
->code
, vpi
, inst
, src
);
1079 inst
= r300TranslateOpcodePOW(compiler
->code
, vpi
, inst
, src
);
1082 inst
= r300TranslateOpcodeRCP(compiler
->code
, vpi
, inst
, src
);
1085 inst
= r300TranslateOpcodeRSQ(compiler
->code
, vpi
, inst
, src
);
1088 inst
= r300TranslateOpcodeSGE(compiler
->code
, vpi
, inst
, src
);
1091 inst
= r300TranslateOpcodeSLT(compiler
->code
, vpi
, inst
, src
);
1094 inst
= r300TranslateOpcodeSUB(compiler
->code
, vpi
, inst
, src
);
1097 inst
= r300TranslateOpcodeSWZ(compiler
->code
, vpi
, inst
, src
);
1100 inst
= r300TranslateOpcodeXPD(compiler
->code
, vpi
, inst
, src
, /* FIXME */
1108 compiler
->code
->length
= (inst
- compiler
->code
->body
.d
);
1109 if (compiler
->code
->length
>= VSF_MAX_FRAGMENT_LENGTH
) {
1116 static void insert_wpos(struct gl_program
*prog
, GLuint temp_index
, int tex_id
)
1118 struct prog_instruction
*vpi
;
1120 _mesa_insert_instructions(prog
, prog
->NumInstructions
- 1, 2);
1122 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 3];
1124 vpi
->Opcode
= OPCODE_MOV
;
1126 vpi
->DstReg
.File
= PROGRAM_OUTPUT
;
1127 vpi
->DstReg
.Index
= VERT_RESULT_HPOS
;
1128 vpi
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
1129 vpi
->DstReg
.CondMask
= COND_TR
;
1131 vpi
->SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1132 vpi
->SrcReg
[0].Index
= temp_index
;
1133 vpi
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1137 vpi
->Opcode
= OPCODE_MOV
;
1139 vpi
->DstReg
.File
= PROGRAM_OUTPUT
;
1140 vpi
->DstReg
.Index
= VERT_RESULT_TEX0
+ tex_id
;
1141 vpi
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
1142 vpi
->DstReg
.CondMask
= COND_TR
;
1144 vpi
->SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1145 vpi
->SrcReg
[0].Index
= temp_index
;
1146 vpi
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1150 vpi
->Opcode
= OPCODE_END
;
1153 static void pos_as_texcoord(struct gl_program
*prog
, int tex_id
)
1155 struct prog_instruction
*vpi
;
1156 GLuint tempregi
= prog
->NumTemporaries
;
1158 prog
->NumTemporaries
++;
1160 for (vpi
= prog
->Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++) {
1161 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&& vpi
->DstReg
.Index
== VERT_RESULT_HPOS
) {
1162 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
1163 vpi
->DstReg
.Index
= tempregi
;
1167 insert_wpos(prog
, tempregi
, tex_id
);
1169 prog
->OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ tex_id
);
/**
 * The fogcoord attribute is special in that only the first component
 * is relevant, and the remaining components are always fixed (when read
 * from by the fragment program) to yield an X001 pattern.
 *
 * We need to enforce this either in the vertex program or in the fragment
 * program, and this code chooses not to enforce it in the vertex program.
 * This is slightly cheaper, as long as the fragment program does not use
 * weird swizzles.
 *
 * And it seems that usually, weird swizzles are not used, so...
 *
 * See also the counterpart rewriting for fragment programs.
 */
1186 static void fog_as_texcoord(struct gl_program
*prog
, int tex_id
)
/*
 * prog   - program rewritten in place
 * tex_id - texcoord slot, used as an offset from VERT_RESULT_TEX0
 *
 * Every write to PROGRAM_OUTPUT / VERT_RESULT_FOGC is retargeted to
 * VERT_RESULT_TEX0 + tex_id with its write mask forced to X only; the
 * OutputsWritten bitfield is then updated to match.
 */
1188 struct prog_instruction
*vpi
;
1190 vpi
= prog
->Instructions
;
/* Scan until the terminating OPCODE_END.
 * NOTE(review): the statement that advances vpi is not visible here. */
1191 while (vpi
->Opcode
!= OPCODE_END
) {
1192 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&& vpi
->DstReg
.Index
== VERT_RESULT_FOGC
) {
1193 vpi
->DstReg
.Index
= VERT_RESULT_TEX0
+ tex_id
;
/* Only the first component is kept. */
1194 vpi
->DstReg
.WriteMask
= WRITEMASK_X
;
/* The fog output is no longer written; the texcoord output now is. */
1200 prog
->OutputsWritten
&= ~(1 << VERT_RESULT_FOGC
);
1201 prog
->OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ tex_id
);
/**
 * Rewrite the instruction at prog->Instructions[pos] as OPCODE_MAX whose
 * second source is a copy of the first source with all four negate bits
 * toggled, i.e. max(x, -x).
 *
 * \param prog program rewritten in place
 * \param pos  index of the instruction to rewrite
 *
 * NOTE(review): the return statement is not visible here. translateInsts()
 * adds the result to its loop index, so it is presumably the number of
 * instructions inserted (none in this function) -- confirm.
 */
1204 static int translateABS(struct gl_program
*prog
, int pos
)
1206 struct prog_instruction
*inst
;
1208 inst
= &prog
->Instructions
[pos
];
1210 inst
->Opcode
= OPCODE_MAX
;
/* Duplicate the operand, then flip the sign of the copy. */
1211 inst
->SrcReg
[1] = inst
->SrcReg
[0];
1212 inst
->SrcReg
[1].Negate
^= NEGATE_XYZW
;
/**
 * Rewrite the instruction at prog->Instructions[pos] as OPCODE_DP4, with the
 * swizzle of source 0 recombined as (X, Y, Z, ZERO).
 *
 * NOTE(review): assumes combine_swizzles4() applies the given selectors on
 * top of the existing swizzle, so the fourth product term becomes zero --
 * confirm against its definition.
 *
 * \param prog program rewritten in place
 * \param pos  index of the instruction to rewrite
 *
 * NOTE(review): the return statement is not visible here; translateInsts()
 * adds the result to its loop index.
 */
1217 static int translateDP3(struct gl_program
*prog
, int pos
)
1219 struct prog_instruction
*inst
;
1221 inst
= &prog
->Instructions
[pos
];
1223 inst
->Opcode
= OPCODE_DP4
;
/* Only the first operand is adjusted; source 1 is left untouched. */
1224 inst
->SrcReg
[0].Swizzle
= combine_swizzles4(inst
->SrcReg
[0].Swizzle
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_ZERO
);
/**
 * Rewrite the instruction at prog->Instructions[pos] as OPCODE_DP4, with the
 * swizzle of source 0 recombined as (X, Y, Z, ONE).
 *
 * NOTE(review): assumes combine_swizzles4() applies the given selectors on
 * top of the existing swizzle, so the fourth component of source 0 reads as
 * the constant one -- confirm against its definition.
 *
 * \param prog program rewritten in place
 * \param pos  index of the instruction to rewrite
 *
 * NOTE(review): the return statement is not visible here; translateInsts()
 * adds the result to its loop index.
 */
1229 static int translateDPH(struct gl_program
*prog
, int pos
)
1231 struct prog_instruction
*inst
;
1233 inst
= &prog
->Instructions
[pos
];
1235 inst
->Opcode
= OPCODE_DP4
;
/* Only the first operand is adjusted; source 1 is left untouched. */
1236 inst
->SrcReg
[0].Swizzle
= combine_swizzles4(inst
->SrcReg
[0].Swizzle
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_ONE
);
/**
 * Expand the instruction at prog->Instructions[pos] into a FRC/ADD pair
 * using one newly reserved temporary:
 *
 *   FRC tmp, src0
 *   ADD ..., src0, -tmp
 *
 * One instruction slot is inserted at pos + 1 to hold the second half.
 *
 * \param prog program rewritten in place (grows by one instruction and one
 *             temporary)
 * \param pos  index of the instruction to expand
 *
 * NOTE(review): the declaration of tmp_idx, the use of the local dst, the
 * step that advances inst to the inserted slot, and the return statement are
 * not visible here. The (inst-1) reference below implies inst has been
 * advanced by then -- confirm.
 */
1241 static int translateFLR(struct gl_program
*prog
, int pos
)
1243 struct prog_instruction
*inst
;
1244 struct prog_dst_register dst
;
/* Reserve a fresh temporary for the fractional part. */
1247 tmp_idx
= prog
->NumTemporaries
++;
/* Make room for one extra instruction right after the current one. */
1249 _mesa_insert_instructions(prog
, pos
+ 1, 1);
/* Fetch the instruction pointer only after the insertion. */
1251 inst
= &prog
->Instructions
[pos
];
/* First instruction: FRC into the temporary. */
1254 inst
->Opcode
= OPCODE_FRC
;
1255 inst
->DstReg
.File
= PROGRAM_TEMPORARY
;
1256 inst
->DstReg
.Index
= tmp_idx
;
/* Second instruction: ADD of the original operand and the negated temporary. */
1259 inst
->Opcode
= OPCODE_ADD
;
/* Source 0 is taken from the preceding instruction's first operand. */
1261 inst
->SrcReg
[0] = (inst
-1)->SrcReg
[0];
1262 inst
->SrcReg
[1].File
= PROGRAM_TEMPORARY
;
1263 inst
->SrcReg
[1].Index
= tmp_idx
;
1264 inst
->SrcReg
[1].Negate
= NEGATE_XYZW
;
/**
 * Rewrite the instruction at prog->Instructions[pos] as OPCODE_ADD with all
 * four negate bits of the second source toggled, i.e. a + (-b).
 *
 * \param prog program rewritten in place
 * \param pos  index of the instruction to rewrite
 *
 * NOTE(review): the return statement is not visible here; translateInsts()
 * adds the result to its loop index.
 */
1269 static int translateSUB(struct gl_program
*prog
, int pos
)
1271 struct prog_instruction
*inst
;
1273 inst
= &prog
->Instructions
[pos
];
1275 inst
->Opcode
= OPCODE_ADD
;
/* XOR rather than assignment, so an existing negation is flipped back. */
1276 inst
->SrcReg
[1].Negate
^= NEGATE_XYZW
;
/**
 * Rewrite the instruction at prog->Instructions[pos] as OPCODE_MOV; only the
 * opcode is changed, the operands are kept as they are.
 *
 * \param prog program rewritten in place
 * \param pos  index of the instruction to rewrite
 *
 * NOTE(review): the return statement is not visible here; translateInsts()
 * adds the result to its loop index.
 */
1281 static int translateSWZ(struct gl_program
*prog
, int pos
)
1283 prog
->Instructions
[pos
].Opcode
= OPCODE_MOV
;
/**
 * Expand the instruction at prog->Instructions[pos] into a MUL/MAD pair
 * using one newly reserved temporary:
 *
 *   MUL tmp, src0.yzxw, src1.zxyw
 *   MAD ..., src0.zxyw, -src1.yzxw, tmp
 *
 * One instruction slot is inserted at pos + 1 to hold the second half.
 *
 * \param prog program rewritten in place (grows by one instruction and one
 *             temporary)
 * \param pos  index of the instruction to expand
 *
 * NOTE(review): the declaration of tmp_idx, the step that fills the inserted
 * slot and advances inst to it, and the return statement are not visible
 * here. The swizzles above assume combine_swizzles4() applies the given
 * selectors on top of the existing swizzle -- confirm.
 */
1288 static int translateXPD(struct gl_program
*prog
, int pos
)
1290 struct prog_instruction
*inst
;
/* Reserve a fresh temporary for the intermediate product. */
1293 tmp_idx
= prog
->NumTemporaries
++;
/* Make room for one extra instruction right after the current one. */
1295 _mesa_insert_instructions(prog
, pos
+ 1, 1);
/* Fetch the instruction pointer only after the insertion. */
1297 inst
= &prog
->Instructions
[pos
];
/* First instruction: MUL of the rotated operands into the temporary. */
1301 inst
->Opcode
= OPCODE_MUL
;
1302 inst
->DstReg
.File
= PROGRAM_TEMPORARY
;
1303 inst
->DstReg
.Index
= tmp_idx
;
1304 inst
->SrcReg
[0].Swizzle
= combine_swizzles4(inst
->SrcReg
[0].Swizzle
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_W
);
1305 inst
->SrcReg
[1].Swizzle
= combine_swizzles4(inst
->SrcReg
[1].Swizzle
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_W
);
/* Second instruction: MAD with the opposite rotation, a negated second
 * operand, and the temporary as the addend. */
1308 inst
->Opcode
= OPCODE_MAD
;
1309 inst
->SrcReg
[0].Swizzle
= combine_swizzles4(inst
->SrcReg
[0].Swizzle
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_W
);
1310 inst
->SrcReg
[1].Swizzle
= combine_swizzles4(inst
->SrcReg
[1].Swizzle
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_W
);
1311 inst
->SrcReg
[1].Negate
^= NEGATE_XYZW
;
1312 inst
->SrcReg
[2].File
= PROGRAM_TEMPORARY
;
1313 inst
->SrcReg
[2].Index
= tmp_idx
;
/**
 * Walk every instruction of prog and hand selected opcodes to the matching
 * translateXXX() helper (ABS, DP3, DPH, FLR, SUB, SWZ, XPD).
 *
 * Each helper's return value is added to the loop index, and
 * prog->NumInstructions is re-read on every iteration.
 *
 * \param prog program rewritten in place
 *
 * NOTE(review): the case labels of the switch and the declaration of i are
 * not visible here; the opcode-to-helper mapping is inferred from the helper
 * names -- confirm.
 */
1318 static void translateInsts(struct gl_program
*prog
)
1320 struct prog_instruction
*inst
;
1323 for (i
= 0; i
< prog
->NumInstructions
; ++i
) {
/* Re-fetch the pointer from the program's instruction array each time. */
1324 inst
= &prog
->Instructions
[i
];
1326 switch (inst
->Opcode
) {
1328 i
+= translateABS(prog
, i
);
1331 i
+= translateDP3(prog
, i
);
1334 i
+= translateDPH(prog
, i
);
1337 i
+= translateFLR(prog
, i
);
1340 i
+= translateSUB(prog
, i
);
1343 i
+= translateSWZ(prog
, i
);
1346 i
+= translateXPD(prog
, i
);
/*
 * Helper for addArtificialOutputs(): when bit fp_attr is set in FpReads and
 * bit vp_result is not yet set in compiler->program->OutputsWritten, set bit
 * vp_result in OutputsAdded.
 *
 * Relies on FpReads, OutputsAdded and compiler being in scope at the point of
 * use.
 * NOTE(review): parts of the macro body are not visible here.
 */
1354 #define ADD_OUTPUT(fp_attr, vp_result) \
1356 if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \
1357 OutputsAdded |= 1 << (vp_result); \
/**
 * Add vertex outputs that the fragment program reads but the vertex program
 * does not write.
 *
 * ADD_OUTPUT collects the missing outputs (COL0, COL1 and the texcoords
 * TEX0 + 0..6) into OutputsAdded. For each collected output a
 * "MOV output, CONST[0]" instruction is then written into slots inserted just
 * before the last instruction, and the added bits are merged into
 * compiler->program->OutputsWritten.
 *
 * \param compiler vertex program compiler; state.FpReads is read and program
 *                 is modified in place
 *
 * NOTE(review): the declarations/initialisation of i, count and
 * OutputsAdded, the condition guarding the insertion, and the step that
 * advances inst between emitted MOVs are not visible here.
 * NOTE(review): the texcoord loop covers 7 slots (i < 7) while
 * nqssadceInit() walks 8 (i < 8) -- confirm whether this is intentional.
 */
1362 static void addArtificialOutputs(struct r300_vertex_program_compiler
* compiler
)
1364 GLuint OutputsAdded
, FpReads
;
/* Bitmask of the fragment-program inputs that are read. */
1369 FpReads
= compiler
->state
.FpReads
;
1371 ADD_OUTPUT(FRAG_ATTRIB_COL0
, VERT_RESULT_COL0
);
1372 ADD_OUTPUT(FRAG_ATTRIB_COL1
, VERT_RESULT_COL1
);
1374 for (i
= 0; i
< 7; ++i
) {
1375 ADD_OUTPUT(FRAG_ATTRIB_TEX0
+ i
, VERT_RESULT_TEX0
+ i
);
1378 /* Some outputs may be artificially added, to match the inputs of the fragment program.
1379 * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
1380 * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
1383 struct prog_instruction
*inst
;
/* Insert count slots in front of the program's last instruction. */
1385 _mesa_insert_instructions(compiler
->program
, compiler
->program
->NumInstructions
- 1, count
);
/* NumInstructions is read again here; with the insert applied this index is
 * the first of the newly inserted slots. */
1386 inst
= &compiler
->program
->Instructions
[compiler
->program
->NumInstructions
- 1 - count
];
/* Emit one MOV per bit set in OutputsAdded. */
1388 for (i
= 0; i
< VERT_RESULT_MAX
; ++i
) {
1389 if (OutputsAdded
& (1 << i
)) {
1390 inst
->Opcode
= OPCODE_MOV
;
1392 inst
->DstReg
.File
= PROGRAM_OUTPUT
;
1393 inst
->DstReg
.Index
= i
;
1394 inst
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
1395 inst
->DstReg
.CondMask
= COND_TR
;
/* The source is constant register 0 with an identity swizzle. */
1397 inst
->SrcReg
[0].File
= PROGRAM_CONSTANT
;
1398 inst
->SrcReg
[0].Index
= 0;
1399 inst
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
/* The program now writes the added outputs as well. */
1405 compiler
->program
->OutputsWritten
|= OutputsAdded
;
/**
 * Init callback for the NQSSADCE pass (registered through
 * radeon_nqssadce_descr.Init in r3xx_compile_vertex_program).
 *
 * Fills in s->Outputs[...].Sourced with the component mask of each vertex
 * output that counts as read:
 *  - COL0 and BFC0 (all components) if the fragment program reads COL0
 *  - COL1 and BFC1 (all components) if the fragment program reads COL1
 *  - TEX0 + i (all components) for each i in 0..7 the fragment program reads
 *  - HPOS (all components) unconditionally
 *  - PSIZ (X only) if the program writes it
 *
 * \param s pass state; s->UserData is used as the
 *          r300_vertex_program_compiler
 *
 * NOTE(review): the declarations of fp_reads and i are not visible here.
 */
1411 static void nqssadceInit(struct nqssadce_state
* s
)
1413 struct r300_vertex_program_compiler
* compiler
= s
->UserData
;
/* Bitmask of the fragment-program inputs that are read. */
1416 fp_reads
= compiler
->state
.FpReads
;
/* A read of a colour marks both the front and the back-face output. */
1418 if (fp_reads
& FRAG_BIT_COL0
) {
1419 s
->Outputs
[VERT_RESULT_COL0
].Sourced
= WRITEMASK_XYZW
;
1420 s
->Outputs
[VERT_RESULT_BFC0
].Sourced
= WRITEMASK_XYZW
;
1423 if (fp_reads
& FRAG_BIT_COL1
) {
1424 s
->Outputs
[VERT_RESULT_COL1
].Sourced
= WRITEMASK_XYZW
;
1425 s
->Outputs
[VERT_RESULT_BFC1
].Sourced
= WRITEMASK_XYZW
;
/* Texture coordinates 0..7. */
1431 for (i
= 0; i
< 8; ++i
) {
1432 if (fp_reads
& FRAG_BIT_TEX(i
)) {
1433 s
->Outputs
[VERT_RESULT_TEX0
+ i
].Sourced
= WRITEMASK_XYZW
;
/* Position is always marked as sourced, independent of fp_reads. */
1438 s
->Outputs
[VERT_RESULT_HPOS
].Sourced
= WRITEMASK_XYZW
;
/* Point size is marked (X only) when the program writes it. */
1439 if (s
->Program
->OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
1440 s
->Outputs
[VERT_RESULT_PSIZ
].Sourced
= WRITEMASK_X
;
/**
 * Swizzle-support callback, registered through
 * radeon_nqssadce_descr.IsNativeSwizzle in r3xx_compile_vertex_program.
 *
 * \param opcode opcode of the instruction using the source register
 * \param reg    the source register whose swizzle is being queried
 *
 * NOTE(review): the function body is not visible here, so the returned value
 * is not documented -- confirm against the full source.
 */
1443 static GLboolean
swizzleIsNative(GLuint opcode
, struct prog_src_register reg
)
1453 GLboolean
r3xx_compile_vertex_program(struct r300_vertex_program_compiler
* compiler
, GLcontext
* ctx
)
1457 if (compiler
->state
.WPosAttr
!= FRAG_ATTRIB_MAX
) {
1458 pos_as_texcoord(compiler
->program
, compiler
->state
.WPosAttr
- FRAG_ATTRIB_TEX0
);
1461 if (compiler
->state
.FogAttr
!= FRAG_ATTRIB_MAX
) {
1462 fog_as_texcoord(compiler
->program
, compiler
->state
.FogAttr
- FRAG_ATTRIB_TEX0
);
1465 addArtificialOutputs(compiler
);
1467 translateInsts(compiler
->program
);
1469 if (compiler
->Base
.Debug
) {
1470 fprintf(stderr
, "Vertex program after native rewrite:\n");
1471 _mesa_print_program(compiler
->program
);
1476 struct radeon_nqssadce_descr nqssadce
= {
1477 .Init
= &nqssadceInit
,
1478 .IsNativeSwizzle
= &swizzleIsNative
,
1479 .BuildSwizzle
= NULL
1481 radeonNqssaDce(compiler
->program
, &nqssadce
, compiler
);
1483 /* We need this step for reusing temporary registers */
1484 _mesa_optimize_program(ctx
, compiler
->program
);
1486 if (compiler
->Base
.Debug
) {
1487 fprintf(stderr
, "Vertex program after NQSSADCE:\n");
1488 _mesa_print_program(compiler
->program
);
1493 assert(compiler
->program
->NumInstructions
);
1495 struct prog_instruction
*inst
;
1498 inst
= compiler
->program
->Instructions
;
1500 while (inst
->Opcode
!= OPCODE_END
) {
1501 tmp
= _mesa_num_inst_src_regs(inst
->Opcode
);
1502 for (i
= 0; i
< tmp
; ++i
) {
1503 if (inst
->SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
1504 if ((int) inst
->SrcReg
[i
].Index
> max
) {
1505 max
= inst
->SrcReg
[i
].Index
;
1510 if (_mesa_num_inst_dst_regs(inst
->Opcode
)) {
1511 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
1512 if ((int) inst
->DstReg
.Index
> max
) {
1513 max
= inst
->DstReg
.Index
;
1520 /* We actually want highest index of used temporary register,
1521 * not the number of temporaries used.
1522 * These values aren't always the same.
1524 compiler
->code
->num_temporaries
= max
+ 1;
1527 success
= translate_vertex_program(compiler
);
1529 compiler
->code
->InputsRead
= compiler
->program
->InputsRead
;
1530 compiler
->code
->OutputsWritten
= compiler
->program
->OutputsWritten
;