r300: Move vertex program compilation to compiler
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r3xx_vertprog.c
1 /*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "radeon_compiler.h"
24
25 #include "../r300_reg.h"
26
27 #include "radeon_nqssadce.h"
28
29 #include "shader/prog_optimize.h"
30 #include "shader/prog_print.h"
31
32
/*
 * Evaluates to non-zero when source registers a and b differ in RelAddr, or
 * when their indices differ while both map to the constant register class or
 * both map to the input register class.  translate_vertex_program() copies
 * one of the two operands into a temporary when this holds.
 *
 * Both arguments are evaluated several times; pass plain lvalues only.
 *
 * TODO: Get rid of t_src_class call
 */
#define CMP_SRCS(a, b) \
	(((a).RelAddr != (b).RelAddr) || \
	 ((a).Index != (b).Index && \
	  ((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
	    t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
	   (t_src_class((a).File) == PVS_SRC_REG_INPUT && \
	    t_src_class((b).File) == PVS_SRC_REG_INPUT))))
39
/*
 * Build a source operand that reads the already-valid register src[x] with
 * all four swizzle selectors set to y (SWIZZLE_ZERO or SWIZZLE_ONE at the
 * call sites in this file) and no negation.
 *
 * Relies on `vp` and `src` being in scope where the macro is expanded.
 */
#define __CONST(x, y) \
	(PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
			 t_swizzle(y), t_swizzle(y), t_swizzle(y), t_swizzle(y), \
			 t_src_class(src[x].File), \
			 NEGATE_NONE) \
	 | (src[x].RelAddr << 4))
52
53
54
55
56 static unsigned long t_dst_mask(GLuint mask)
57 {
58 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
59 return mask & WRITEMASK_XYZW;
60 }
61
62 static unsigned long t_dst_class(gl_register_file file)
63 {
64
65 switch (file) {
66 case PROGRAM_TEMPORARY:
67 return PVS_DST_REG_TEMPORARY;
68 case PROGRAM_OUTPUT:
69 return PVS_DST_REG_OUT;
70 case PROGRAM_ADDRESS:
71 return PVS_DST_REG_A0;
72 /*
73 case PROGRAM_INPUT:
74 case PROGRAM_LOCAL_PARAM:
75 case PROGRAM_ENV_PARAM:
76 case PROGRAM_NAMED_PARAM:
77 case PROGRAM_STATE_VAR:
78 case PROGRAM_WRITE_ONLY:
79 case PROGRAM_ADDRESS:
80 */
81 default:
82 fprintf(stderr, "problem in %s", __FUNCTION__);
83 _mesa_exit(-1);
84 return -1;
85 }
86 }
87
88 static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
89 struct prog_dst_register *dst)
90 {
91 if (dst->File == PROGRAM_OUTPUT)
92 return vp->outputs[dst->Index];
93
94 return dst->Index;
95 }
96
97 static unsigned long t_src_class(gl_register_file file)
98 {
99 switch (file) {
100 case PROGRAM_TEMPORARY:
101 return PVS_SRC_REG_TEMPORARY;
102 case PROGRAM_INPUT:
103 return PVS_SRC_REG_INPUT;
104 case PROGRAM_LOCAL_PARAM:
105 case PROGRAM_ENV_PARAM:
106 case PROGRAM_NAMED_PARAM:
107 case PROGRAM_CONSTANT:
108 case PROGRAM_STATE_VAR:
109 return PVS_SRC_REG_CONSTANT;
110 /*
111 case PROGRAM_OUTPUT:
112 case PROGRAM_WRITE_ONLY:
113 case PROGRAM_ADDRESS:
114 */
115 default:
116 fprintf(stderr, "problem in %s", __FUNCTION__);
117 _mesa_exit(-1);
118 return -1;
119 }
120 }
121
122 static INLINE unsigned long t_swizzle(GLubyte swizzle)
123 {
124 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
125 return swizzle;
126 }
127
128 static unsigned long t_src_index(struct r300_vertex_program_code *vp,
129 struct prog_src_register *src)
130 {
131 if (src->File == PROGRAM_INPUT) {
132 assert(vp->inputs[src->Index] != -1);
133 return vp->inputs[src->Index];
134 } else {
135 if (src->Index < 0) {
136 fprintf(stderr,
137 "negative offsets for indirect addressing do not work.\n");
138 return 0;
139 }
140 return src->Index;
141 }
142 }
143
144 /* these two functions should probably be merged... */
145
146 static unsigned long t_src(struct r300_vertex_program_code *vp,
147 struct prog_src_register *src)
148 {
149 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
150 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
151 */
152 return PVS_SRC_OPERAND(t_src_index(vp, src),
153 t_swizzle(GET_SWZ(src->Swizzle, 0)),
154 t_swizzle(GET_SWZ(src->Swizzle, 1)),
155 t_swizzle(GET_SWZ(src->Swizzle, 2)),
156 t_swizzle(GET_SWZ(src->Swizzle, 3)),
157 t_src_class(src->File),
158 src->Negate) | (src->RelAddr << 4);
159 }
160
161 static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
162 struct prog_src_register *src)
163 {
164 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
165 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
166 */
167 return PVS_SRC_OPERAND(t_src_index(vp, src),
168 t_swizzle(GET_SWZ(src->Swizzle, 0)),
169 t_swizzle(GET_SWZ(src->Swizzle, 0)),
170 t_swizzle(GET_SWZ(src->Swizzle, 0)),
171 t_swizzle(GET_SWZ(src->Swizzle, 0)),
172 t_src_class(src->File),
173 src->Negate ? NEGATE_XYZW : NEGATE_NONE) |
174 (src->RelAddr << 4);
175 }
176
177 static GLboolean valid_dst(struct r300_vertex_program_code *vp,
178 struct prog_dst_register *dst)
179 {
180 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
181 return GL_FALSE;
182 } else if (dst->File == PROGRAM_ADDRESS) {
183 assert(dst->Index == 0);
184 }
185
186 return GL_TRUE;
187 }
188
189 static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program_code *vp,
190 struct prog_instruction *vpi,
191 GLuint * inst,
192 struct prog_src_register src[3])
193 {
194 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
195
196 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
197 GL_FALSE,
198 GL_FALSE,
199 t_dst_index(vp, &vpi->DstReg),
200 t_dst_mask(vpi->DstReg.WriteMask),
201 t_dst_class(vpi->DstReg.File));
202 inst[1] = t_src(vp, &src[0]);
203 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
204 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
205 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
206 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
207 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
208 t_src_class(src[0].File),
209 (!src[0].
210 Negate) ? NEGATE_XYZW : NEGATE_NONE) |
211 (src[0].RelAddr << 4);
212 inst[3] = 0;
213
214 return inst;
215 }
216
217 static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program_code *vp,
218 struct prog_instruction *vpi,
219 GLuint * inst,
220 struct prog_src_register src[3])
221 {
222 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
223 GL_FALSE,
224 GL_FALSE,
225 t_dst_index(vp, &vpi->DstReg),
226 t_dst_mask(vpi->DstReg.WriteMask),
227 t_dst_class(vpi->DstReg.File));
228 inst[1] = t_src(vp, &src[0]);
229 inst[2] = t_src(vp, &src[1]);
230 inst[3] = __CONST(1, SWIZZLE_ZERO);
231
232 return inst;
233 }
234
235 static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program_code *vp,
236 struct prog_instruction *vpi,
237 GLuint * inst,
238 struct prog_src_register src[3])
239 {
240 inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
241 GL_FALSE,
242 GL_FALSE,
243 t_dst_index(vp, &vpi->DstReg),
244 t_dst_mask(vpi->DstReg.WriteMask),
245 t_dst_class(vpi->DstReg.File));
246 inst[1] = t_src(vp, &src[0]);
247 inst[2] = __CONST(0, SWIZZLE_ZERO);
248 inst[3] = __CONST(0, SWIZZLE_ZERO);
249
250 return inst;
251 }
252
253 static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program_code *vp,
254 struct prog_instruction *vpi,
255 GLuint * inst,
256 struct prog_src_register src[3])
257 {
258 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
259
260 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
261 GL_FALSE,
262 GL_FALSE,
263 t_dst_index(vp, &vpi->DstReg),
264 t_dst_mask(vpi->DstReg.WriteMask),
265 t_dst_class(vpi->DstReg.File));
266 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
267 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
268 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
269 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
270 SWIZZLE_ZERO,
271 t_src_class(src[0].File),
272 src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
273 (src[0].RelAddr << 4);
274 inst[2] =
275 PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
276 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
277 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
278 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
279 t_src_class(src[1].File),
280 src[1].Negate ? NEGATE_XYZW : NEGATE_NONE) |
281 (src[1].RelAddr << 4);
282 inst[3] = __CONST(1, SWIZZLE_ZERO);
283
284 return inst;
285 }
286
287 static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program_code *vp,
288 struct prog_instruction *vpi,
289 GLuint * inst,
290 struct prog_src_register src[3])
291 {
292 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
293 GL_FALSE,
294 GL_FALSE,
295 t_dst_index(vp, &vpi->DstReg),
296 t_dst_mask(vpi->DstReg.WriteMask),
297 t_dst_class(vpi->DstReg.File));
298 inst[1] = t_src(vp, &src[0]);
299 inst[2] = t_src(vp, &src[1]);
300 inst[3] = __CONST(1, SWIZZLE_ZERO);
301
302 return inst;
303 }
304
305 static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program_code *vp,
306 struct prog_instruction *vpi,
307 GLuint * inst,
308 struct prog_src_register src[3])
309 {
310 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
311 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
312 GL_FALSE,
313 GL_FALSE,
314 t_dst_index(vp, &vpi->DstReg),
315 t_dst_mask(vpi->DstReg.WriteMask),
316 t_dst_class(vpi->DstReg.File));
317 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
318 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
319 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
320 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
321 PVS_SRC_SELECT_FORCE_1,
322 t_src_class(src[0].File),
323 src[0].Negate ? NEGATE_XYZ : NEGATE_NONE) |
324 (src[0].RelAddr << 4);
325 inst[2] = t_src(vp, &src[1]);
326 inst[3] = __CONST(1, SWIZZLE_ZERO);
327
328 return inst;
329 }
330
331 static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program_code *vp,
332 struct prog_instruction *vpi,
333 GLuint * inst,
334 struct prog_src_register src[3])
335 {
336 inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
337 GL_FALSE,
338 GL_FALSE,
339 t_dst_index(vp, &vpi->DstReg),
340 t_dst_mask(vpi->DstReg.WriteMask),
341 t_dst_class(vpi->DstReg.File));
342 inst[1] = t_src(vp, &src[0]);
343 inst[2] = t_src(vp, &src[1]);
344 inst[3] = __CONST(1, SWIZZLE_ZERO);
345
346 return inst;
347 }
348
349 static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program_code *vp,
350 struct prog_instruction *vpi,
351 GLuint * inst,
352 struct prog_src_register src[3])
353 {
354 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
355 GL_TRUE,
356 GL_FALSE,
357 t_dst_index(vp, &vpi->DstReg),
358 t_dst_mask(vpi->DstReg.WriteMask),
359 t_dst_class(vpi->DstReg.File));
360 inst[1] = t_src_scalar(vp, &src[0]);
361 inst[2] = __CONST(0, SWIZZLE_ZERO);
362 inst[3] = __CONST(0, SWIZZLE_ZERO);
363
364 return inst;
365 }
366
367 static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program_code *vp,
368 struct prog_instruction *vpi,
369 GLuint * inst,
370 struct prog_src_register src[3])
371 {
372 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
373 GL_TRUE,
374 GL_FALSE,
375 t_dst_index(vp, &vpi->DstReg),
376 t_dst_mask(vpi->DstReg.WriteMask),
377 t_dst_class(vpi->DstReg.File));
378 inst[1] = t_src_scalar(vp, &src[0]);
379 inst[2] = __CONST(0, SWIZZLE_ZERO);
380 inst[3] = __CONST(0, SWIZZLE_ZERO);
381
382 return inst;
383 }
384
385 static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program_code *vp,
386 struct prog_instruction *vpi,
387 GLuint * inst,
388 struct prog_src_register src[3],
389 int *u_temp_i)
390 {
391 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
392 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
393
394 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
395 GL_FALSE,
396 GL_FALSE,
397 *u_temp_i,
398 t_dst_mask(vpi->DstReg.WriteMask),
399 PVS_DST_REG_TEMPORARY);
400 inst[1] = t_src(vp, &src[0]);
401 inst[2] = __CONST(0, SWIZZLE_ZERO);
402 inst[3] = __CONST(0, SWIZZLE_ZERO);
403 inst += 4;
404
405 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
406 GL_FALSE,
407 GL_FALSE,
408 t_dst_index(vp, &vpi->DstReg),
409 t_dst_mask(vpi->DstReg.WriteMask),
410 t_dst_class(vpi->DstReg.File));
411 inst[1] = t_src(vp, &src[0]);
412 inst[2] = PVS_SRC_OPERAND(*u_temp_i,
413 PVS_SRC_SELECT_X,
414 PVS_SRC_SELECT_Y,
415 PVS_SRC_SELECT_Z,
416 PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
417 /* Not 100% sure about this */
418 (!src[0].
419 Negate) ? NEGATE_XYZW : NEGATE_NONE);
420 inst[3] = __CONST(0, SWIZZLE_ZERO);
421 (*u_temp_i)--;
422
423 return inst;
424 }
425
426 static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program_code *vp,
427 struct prog_instruction *vpi,
428 GLuint * inst,
429 struct prog_src_register src[3])
430 {
431 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
432 GL_FALSE,
433 GL_FALSE,
434 t_dst_index(vp, &vpi->DstReg),
435 t_dst_mask(vpi->DstReg.WriteMask),
436 t_dst_class(vpi->DstReg.File));
437 inst[1] = t_src(vp, &src[0]);
438 inst[2] = __CONST(0, SWIZZLE_ZERO);
439 inst[3] = __CONST(0, SWIZZLE_ZERO);
440
441 return inst;
442 }
443
444 static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program_code *vp,
445 struct prog_instruction *vpi,
446 GLuint * inst,
447 struct prog_src_register src[3])
448 {
449 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
450
451 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
452 GL_TRUE,
453 GL_FALSE,
454 t_dst_index(vp, &vpi->DstReg),
455 t_dst_mask(vpi->DstReg.WriteMask),
456 t_dst_class(vpi->DstReg.File));
457 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
458 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
459 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
460 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
461 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
462 t_src_class(src[0].File),
463 src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
464 (src[0].RelAddr << 4);
465 inst[2] = __CONST(0, SWIZZLE_ZERO);
466 inst[3] = __CONST(0, SWIZZLE_ZERO);
467
468 return inst;
469 }
470
471 static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program_code *vp,
472 struct prog_instruction *vpi,
473 GLuint * inst,
474 struct prog_src_register src[3])
475 {
476 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
477
478 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
479 GL_TRUE,
480 GL_FALSE,
481 t_dst_index(vp, &vpi->DstReg),
482 t_dst_mask(vpi->DstReg.WriteMask),
483 t_dst_class(vpi->DstReg.File));
484 /* NOTE: Users swizzling might not work. */
485 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
486 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
487 PVS_SRC_SELECT_FORCE_0, // Z
488 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
489 t_src_class(src[0].File),
490 src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
491 (src[0].RelAddr << 4);
492 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
493 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
494 PVS_SRC_SELECT_FORCE_0, // Z
495 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
496 t_src_class(src[0].File),
497 src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
498 (src[0].RelAddr << 4);
499 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
500 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
501 PVS_SRC_SELECT_FORCE_0, // Z
502 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
503 t_src_class(src[0].File),
504 src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
505 (src[0].RelAddr << 4);
506
507 return inst;
508 }
509
510 static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program_code *vp,
511 struct prog_instruction *vpi,
512 GLuint * inst,
513 struct prog_src_register src[3])
514 {
515 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
516 GL_TRUE,
517 GL_FALSE,
518 t_dst_index(vp, &vpi->DstReg),
519 t_dst_mask(vpi->DstReg.WriteMask),
520 t_dst_class(vpi->DstReg.File));
521 inst[1] = t_src_scalar(vp, &src[0]);
522 inst[2] = __CONST(0, SWIZZLE_ZERO);
523 inst[3] = __CONST(0, SWIZZLE_ZERO);
524
525 return inst;
526 }
527
528 static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program_code *vp,
529 struct prog_instruction *vpi,
530 GLuint * inst,
531 struct prog_src_register src[3])
532 {
533 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
534 GL_FALSE,
535 GL_TRUE,
536 t_dst_index(vp, &vpi->DstReg),
537 t_dst_mask(vpi->DstReg.WriteMask),
538 t_dst_class(vpi->DstReg.File));
539 inst[1] = t_src(vp, &src[0]);
540 inst[2] = t_src(vp, &src[1]);
541 inst[3] = t_src(vp, &src[2]);
542
543 return inst;
544 }
545
546 static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program_code *vp,
547 struct prog_instruction *vpi,
548 GLuint * inst,
549 struct prog_src_register src[3])
550 {
551 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
552 GL_FALSE,
553 GL_FALSE,
554 t_dst_index(vp, &vpi->DstReg),
555 t_dst_mask(vpi->DstReg.WriteMask),
556 t_dst_class(vpi->DstReg.File));
557 inst[1] = t_src(vp, &src[0]);
558 inst[2] = t_src(vp, &src[1]);
559 inst[3] = __CONST(1, SWIZZLE_ZERO);
560
561 return inst;
562 }
563
564 static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program_code *vp,
565 struct prog_instruction *vpi,
566 GLuint * inst,
567 struct prog_src_register src[3])
568 {
569 inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
570 GL_FALSE,
571 GL_FALSE,
572 t_dst_index(vp, &vpi->DstReg),
573 t_dst_mask(vpi->DstReg.WriteMask),
574 t_dst_class(vpi->DstReg.File));
575 inst[1] = t_src(vp, &src[0]);
576 inst[2] = t_src(vp, &src[1]);
577 inst[3] = __CONST(1, SWIZZLE_ZERO);
578
579 return inst;
580 }
581
582 static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program_code *vp,
583 struct prog_instruction *vpi,
584 GLuint * inst,
585 struct prog_src_register src[3])
586 {
587 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
588
589 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
590 GL_FALSE,
591 GL_FALSE,
592 t_dst_index(vp, &vpi->DstReg),
593 t_dst_mask(vpi->DstReg.WriteMask),
594 t_dst_class(vpi->DstReg.File));
595 inst[1] = t_src(vp, &src[0]);
596 inst[2] = __CONST(0, SWIZZLE_ZERO);
597 inst[3] = __CONST(0, SWIZZLE_ZERO);
598
599 return inst;
600 }
601
602 static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program_code *vp,
603 struct prog_instruction *vpi,
604 GLuint * inst,
605 struct prog_src_register src[3])
606 {
607 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
608 GL_FALSE,
609 GL_FALSE,
610 t_dst_index(vp, &vpi->DstReg),
611 t_dst_mask(vpi->DstReg.WriteMask),
612 t_dst_class(vpi->DstReg.File));
613 inst[1] = t_src(vp, &src[0]);
614 inst[2] = t_src(vp, &src[1]);
615 inst[3] = __CONST(1, SWIZZLE_ZERO);
616
617 return inst;
618 }
619
620 static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program_code *vp,
621 struct prog_instruction *vpi,
622 GLuint * inst,
623 struct prog_src_register src[3])
624 {
625 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
626 GL_TRUE,
627 GL_FALSE,
628 t_dst_index(vp, &vpi->DstReg),
629 t_dst_mask(vpi->DstReg.WriteMask),
630 t_dst_class(vpi->DstReg.File));
631 inst[1] = t_src_scalar(vp, &src[0]);
632 inst[2] = __CONST(0, SWIZZLE_ZERO);
633 inst[3] = t_src_scalar(vp, &src[1]);
634
635 return inst;
636 }
637
638 static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program_code *vp,
639 struct prog_instruction *vpi,
640 GLuint * inst,
641 struct prog_src_register src[3])
642 {
643 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
644 GL_TRUE,
645 GL_FALSE,
646 t_dst_index(vp, &vpi->DstReg),
647 t_dst_mask(vpi->DstReg.WriteMask),
648 t_dst_class(vpi->DstReg.File));
649 inst[1] = t_src_scalar(vp, &src[0]);
650 inst[2] = __CONST(0, SWIZZLE_ZERO);
651 inst[3] = __CONST(0, SWIZZLE_ZERO);
652
653 return inst;
654 }
655
656 static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program_code *vp,
657 struct prog_instruction *vpi,
658 GLuint * inst,
659 struct prog_src_register src[3])
660 {
661 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
662 GL_TRUE,
663 GL_FALSE,
664 t_dst_index(vp, &vpi->DstReg),
665 t_dst_mask(vpi->DstReg.WriteMask),
666 t_dst_class(vpi->DstReg.File));
667 inst[1] = t_src_scalar(vp, &src[0]);
668 inst[2] = __CONST(0, SWIZZLE_ZERO);
669 inst[3] = __CONST(0, SWIZZLE_ZERO);
670
671 return inst;
672 }
673
674 static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program_code *vp,
675 struct prog_instruction *vpi,
676 GLuint * inst,
677 struct prog_src_register src[3])
678 {
679 inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
680 GL_FALSE,
681 GL_FALSE,
682 t_dst_index(vp, &vpi->DstReg),
683 t_dst_mask(vpi->DstReg.WriteMask),
684 t_dst_class(vpi->DstReg.File));
685 inst[1] = t_src(vp, &src[0]);
686 inst[2] = t_src(vp, &src[1]);
687 inst[3] = __CONST(1, SWIZZLE_ZERO);
688
689 return inst;
690 }
691
692 static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program_code *vp,
693 struct prog_instruction *vpi,
694 GLuint * inst,
695 struct prog_src_register src[3])
696 {
697 inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
698 GL_FALSE,
699 GL_FALSE,
700 t_dst_index(vp, &vpi->DstReg),
701 t_dst_mask(vpi->DstReg.WriteMask),
702 t_dst_class(vpi->DstReg.File));
703 inst[1] = t_src(vp, &src[0]);
704 inst[2] = t_src(vp, &src[1]);
705 inst[3] = __CONST(1, SWIZZLE_ZERO);
706
707 return inst;
708 }
709
710 static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program_code *vp,
711 struct prog_instruction *vpi,
712 GLuint * inst,
713 struct prog_src_register src[3])
714 {
715 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
716
717 #if 0
718 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
719 GL_FALSE,
720 GL_FALSE,
721 t_dst_index(vp, &vpi->DstReg),
722 t_dst_mask(vpi->DstReg.WriteMask),
723 t_dst_class(vpi->DstReg.File));
724 inst[1] = t_src(vp, &src[0]);
725 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
726 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
727 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
728 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
729 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
730 t_src_class(src[1].File),
731 (!src[1].
732 Negate) ? NEGATE_XYZW : NEGATE_NONE) |
733 (src[1].RelAddr << 4);
734 inst[3] = 0;
735 #else
736 inst[0] =
737 PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
738 GL_FALSE,
739 GL_FALSE,
740 t_dst_index(vp, &vpi->DstReg),
741 t_dst_mask(vpi->DstReg.WriteMask),
742 t_dst_class(vpi->DstReg.File));
743 inst[1] = t_src(vp, &src[0]);
744 inst[2] = __CONST(0, SWIZZLE_ONE);
745 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
746 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
747 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
748 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
749 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
750 t_src_class(src[1].File),
751 (!src[1].
752 Negate) ? NEGATE_XYZW : NEGATE_NONE) |
753 (src[1].RelAddr << 4);
754 #endif
755
756 return inst;
757 }
758
759 static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program_code *vp,
760 struct prog_instruction *vpi,
761 GLuint * inst,
762 struct prog_src_register src[3])
763 {
764 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
765
766 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
767 GL_FALSE,
768 GL_FALSE,
769 t_dst_index(vp, &vpi->DstReg),
770 t_dst_mask(vpi->DstReg.WriteMask),
771 t_dst_class(vpi->DstReg.File));
772 inst[1] = t_src(vp, &src[0]);
773 inst[2] = __CONST(0, SWIZZLE_ZERO);
774 inst[3] = __CONST(0, SWIZZLE_ZERO);
775
776 return inst;
777 }
778
779 static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program_code *vp,
780 struct prog_instruction *vpi,
781 GLuint * inst,
782 struct prog_src_register src[3],
783 int *u_temp_i)
784 {
785 /* mul r0, r1.yzxw, r2.zxyw
786 mad r0, -r2.yzxw, r1.zxyw, r0
787 */
788
789 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
790 GL_FALSE,
791 GL_FALSE,
792 *u_temp_i,
793 t_dst_mask(vpi->DstReg.WriteMask),
794 PVS_DST_REG_TEMPORARY);
795 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
796 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
797 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
798 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
799 t_src_class(src[0].File),
800 src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
801 (src[0].RelAddr << 4);
802 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
803 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
804 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
805 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
806 t_src_class(src[1].File),
807 src[1].Negate ? NEGATE_XYZW : NEGATE_NONE) |
808 (src[1].RelAddr << 4);
809 inst[3] = __CONST(1, SWIZZLE_ZERO);
810 inst += 4;
811
812 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
813 GL_FALSE,
814 GL_FALSE,
815 t_dst_index(vp, &vpi->DstReg),
816 t_dst_mask(vpi->DstReg.WriteMask),
817 t_dst_class(vpi->DstReg.File));
818 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y
819 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z
820 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X
821 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W
822 t_src_class(src[1].File),
823 (!src[1].
824 Negate) ? NEGATE_XYZW : NEGATE_NONE) |
825 (src[1].RelAddr << 4);
826 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z
827 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
828 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
829 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
830 t_src_class(src[0].File),
831 src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
832 (src[0].RelAddr << 4);
833 inst[3] =
834 PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
835 PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
836 PVS_SRC_REG_TEMPORARY, NEGATE_NONE);
837
838 (*u_temp_i)--;
839
840 return inst;
841 }
842
843 static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp)
844 {
845 int i;
846 int cur_reg;
847 GLuint OutputsWritten, InputsRead;
848
849 OutputsWritten = glvp->OutputsWritten;
850 InputsRead = glvp->InputsRead;
851
852 cur_reg = -1;
853 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
854 if (InputsRead & (1 << i))
855 vp->inputs[i] = ++cur_reg;
856 else
857 vp->inputs[i] = -1;
858 }
859
860 cur_reg = 0;
861 for (i = 0; i < VERT_RESULT_MAX; i++)
862 vp->outputs[i] = -1;
863
864 assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
865
866 if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
867 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
868 }
869
870 if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
871 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
872 }
873
874 /* If we're writing back facing colors we need to send
875 * four colors to make front/back face colors selection work.
876 * If the vertex program doesn't write all 4 colors, lets
877 * pretend it does by skipping output index reg so the colors
878 * get written into appropriate output vectors.
879 */
880 if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
881 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
882 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
883 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
884 cur_reg++;
885 }
886
887 if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
888 vp->outputs[VERT_RESULT_COL1] = cur_reg++;
889 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
890 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
891 cur_reg++;
892 }
893
894 if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
895 vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
896 } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
897 cur_reg++;
898 }
899
900 if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
901 vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
902 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
903 cur_reg++;
904 }
905
906 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
907 if (OutputsWritten & (1 << i)) {
908 vp->outputs[i] = cur_reg++;
909 }
910 }
911
912 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
913 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
914 }
915 }
916
917 static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * compiler)
918 {
919 struct prog_instruction *vpi = compiler->program->Instructions;
920 int i;
921 GLuint *inst;
922 unsigned long num_operands;
923 /* Initial value should be last tmp reg that hw supports.
924 Strangely enough r300 doesnt mind even though these would be out of range.
925 Smart enough to realize that it doesnt need it? */
926 int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
927 struct prog_src_register src[3];
928 struct r300_vertex_program_code * vp = compiler->code;
929
930 compiler->code->pos_end = 0; /* Not supported yet */
931 compiler->code->length = 0;
932
933 t_inputs_outputs(compiler->code, compiler->program);
934
935 for (inst = compiler->code->body.d; vpi->Opcode != OPCODE_END;
936 vpi++, inst += 4) {
937
938 {
939 int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i;
940 if((compiler->code->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) {
941 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", compiler->code->num_temporaries, u_temp_used);
942 return GL_FALSE;
943 }
944 u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1;
945 }
946
947 if (!valid_dst(compiler->code, &vpi->DstReg)) {
948 /* redirect result to unused temp */
949 vpi->DstReg.File = PROGRAM_TEMPORARY;
950 vpi->DstReg.Index = u_temp_i;
951 }
952
953 num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
954
955 /* copy the sources (src) from mesa into a local variable... is this needed? */
956 for (i = 0; i < num_operands; i++) {
957 src[i] = vpi->SrcReg[i];
958 }
959
960 if (num_operands == 3) { /* TODO: scalars */
961 if (CMP_SRCS(src[1], src[2])
962 || CMP_SRCS(src[0], src[2])) {
963 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
964 GL_FALSE,
965 GL_FALSE,
966 u_temp_i,
967 WRITEMASK_XYZW,
968 PVS_DST_REG_TEMPORARY);
969 inst[1] =
970 PVS_SRC_OPERAND(t_src_index(compiler->code, &src[2]),
971 SWIZZLE_X,
972 SWIZZLE_Y,
973 SWIZZLE_Z,
974 SWIZZLE_W,
975 t_src_class(src[2].File),
976 NEGATE_NONE) | (src[2].
977 RelAddr <<
978 4);
979 inst[2] = __CONST(2, SWIZZLE_ZERO);
980 inst[3] = __CONST(2, SWIZZLE_ZERO);
981 inst += 4;
982
983 src[2].File = PROGRAM_TEMPORARY;
984 src[2].Index = u_temp_i;
985 src[2].RelAddr = 0;
986 u_temp_i--;
987 }
988 }
989
990 if (num_operands >= 2) {
991 if (CMP_SRCS(src[1], src[0])) {
992 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
993 GL_FALSE,
994 GL_FALSE,
995 u_temp_i,
996 WRITEMASK_XYZW,
997 PVS_DST_REG_TEMPORARY);
998 inst[1] =
999 PVS_SRC_OPERAND(t_src_index(compiler->code, &src[0]),
1000 SWIZZLE_X,
1001 SWIZZLE_Y,
1002 SWIZZLE_Z,
1003 SWIZZLE_W,
1004 t_src_class(src[0].File),
1005 NEGATE_NONE) | (src[0].
1006 RelAddr <<
1007 4);
1008 inst[2] = __CONST(0, SWIZZLE_ZERO);
1009 inst[3] = __CONST(0, SWIZZLE_ZERO);
1010 inst += 4;
1011
1012 src[0].File = PROGRAM_TEMPORARY;
1013 src[0].Index = u_temp_i;
1014 src[0].RelAddr = 0;
1015 u_temp_i--;
1016 }
1017 }
1018
1019 switch (vpi->Opcode) {
1020 case OPCODE_ABS:
1021 inst = r300TranslateOpcodeABS(compiler->code, vpi, inst, src);
1022 break;
1023 case OPCODE_ADD:
1024 inst = r300TranslateOpcodeADD(compiler->code, vpi, inst, src);
1025 break;
1026 case OPCODE_ARL:
1027 inst = r300TranslateOpcodeARL(compiler->code, vpi, inst, src);
1028 break;
1029 case OPCODE_DP3:
1030 inst = r300TranslateOpcodeDP3(compiler->code, vpi, inst, src);
1031 break;
1032 case OPCODE_DP4:
1033 inst = r300TranslateOpcodeDP4(compiler->code, vpi, inst, src);
1034 break;
1035 case OPCODE_DPH:
1036 inst = r300TranslateOpcodeDPH(compiler->code, vpi, inst, src);
1037 break;
1038 case OPCODE_DST:
1039 inst = r300TranslateOpcodeDST(compiler->code, vpi, inst, src);
1040 break;
1041 case OPCODE_EX2:
1042 inst = r300TranslateOpcodeEX2(compiler->code, vpi, inst, src);
1043 break;
1044 case OPCODE_EXP:
1045 inst = r300TranslateOpcodeEXP(compiler->code, vpi, inst, src);
1046 break;
1047 case OPCODE_FLR:
1048 inst = r300TranslateOpcodeFLR(compiler->code, vpi, inst, src, /* FIXME */
1049 &u_temp_i);
1050 break;
1051 case OPCODE_FRC:
1052 inst = r300TranslateOpcodeFRC(compiler->code, vpi, inst, src);
1053 break;
1054 case OPCODE_LG2:
1055 inst = r300TranslateOpcodeLG2(compiler->code, vpi, inst, src);
1056 break;
1057 case OPCODE_LIT:
1058 inst = r300TranslateOpcodeLIT(compiler->code, vpi, inst, src);
1059 break;
1060 case OPCODE_LOG:
1061 inst = r300TranslateOpcodeLOG(compiler->code, vpi, inst, src);
1062 break;
1063 case OPCODE_MAD:
1064 inst = r300TranslateOpcodeMAD(compiler->code, vpi, inst, src);
1065 break;
1066 case OPCODE_MAX:
1067 inst = r300TranslateOpcodeMAX(compiler->code, vpi, inst, src);
1068 break;
1069 case OPCODE_MIN:
1070 inst = r300TranslateOpcodeMIN(compiler->code, vpi, inst, src);
1071 break;
1072 case OPCODE_MOV:
1073 inst = r300TranslateOpcodeMOV(compiler->code, vpi, inst, src);
1074 break;
1075 case OPCODE_MUL:
1076 inst = r300TranslateOpcodeMUL(compiler->code, vpi, inst, src);
1077 break;
1078 case OPCODE_POW:
1079 inst = r300TranslateOpcodePOW(compiler->code, vpi, inst, src);
1080 break;
1081 case OPCODE_RCP:
1082 inst = r300TranslateOpcodeRCP(compiler->code, vpi, inst, src);
1083 break;
1084 case OPCODE_RSQ:
1085 inst = r300TranslateOpcodeRSQ(compiler->code, vpi, inst, src);
1086 break;
1087 case OPCODE_SGE:
1088 inst = r300TranslateOpcodeSGE(compiler->code, vpi, inst, src);
1089 break;
1090 case OPCODE_SLT:
1091 inst = r300TranslateOpcodeSLT(compiler->code, vpi, inst, src);
1092 break;
1093 case OPCODE_SUB:
1094 inst = r300TranslateOpcodeSUB(compiler->code, vpi, inst, src);
1095 break;
1096 case OPCODE_SWZ:
1097 inst = r300TranslateOpcodeSWZ(compiler->code, vpi, inst, src);
1098 break;
1099 case OPCODE_XPD:
1100 inst = r300TranslateOpcodeXPD(compiler->code, vpi, inst, src, /* FIXME */
1101 &u_temp_i);
1102 break;
1103 default:
1104 return GL_FALSE;
1105 }
1106 }
1107
1108 compiler->code->length = (inst - compiler->code->body.d);
1109 if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
1110 return GL_FALSE;
1111 }
1112
1113 return GL_TRUE;
1114 }
1115
1116 static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id)
1117 {
1118 struct prog_instruction *vpi;
1119
1120 _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2);
1121
1122 vpi = &prog->Instructions[prog->NumInstructions - 3];
1123
1124 vpi->Opcode = OPCODE_MOV;
1125
1126 vpi->DstReg.File = PROGRAM_OUTPUT;
1127 vpi->DstReg.Index = VERT_RESULT_HPOS;
1128 vpi->DstReg.WriteMask = WRITEMASK_XYZW;
1129 vpi->DstReg.CondMask = COND_TR;
1130
1131 vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
1132 vpi->SrcReg[0].Index = temp_index;
1133 vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
1134
1135 ++vpi;
1136
1137 vpi->Opcode = OPCODE_MOV;
1138
1139 vpi->DstReg.File = PROGRAM_OUTPUT;
1140 vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
1141 vpi->DstReg.WriteMask = WRITEMASK_XYZW;
1142 vpi->DstReg.CondMask = COND_TR;
1143
1144 vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
1145 vpi->SrcReg[0].Index = temp_index;
1146 vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
1147
1148 ++vpi;
1149
1150 vpi->Opcode = OPCODE_END;
1151 }
1152
1153 static void pos_as_texcoord(struct gl_program *prog, int tex_id)
1154 {
1155 struct prog_instruction *vpi;
1156 GLuint tempregi = prog->NumTemporaries;
1157
1158 prog->NumTemporaries++;
1159
1160 for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
1161 if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) {
1162 vpi->DstReg.File = PROGRAM_TEMPORARY;
1163 vpi->DstReg.Index = tempregi;
1164 }
1165 }
1166
1167 insert_wpos(prog, tempregi, tex_id);
1168
1169 prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
1170 }
1171
1172 /**
1173 * The fogcoord attribute is special in that only the first component
1174 * is relevant, and the remaining components are always fixed (when read
1175 * from by the fragment program) to yield an X001 pattern.
1176 *
1177 * We need to enforce this either in the vertex program or in the fragment
1178 * program, and this code chooses not to enforce it in the vertex program.
1179 * This is slightly cheaper, as long as the fragment program does not use
1180 * weird swizzles.
1181 *
1182 * And it seems that usually, weird swizzles are not used, so...
1183 *
1184 * See also the counterpart rewriting for fragment programs.
1185 */
1186 static void fog_as_texcoord(struct gl_program *prog, int tex_id)
1187 {
1188 struct prog_instruction *vpi;
1189
1190 vpi = prog->Instructions;
1191 while (vpi->Opcode != OPCODE_END) {
1192 if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) {
1193 vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
1194 vpi->DstReg.WriteMask = WRITEMASK_X;
1195 }
1196
1197 ++vpi;
1198 }
1199
1200 prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC);
1201 prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
1202 }
1203
1204 static int translateABS(struct gl_program *prog, int pos)
1205 {
1206 struct prog_instruction *inst;
1207
1208 inst = &prog->Instructions[pos];
1209
1210 inst->Opcode = OPCODE_MAX;
1211 inst->SrcReg[1] = inst->SrcReg[0];
1212 inst->SrcReg[1].Negate ^= NEGATE_XYZW;
1213
1214 return 0;
1215 }
1216
1217 static int translateDP3(struct gl_program *prog, int pos)
1218 {
1219 struct prog_instruction *inst;
1220
1221 inst = &prog->Instructions[pos];
1222
1223 inst->Opcode = OPCODE_DP4;
1224 inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
1225
1226 return 0;
1227 }
1228
1229 static int translateDPH(struct gl_program *prog, int pos)
1230 {
1231 struct prog_instruction *inst;
1232
1233 inst = &prog->Instructions[pos];
1234
1235 inst->Opcode = OPCODE_DP4;
1236 inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
1237
1238 return 0;
1239 }
1240
1241 static int translateFLR(struct gl_program *prog, int pos)
1242 {
1243 struct prog_instruction *inst;
1244 struct prog_dst_register dst;
1245 int tmp_idx;
1246
1247 tmp_idx = prog->NumTemporaries++;
1248
1249 _mesa_insert_instructions(prog, pos + 1, 1);
1250
1251 inst = &prog->Instructions[pos];
1252 dst = inst->DstReg;
1253
1254 inst->Opcode = OPCODE_FRC;
1255 inst->DstReg.File = PROGRAM_TEMPORARY;
1256 inst->DstReg.Index = tmp_idx;
1257 ++inst;
1258
1259 inst->Opcode = OPCODE_ADD;
1260 inst->DstReg = dst;
1261 inst->SrcReg[0] = (inst-1)->SrcReg[0];
1262 inst->SrcReg[1].File = PROGRAM_TEMPORARY;
1263 inst->SrcReg[1].Index = tmp_idx;
1264 inst->SrcReg[1].Negate = NEGATE_XYZW;
1265
1266 return 1;
1267 }
1268
1269 static int translateSUB(struct gl_program *prog, int pos)
1270 {
1271 struct prog_instruction *inst;
1272
1273 inst = &prog->Instructions[pos];
1274
1275 inst->Opcode = OPCODE_ADD;
1276 inst->SrcReg[1].Negate ^= NEGATE_XYZW;
1277
1278 return 0;
1279 }
1280
1281 static int translateSWZ(struct gl_program *prog, int pos)
1282 {
1283 prog->Instructions[pos].Opcode = OPCODE_MOV;
1284
1285 return 0;
1286 }
1287
1288 static int translateXPD(struct gl_program *prog, int pos)
1289 {
1290 struct prog_instruction *inst;
1291 int tmp_idx;
1292
1293 tmp_idx = prog->NumTemporaries++;
1294
1295 _mesa_insert_instructions(prog, pos + 1, 1);
1296
1297 inst = &prog->Instructions[pos];
1298
1299 *(inst+1) = *inst;
1300
1301 inst->Opcode = OPCODE_MUL;
1302 inst->DstReg.File = PROGRAM_TEMPORARY;
1303 inst->DstReg.Index = tmp_idx;
1304 inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
1305 inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
1306 ++inst;
1307
1308 inst->Opcode = OPCODE_MAD;
1309 inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
1310 inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
1311 inst->SrcReg[1].Negate ^= NEGATE_XYZW;
1312 inst->SrcReg[2].File = PROGRAM_TEMPORARY;
1313 inst->SrcReg[2].Index = tmp_idx;
1314
1315 return 1;
1316 }
1317
1318 static void translateInsts(struct gl_program *prog)
1319 {
1320 struct prog_instruction *inst;
1321 int i;
1322
1323 for (i = 0; i < prog->NumInstructions; ++i) {
1324 inst = &prog->Instructions[i];
1325
1326 switch (inst->Opcode) {
1327 case OPCODE_ABS:
1328 i += translateABS(prog, i);
1329 break;
1330 case OPCODE_DP3:
1331 i += translateDP3(prog, i);
1332 break;
1333 case OPCODE_DPH:
1334 i += translateDPH(prog, i);
1335 break;
1336 case OPCODE_FLR:
1337 i += translateFLR(prog, i);
1338 break;
1339 case OPCODE_SUB:
1340 i += translateSUB(prog, i);
1341 break;
1342 case OPCODE_SWZ:
1343 i += translateSWZ(prog, i);
1344 break;
1345 case OPCODE_XPD:
1346 i += translateXPD(prog, i);
1347 break;
1348 default:
1349 break;
1350 }
1351 }
1352 }
1353
1354 #define ADD_OUTPUT(fp_attr, vp_result) \
1355 do { \
1356 if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \
1357 OutputsAdded |= 1 << (vp_result); \
1358 count++; \
1359 } \
1360 } while (0)
1361
1362 static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
1363 {
1364 GLuint OutputsAdded, FpReads;
1365 int i, count;
1366
1367 OutputsAdded = 0;
1368 count = 0;
1369 FpReads = compiler->state.FpReads;
1370
1371 ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
1372 ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
1373
1374 for (i = 0; i < 7; ++i) {
1375 ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
1376 }
1377
1378 /* Some outputs may be artificially added, to match the inputs of the fragment program.
1379 * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
1380 * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
1381 */
1382 if (count > 0) {
1383 struct prog_instruction *inst;
1384
1385 _mesa_insert_instructions(compiler->program, compiler->program->NumInstructions - 1, count);
1386 inst = &compiler->program->Instructions[compiler->program->NumInstructions - 1 - count];
1387
1388 for (i = 0; i < VERT_RESULT_MAX; ++i) {
1389 if (OutputsAdded & (1 << i)) {
1390 inst->Opcode = OPCODE_MOV;
1391
1392 inst->DstReg.File = PROGRAM_OUTPUT;
1393 inst->DstReg.Index = i;
1394 inst->DstReg.WriteMask = WRITEMASK_XYZW;
1395 inst->DstReg.CondMask = COND_TR;
1396
1397 inst->SrcReg[0].File = PROGRAM_CONSTANT;
1398 inst->SrcReg[0].Index = 0;
1399 inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
1400
1401 ++inst;
1402 }
1403 }
1404
1405 compiler->program->OutputsWritten |= OutputsAdded;
1406 }
1407 }
1408
1409 #undef ADD_OUTPUT
1410
1411 static void nqssadceInit(struct nqssadce_state* s)
1412 {
1413 struct r300_vertex_program_compiler * compiler = s->UserData;
1414 GLuint fp_reads;
1415
1416 fp_reads = compiler->state.FpReads;
1417 {
1418 if (fp_reads & FRAG_BIT_COL0) {
1419 s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW;
1420 s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW;
1421 }
1422
1423 if (fp_reads & FRAG_BIT_COL1) {
1424 s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW;
1425 s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW;
1426 }
1427 }
1428
1429 {
1430 int i;
1431 for (i = 0; i < 8; ++i) {
1432 if (fp_reads & FRAG_BIT_TEX(i)) {
1433 s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW;
1434 }
1435 }
1436 }
1437
1438 s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
1439 if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ))
1440 s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
1441 }
1442
1443 static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
1444 {
1445 (void) opcode;
1446 (void) reg;
1447
1448 return GL_TRUE;
1449 }
1450
1451
1452
1453 GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler, GLcontext * ctx)
1454 {
1455 GLboolean success;
1456
1457 if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) {
1458 pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0);
1459 }
1460
1461 if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) {
1462 fog_as_texcoord(compiler->program, compiler->state.FogAttr - FRAG_ATTRIB_TEX0);
1463 }
1464
1465 addArtificialOutputs(compiler);
1466
1467 translateInsts(compiler->program);
1468
1469 if (compiler->Base.Debug) {
1470 fprintf(stderr, "Vertex program after native rewrite:\n");
1471 _mesa_print_program(compiler->program);
1472 fflush(stdout);
1473 }
1474
1475 {
1476 struct radeon_nqssadce_descr nqssadce = {
1477 .Init = &nqssadceInit,
1478 .IsNativeSwizzle = &swizzleIsNative,
1479 .BuildSwizzle = NULL
1480 };
1481 radeonNqssaDce(compiler->program, &nqssadce, compiler);
1482
1483 /* We need this step for reusing temporary registers */
1484 _mesa_optimize_program(ctx, compiler->program);
1485
1486 if (compiler->Base.Debug) {
1487 fprintf(stderr, "Vertex program after NQSSADCE:\n");
1488 _mesa_print_program(compiler->program);
1489 fflush(stdout);
1490 }
1491 }
1492
1493 assert(compiler->program->NumInstructions);
1494 {
1495 struct prog_instruction *inst;
1496 int max, i, tmp;
1497
1498 inst = compiler->program->Instructions;
1499 max = -1;
1500 while (inst->Opcode != OPCODE_END) {
1501 tmp = _mesa_num_inst_src_regs(inst->Opcode);
1502 for (i = 0; i < tmp; ++i) {
1503 if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
1504 if ((int) inst->SrcReg[i].Index > max) {
1505 max = inst->SrcReg[i].Index;
1506 }
1507 }
1508 }
1509
1510 if (_mesa_num_inst_dst_regs(inst->Opcode)) {
1511 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
1512 if ((int) inst->DstReg.Index > max) {
1513 max = inst->DstReg.Index;
1514 }
1515 }
1516 }
1517 ++inst;
1518 }
1519
1520 /* We actually want highest index of used temporary register,
1521 * not the number of temporaries used.
1522 * These values aren't always the same.
1523 */
1524 compiler->code->num_temporaries = max + 1;
1525 }
1526
1527 success = translate_vertex_program(compiler);
1528
1529 compiler->code->InputsRead = compiler->program->InputsRead;
1530 compiler->code->OutputsWritten = compiler->program->OutputsWritten;
1531
1532 return success;
1533 }