c7fc2617de1498c27cfc408b616f46adedf9aacb
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r3xx_vertprog.c
1 /*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "radeon_compiler.h"
24
25 #include "../r300_reg.h"
26
27 #include "radeon_nqssadce.h"
28 #include "radeon_program.h"
29 #include "radeon_program_alu.h"
30
31 #include "shader/prog_optimize.h"
32 #include "shader/prog_print.h"
33
34
/* TODO: Get rid of t_src_class call */
/*
 * Evaluates to non-zero when the source registers a and b "conflict":
 *  - their RelAddr flags differ, or
 *  - their Index values differ while both map to PVS_SRC_REG_CONSTANT, or
 *    both map to PVS_SRC_REG_INPUT.
 *
 * translate_vertex_program() uses this to decide whether one of the two
 * operands must first be copied into a scratch temporary.
 * NOTE(review): presumably the hardware can only fetch one constant and one
 * input register per instruction - confirm against the PVS documentation.
 *
 * Both arguments are evaluated several times, so they must be free of side
 * effects.
 */
#define CMP_SRCS(a, b) \
	(((a).RelAddr != (b).RelAddr) || \
	 ((a).Index != (b).Index && \
	  ((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
	    t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
	   (t_src_class((a).File) == PVS_SRC_REG_INPUT && \
	    t_src_class((b).File) == PVS_SRC_REG_INPUT))))

/*
 * Build a source operand word that reads the already-valid source src[x]
 * with all four components swizzled to y (SWIZZLE_ZERO or SWIZZLE_ONE), so
 * that the operand yields a constant 0 or 1 vector.
 *
 * The macro silently relies on two names being visible where it is expanded:
 * `vp` (the vertex program code) and `src` (the local source register array).
 */
#define __CONST(x, y) \
	(PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
			 t_swizzle(y), t_swizzle(y), \
			 t_swizzle(y), t_swizzle(y), \
			 t_src_class(src[x].File), \
			 NEGATE_NONE) | \
	 (src[x].RelAddr << 4))
54
55
56
57
58 static unsigned long t_dst_mask(GLuint mask)
59 {
60 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
61 return mask & WRITEMASK_XYZW;
62 }
63
64 static unsigned long t_dst_class(gl_register_file file)
65 {
66
67 switch (file) {
68 case PROGRAM_TEMPORARY:
69 return PVS_DST_REG_TEMPORARY;
70 case PROGRAM_OUTPUT:
71 return PVS_DST_REG_OUT;
72 case PROGRAM_ADDRESS:
73 return PVS_DST_REG_A0;
74 /*
75 case PROGRAM_INPUT:
76 case PROGRAM_LOCAL_PARAM:
77 case PROGRAM_ENV_PARAM:
78 case PROGRAM_NAMED_PARAM:
79 case PROGRAM_STATE_VAR:
80 case PROGRAM_WRITE_ONLY:
81 case PROGRAM_ADDRESS:
82 */
83 default:
84 fprintf(stderr, "problem in %s", __FUNCTION__);
85 _mesa_exit(-1);
86 return -1;
87 }
88 }
89
90 static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
91 struct prog_dst_register *dst)
92 {
93 if (dst->File == PROGRAM_OUTPUT)
94 return vp->outputs[dst->Index];
95
96 return dst->Index;
97 }
98
99 static unsigned long t_src_class(gl_register_file file)
100 {
101 switch (file) {
102 case PROGRAM_TEMPORARY:
103 return PVS_SRC_REG_TEMPORARY;
104 case PROGRAM_INPUT:
105 return PVS_SRC_REG_INPUT;
106 case PROGRAM_LOCAL_PARAM:
107 case PROGRAM_ENV_PARAM:
108 case PROGRAM_NAMED_PARAM:
109 case PROGRAM_CONSTANT:
110 case PROGRAM_STATE_VAR:
111 return PVS_SRC_REG_CONSTANT;
112 /*
113 case PROGRAM_OUTPUT:
114 case PROGRAM_WRITE_ONLY:
115 case PROGRAM_ADDRESS:
116 */
117 default:
118 fprintf(stderr, "problem in %s", __FUNCTION__);
119 _mesa_exit(-1);
120 return -1;
121 }
122 }
123
124 static INLINE unsigned long t_swizzle(GLubyte swizzle)
125 {
126 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
127 return swizzle;
128 }
129
130 static unsigned long t_src_index(struct r300_vertex_program_code *vp,
131 struct prog_src_register *src)
132 {
133 if (src->File == PROGRAM_INPUT) {
134 assert(vp->inputs[src->Index] != -1);
135 return vp->inputs[src->Index];
136 } else {
137 if (src->Index < 0) {
138 fprintf(stderr,
139 "negative offsets for indirect addressing do not work.\n");
140 return 0;
141 }
142 return src->Index;
143 }
144 }
145
146 /* these two functions should probably be merged... */
147
148 static unsigned long t_src(struct r300_vertex_program_code *vp,
149 struct prog_src_register *src)
150 {
151 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
152 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
153 */
154 return PVS_SRC_OPERAND(t_src_index(vp, src),
155 t_swizzle(GET_SWZ(src->Swizzle, 0)),
156 t_swizzle(GET_SWZ(src->Swizzle, 1)),
157 t_swizzle(GET_SWZ(src->Swizzle, 2)),
158 t_swizzle(GET_SWZ(src->Swizzle, 3)),
159 t_src_class(src->File),
160 src->Negate) | (src->RelAddr << 4);
161 }
162
163 static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
164 struct prog_src_register *src)
165 {
166 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
167 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
168 */
169 return PVS_SRC_OPERAND(t_src_index(vp, src),
170 t_swizzle(GET_SWZ(src->Swizzle, 0)),
171 t_swizzle(GET_SWZ(src->Swizzle, 0)),
172 t_swizzle(GET_SWZ(src->Swizzle, 0)),
173 t_swizzle(GET_SWZ(src->Swizzle, 0)),
174 t_src_class(src->File),
175 src->Negate ? NEGATE_XYZW : NEGATE_NONE) |
176 (src->RelAddr << 4);
177 }
178
179 static GLboolean valid_dst(struct r300_vertex_program_code *vp,
180 struct prog_dst_register *dst)
181 {
182 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
183 return GL_FALSE;
184 } else if (dst->File == PROGRAM_ADDRESS) {
185 assert(dst->Index == 0);
186 }
187
188 return GL_TRUE;
189 }
190
191 static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program_code *vp,
192 struct prog_instruction *vpi,
193 GLuint * inst,
194 struct prog_src_register src[3])
195 {
196 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
197 GL_FALSE,
198 GL_FALSE,
199 t_dst_index(vp, &vpi->DstReg),
200 t_dst_mask(vpi->DstReg.WriteMask),
201 t_dst_class(vpi->DstReg.File));
202 inst[1] = t_src(vp, &src[0]);
203 inst[2] = t_src(vp, &src[1]);
204 inst[3] = __CONST(1, SWIZZLE_ZERO);
205
206 return inst;
207 }
208
209 static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program_code *vp,
210 struct prog_instruction *vpi,
211 GLuint * inst,
212 struct prog_src_register src[3])
213 {
214 inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
215 GL_FALSE,
216 GL_FALSE,
217 t_dst_index(vp, &vpi->DstReg),
218 t_dst_mask(vpi->DstReg.WriteMask),
219 t_dst_class(vpi->DstReg.File));
220 inst[1] = t_src(vp, &src[0]);
221 inst[2] = __CONST(0, SWIZZLE_ZERO);
222 inst[3] = __CONST(0, SWIZZLE_ZERO);
223
224 return inst;
225 }
226
227 static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program_code *vp,
228 struct prog_instruction *vpi,
229 GLuint * inst,
230 struct prog_src_register src[3])
231 {
232 inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
233 GL_FALSE,
234 GL_FALSE,
235 t_dst_index(vp, &vpi->DstReg),
236 t_dst_mask(vpi->DstReg.WriteMask),
237 t_dst_class(vpi->DstReg.File));
238 inst[1] = t_src(vp, &src[0]);
239 inst[2] = t_src(vp, &src[1]);
240 inst[3] = __CONST(1, SWIZZLE_ZERO);
241
242 return inst;
243 }
244
245 static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program_code *vp,
246 struct prog_instruction *vpi,
247 GLuint * inst,
248 struct prog_src_register src[3])
249 {
250 inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
251 GL_FALSE,
252 GL_FALSE,
253 t_dst_index(vp, &vpi->DstReg),
254 t_dst_mask(vpi->DstReg.WriteMask),
255 t_dst_class(vpi->DstReg.File));
256 inst[1] = t_src(vp, &src[0]);
257 inst[2] = t_src(vp, &src[1]);
258 inst[3] = __CONST(1, SWIZZLE_ZERO);
259
260 return inst;
261 }
262
263 static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program_code *vp,
264 struct prog_instruction *vpi,
265 GLuint * inst,
266 struct prog_src_register src[3])
267 {
268 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
269 GL_TRUE,
270 GL_FALSE,
271 t_dst_index(vp, &vpi->DstReg),
272 t_dst_mask(vpi->DstReg.WriteMask),
273 t_dst_class(vpi->DstReg.File));
274 inst[1] = t_src_scalar(vp, &src[0]);
275 inst[2] = __CONST(0, SWIZZLE_ZERO);
276 inst[3] = __CONST(0, SWIZZLE_ZERO);
277
278 return inst;
279 }
280
281 static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program_code *vp,
282 struct prog_instruction *vpi,
283 GLuint * inst,
284 struct prog_src_register src[3])
285 {
286 inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
287 GL_TRUE,
288 GL_FALSE,
289 t_dst_index(vp, &vpi->DstReg),
290 t_dst_mask(vpi->DstReg.WriteMask),
291 t_dst_class(vpi->DstReg.File));
292 inst[1] = t_src_scalar(vp, &src[0]);
293 inst[2] = __CONST(0, SWIZZLE_ZERO);
294 inst[3] = __CONST(0, SWIZZLE_ZERO);
295
296 return inst;
297 }
298
299 static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program_code *vp,
300 struct prog_instruction *vpi,
301 GLuint * inst,
302 struct prog_src_register src[3])
303 {
304 inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
305 GL_FALSE,
306 GL_FALSE,
307 t_dst_index(vp, &vpi->DstReg),
308 t_dst_mask(vpi->DstReg.WriteMask),
309 t_dst_class(vpi->DstReg.File));
310 inst[1] = t_src(vp, &src[0]);
311 inst[2] = __CONST(0, SWIZZLE_ZERO);
312 inst[3] = __CONST(0, SWIZZLE_ZERO);
313
314 return inst;
315 }
316
317 static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program_code *vp,
318 struct prog_instruction *vpi,
319 GLuint * inst,
320 struct prog_src_register src[3])
321 {
322 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
323
324 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
325 GL_TRUE,
326 GL_FALSE,
327 t_dst_index(vp, &vpi->DstReg),
328 t_dst_mask(vpi->DstReg.WriteMask),
329 t_dst_class(vpi->DstReg.File));
330 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
331 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
332 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
333 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
334 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
335 t_src_class(src[0].File),
336 src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
337 (src[0].RelAddr << 4);
338 inst[2] = __CONST(0, SWIZZLE_ZERO);
339 inst[3] = __CONST(0, SWIZZLE_ZERO);
340
341 return inst;
342 }
343
344 static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program_code *vp,
345 struct prog_instruction *vpi,
346 GLuint * inst,
347 struct prog_src_register src[3])
348 {
349 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
350
351 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
352 GL_TRUE,
353 GL_FALSE,
354 t_dst_index(vp, &vpi->DstReg),
355 t_dst_mask(vpi->DstReg.WriteMask),
356 t_dst_class(vpi->DstReg.File));
357 /* NOTE: Users swizzling might not work. */
358 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
359 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
360 PVS_SRC_SELECT_FORCE_0, // Z
361 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
362 t_src_class(src[0].File),
363 src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
364 (src[0].RelAddr << 4);
365 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
366 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
367 PVS_SRC_SELECT_FORCE_0, // Z
368 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
369 t_src_class(src[0].File),
370 src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
371 (src[0].RelAddr << 4);
372 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
373 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
374 PVS_SRC_SELECT_FORCE_0, // Z
375 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
376 t_src_class(src[0].File),
377 src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
378 (src[0].RelAddr << 4);
379
380 return inst;
381 }
382
383 static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program_code *vp,
384 struct prog_instruction *vpi,
385 GLuint * inst,
386 struct prog_src_register src[3])
387 {
388 inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
389 GL_TRUE,
390 GL_FALSE,
391 t_dst_index(vp, &vpi->DstReg),
392 t_dst_mask(vpi->DstReg.WriteMask),
393 t_dst_class(vpi->DstReg.File));
394 inst[1] = t_src_scalar(vp, &src[0]);
395 inst[2] = __CONST(0, SWIZZLE_ZERO);
396 inst[3] = __CONST(0, SWIZZLE_ZERO);
397
398 return inst;
399 }
400
401 static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program_code *vp,
402 struct prog_instruction *vpi,
403 GLuint * inst,
404 struct prog_src_register src[3])
405 {
406 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
407 GL_FALSE,
408 GL_TRUE,
409 t_dst_index(vp, &vpi->DstReg),
410 t_dst_mask(vpi->DstReg.WriteMask),
411 t_dst_class(vpi->DstReg.File));
412 inst[1] = t_src(vp, &src[0]);
413 inst[2] = t_src(vp, &src[1]);
414 inst[3] = t_src(vp, &src[2]);
415
416 return inst;
417 }
418
419 static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program_code *vp,
420 struct prog_instruction *vpi,
421 GLuint * inst,
422 struct prog_src_register src[3])
423 {
424 inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
425 GL_FALSE,
426 GL_FALSE,
427 t_dst_index(vp, &vpi->DstReg),
428 t_dst_mask(vpi->DstReg.WriteMask),
429 t_dst_class(vpi->DstReg.File));
430 inst[1] = t_src(vp, &src[0]);
431 inst[2] = t_src(vp, &src[1]);
432 inst[3] = __CONST(1, SWIZZLE_ZERO);
433
434 return inst;
435 }
436
437 static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program_code *vp,
438 struct prog_instruction *vpi,
439 GLuint * inst,
440 struct prog_src_register src[3])
441 {
442 inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
443 GL_FALSE,
444 GL_FALSE,
445 t_dst_index(vp, &vpi->DstReg),
446 t_dst_mask(vpi->DstReg.WriteMask),
447 t_dst_class(vpi->DstReg.File));
448 inst[1] = t_src(vp, &src[0]);
449 inst[2] = t_src(vp, &src[1]);
450 inst[3] = __CONST(1, SWIZZLE_ZERO);
451
452 return inst;
453 }
454
455 static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program_code *vp,
456 struct prog_instruction *vpi,
457 GLuint * inst,
458 struct prog_src_register src[3])
459 {
460 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
461
462 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
463 GL_FALSE,
464 GL_FALSE,
465 t_dst_index(vp, &vpi->DstReg),
466 t_dst_mask(vpi->DstReg.WriteMask),
467 t_dst_class(vpi->DstReg.File));
468 inst[1] = t_src(vp, &src[0]);
469 inst[2] = __CONST(0, SWIZZLE_ZERO);
470 inst[3] = __CONST(0, SWIZZLE_ZERO);
471
472 return inst;
473 }
474
475 static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program_code *vp,
476 struct prog_instruction *vpi,
477 GLuint * inst,
478 struct prog_src_register src[3])
479 {
480 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
481 GL_FALSE,
482 GL_FALSE,
483 t_dst_index(vp, &vpi->DstReg),
484 t_dst_mask(vpi->DstReg.WriteMask),
485 t_dst_class(vpi->DstReg.File));
486 inst[1] = t_src(vp, &src[0]);
487 inst[2] = t_src(vp, &src[1]);
488 inst[3] = __CONST(1, SWIZZLE_ZERO);
489
490 return inst;
491 }
492
493 static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program_code *vp,
494 struct prog_instruction *vpi,
495 GLuint * inst,
496 struct prog_src_register src[3])
497 {
498 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
499 GL_TRUE,
500 GL_FALSE,
501 t_dst_index(vp, &vpi->DstReg),
502 t_dst_mask(vpi->DstReg.WriteMask),
503 t_dst_class(vpi->DstReg.File));
504 inst[1] = t_src_scalar(vp, &src[0]);
505 inst[2] = __CONST(0, SWIZZLE_ZERO);
506 inst[3] = t_src_scalar(vp, &src[1]);
507
508 return inst;
509 }
510
511 static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program_code *vp,
512 struct prog_instruction *vpi,
513 GLuint * inst,
514 struct prog_src_register src[3])
515 {
516 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
517 GL_TRUE,
518 GL_FALSE,
519 t_dst_index(vp, &vpi->DstReg),
520 t_dst_mask(vpi->DstReg.WriteMask),
521 t_dst_class(vpi->DstReg.File));
522 inst[1] = t_src_scalar(vp, &src[0]);
523 inst[2] = __CONST(0, SWIZZLE_ZERO);
524 inst[3] = __CONST(0, SWIZZLE_ZERO);
525
526 return inst;
527 }
528
529 static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program_code *vp,
530 struct prog_instruction *vpi,
531 GLuint * inst,
532 struct prog_src_register src[3])
533 {
534 inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
535 GL_TRUE,
536 GL_FALSE,
537 t_dst_index(vp, &vpi->DstReg),
538 t_dst_mask(vpi->DstReg.WriteMask),
539 t_dst_class(vpi->DstReg.File));
540 inst[1] = t_src_scalar(vp, &src[0]);
541 inst[2] = __CONST(0, SWIZZLE_ZERO);
542 inst[3] = __CONST(0, SWIZZLE_ZERO);
543
544 return inst;
545 }
546
547 static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program_code *vp,
548 struct prog_instruction *vpi,
549 GLuint * inst,
550 struct prog_src_register src[3])
551 {
552 inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
553 GL_FALSE,
554 GL_FALSE,
555 t_dst_index(vp, &vpi->DstReg),
556 t_dst_mask(vpi->DstReg.WriteMask),
557 t_dst_class(vpi->DstReg.File));
558 inst[1] = t_src(vp, &src[0]);
559 inst[2] = t_src(vp, &src[1]);
560 inst[3] = __CONST(1, SWIZZLE_ZERO);
561
562 return inst;
563 }
564
565 static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program_code *vp,
566 struct prog_instruction *vpi,
567 GLuint * inst,
568 struct prog_src_register src[3])
569 {
570 inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
571 GL_FALSE,
572 GL_FALSE,
573 t_dst_index(vp, &vpi->DstReg),
574 t_dst_mask(vpi->DstReg.WriteMask),
575 t_dst_class(vpi->DstReg.File));
576 inst[1] = t_src(vp, &src[0]);
577 inst[2] = t_src(vp, &src[1]);
578 inst[3] = __CONST(1, SWIZZLE_ZERO);
579
580 return inst;
581 }
582
583 static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp)
584 {
585 int i;
586 int cur_reg;
587 GLuint OutputsWritten, InputsRead;
588
589 OutputsWritten = glvp->OutputsWritten;
590 InputsRead = glvp->InputsRead;
591
592 cur_reg = -1;
593 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
594 if (InputsRead & (1 << i))
595 vp->inputs[i] = ++cur_reg;
596 else
597 vp->inputs[i] = -1;
598 }
599
600 cur_reg = 0;
601 for (i = 0; i < VERT_RESULT_MAX; i++)
602 vp->outputs[i] = -1;
603
604 assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
605
606 if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
607 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
608 }
609
610 if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
611 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
612 }
613
614 /* If we're writing back facing colors we need to send
615 * four colors to make front/back face colors selection work.
616 * If the vertex program doesn't write all 4 colors, lets
617 * pretend it does by skipping output index reg so the colors
618 * get written into appropriate output vectors.
619 */
620 if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
621 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
622 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
623 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
624 cur_reg++;
625 }
626
627 if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
628 vp->outputs[VERT_RESULT_COL1] = cur_reg++;
629 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
630 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
631 cur_reg++;
632 }
633
634 if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
635 vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
636 } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
637 cur_reg++;
638 }
639
640 if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
641 vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
642 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
643 cur_reg++;
644 }
645
646 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
647 if (OutputsWritten & (1 << i)) {
648 vp->outputs[i] = cur_reg++;
649 }
650 }
651
652 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
653 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
654 }
655 }
656
657 static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * compiler)
658 {
659 struct prog_instruction *vpi = compiler->program->Instructions;
660 int i;
661 GLuint *inst;
662 unsigned long num_operands;
663 /* Initial value should be last tmp reg that hw supports.
664 Strangely enough r300 doesnt mind even though these would be out of range.
665 Smart enough to realize that it doesnt need it? */
666 int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
667 struct prog_src_register src[3];
668 struct r300_vertex_program_code * vp = compiler->code;
669
670 compiler->code->pos_end = 0; /* Not supported yet */
671 compiler->code->length = 0;
672
673 t_inputs_outputs(compiler->code, compiler->program);
674
675 for (inst = compiler->code->body.d; vpi->Opcode != OPCODE_END;
676 vpi++, inst += 4) {
677
678 {
679 int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i;
680 if((compiler->code->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) {
681 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", compiler->code->num_temporaries, u_temp_used);
682 return GL_FALSE;
683 }
684 u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1;
685 }
686
687 if (!valid_dst(compiler->code, &vpi->DstReg)) {
688 /* redirect result to unused temp */
689 vpi->DstReg.File = PROGRAM_TEMPORARY;
690 vpi->DstReg.Index = u_temp_i;
691 }
692
693 num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
694
695 /* copy the sources (src) from mesa into a local variable... is this needed? */
696 for (i = 0; i < num_operands; i++) {
697 src[i] = vpi->SrcReg[i];
698 }
699
700 if (num_operands == 3) { /* TODO: scalars */
701 if (CMP_SRCS(src[1], src[2])
702 || CMP_SRCS(src[0], src[2])) {
703 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
704 GL_FALSE,
705 GL_FALSE,
706 u_temp_i,
707 WRITEMASK_XYZW,
708 PVS_DST_REG_TEMPORARY);
709 inst[1] =
710 PVS_SRC_OPERAND(t_src_index(compiler->code, &src[2]),
711 SWIZZLE_X,
712 SWIZZLE_Y,
713 SWIZZLE_Z,
714 SWIZZLE_W,
715 t_src_class(src[2].File),
716 NEGATE_NONE) | (src[2].
717 RelAddr <<
718 4);
719 inst[2] = __CONST(2, SWIZZLE_ZERO);
720 inst[3] = __CONST(2, SWIZZLE_ZERO);
721 inst += 4;
722
723 src[2].File = PROGRAM_TEMPORARY;
724 src[2].Index = u_temp_i;
725 src[2].RelAddr = 0;
726 u_temp_i--;
727 }
728 }
729
730 if (num_operands >= 2) {
731 if (CMP_SRCS(src[1], src[0])) {
732 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
733 GL_FALSE,
734 GL_FALSE,
735 u_temp_i,
736 WRITEMASK_XYZW,
737 PVS_DST_REG_TEMPORARY);
738 inst[1] =
739 PVS_SRC_OPERAND(t_src_index(compiler->code, &src[0]),
740 SWIZZLE_X,
741 SWIZZLE_Y,
742 SWIZZLE_Z,
743 SWIZZLE_W,
744 t_src_class(src[0].File),
745 NEGATE_NONE) | (src[0].
746 RelAddr <<
747 4);
748 inst[2] = __CONST(0, SWIZZLE_ZERO);
749 inst[3] = __CONST(0, SWIZZLE_ZERO);
750 inst += 4;
751
752 src[0].File = PROGRAM_TEMPORARY;
753 src[0].Index = u_temp_i;
754 src[0].RelAddr = 0;
755 u_temp_i--;
756 }
757 }
758
759 switch (vpi->Opcode) {
760 case OPCODE_ADD:
761 inst = r300TranslateOpcodeADD(compiler->code, vpi, inst, src);
762 break;
763 case OPCODE_ARL:
764 inst = r300TranslateOpcodeARL(compiler->code, vpi, inst, src);
765 break;
766 case OPCODE_DP4:
767 inst = r300TranslateOpcodeDP4(compiler->code, vpi, inst, src);
768 break;
769 case OPCODE_DST:
770 inst = r300TranslateOpcodeDST(compiler->code, vpi, inst, src);
771 break;
772 case OPCODE_EX2:
773 inst = r300TranslateOpcodeEX2(compiler->code, vpi, inst, src);
774 break;
775 case OPCODE_EXP:
776 inst = r300TranslateOpcodeEXP(compiler->code, vpi, inst, src);
777 break;
778 case OPCODE_FRC:
779 inst = r300TranslateOpcodeFRC(compiler->code, vpi, inst, src);
780 break;
781 case OPCODE_LG2:
782 inst = r300TranslateOpcodeLG2(compiler->code, vpi, inst, src);
783 break;
784 case OPCODE_LIT:
785 inst = r300TranslateOpcodeLIT(compiler->code, vpi, inst, src);
786 break;
787 case OPCODE_LOG:
788 inst = r300TranslateOpcodeLOG(compiler->code, vpi, inst, src);
789 break;
790 case OPCODE_MAD:
791 inst = r300TranslateOpcodeMAD(compiler->code, vpi, inst, src);
792 break;
793 case OPCODE_MAX:
794 inst = r300TranslateOpcodeMAX(compiler->code, vpi, inst, src);
795 break;
796 case OPCODE_MIN:
797 inst = r300TranslateOpcodeMIN(compiler->code, vpi, inst, src);
798 break;
799 case OPCODE_MOV:
800 inst = r300TranslateOpcodeMOV(compiler->code, vpi, inst, src);
801 break;
802 case OPCODE_MUL:
803 inst = r300TranslateOpcodeMUL(compiler->code, vpi, inst, src);
804 break;
805 case OPCODE_POW:
806 inst = r300TranslateOpcodePOW(compiler->code, vpi, inst, src);
807 break;
808 case OPCODE_RCP:
809 inst = r300TranslateOpcodeRCP(compiler->code, vpi, inst, src);
810 break;
811 case OPCODE_RSQ:
812 inst = r300TranslateOpcodeRSQ(compiler->code, vpi, inst, src);
813 break;
814 case OPCODE_SGE:
815 inst = r300TranslateOpcodeSGE(compiler->code, vpi, inst, src);
816 break;
817 case OPCODE_SLT:
818 inst = r300TranslateOpcodeSLT(compiler->code, vpi, inst, src);
819 break;
820 default:
821 return GL_FALSE;
822 }
823 }
824
825 compiler->code->length = (inst - compiler->code->body.d);
826 if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
827 return GL_FALSE;
828 }
829
830 return GL_TRUE;
831 }
832
833 static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id)
834 {
835 struct prog_instruction *vpi;
836
837 _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2);
838
839 vpi = &prog->Instructions[prog->NumInstructions - 3];
840
841 vpi->Opcode = OPCODE_MOV;
842
843 vpi->DstReg.File = PROGRAM_OUTPUT;
844 vpi->DstReg.Index = VERT_RESULT_HPOS;
845 vpi->DstReg.WriteMask = WRITEMASK_XYZW;
846 vpi->DstReg.CondMask = COND_TR;
847
848 vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
849 vpi->SrcReg[0].Index = temp_index;
850 vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
851
852 ++vpi;
853
854 vpi->Opcode = OPCODE_MOV;
855
856 vpi->DstReg.File = PROGRAM_OUTPUT;
857 vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
858 vpi->DstReg.WriteMask = WRITEMASK_XYZW;
859 vpi->DstReg.CondMask = COND_TR;
860
861 vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
862 vpi->SrcReg[0].Index = temp_index;
863 vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
864
865 ++vpi;
866
867 vpi->Opcode = OPCODE_END;
868 }
869
870 static void pos_as_texcoord(struct gl_program *prog, int tex_id)
871 {
872 struct prog_instruction *vpi;
873 GLuint tempregi = prog->NumTemporaries;
874
875 prog->NumTemporaries++;
876
877 for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
878 if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) {
879 vpi->DstReg.File = PROGRAM_TEMPORARY;
880 vpi->DstReg.Index = tempregi;
881 }
882 }
883
884 insert_wpos(prog, tempregi, tex_id);
885
886 prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
887 }
888
889 /**
890 * The fogcoord attribute is special in that only the first component
891 * is relevant, and the remaining components are always fixed (when read
892 * from by the fragment program) to yield an X001 pattern.
893 *
894 * We need to enforce this either in the vertex program or in the fragment
895 * program, and this code chooses not to enforce it in the vertex program.
896 * This is slightly cheaper, as long as the fragment program does not use
897 * weird swizzles.
898 *
899 * And it seems that usually, weird swizzles are not used, so...
900 *
901 * See also the counterpart rewriting for fragment programs.
902 */
903 static void fog_as_texcoord(struct gl_program *prog, int tex_id)
904 {
905 struct prog_instruction *vpi;
906
907 vpi = prog->Instructions;
908 while (vpi->Opcode != OPCODE_END) {
909 if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) {
910 vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
911 vpi->DstReg.WriteMask = WRITEMASK_X;
912 }
913
914 ++vpi;
915 }
916
917 prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC);
918 prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
919 }
920
921
922 #define ADD_OUTPUT(fp_attr, vp_result) \
923 do { \
924 if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \
925 OutputsAdded |= 1 << (vp_result); \
926 count++; \
927 } \
928 } while (0)
929
930 static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
931 {
932 GLuint OutputsAdded, FpReads;
933 int i, count;
934
935 OutputsAdded = 0;
936 count = 0;
937 FpReads = compiler->state.FpReads;
938
939 ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
940 ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
941
942 for (i = 0; i < 7; ++i) {
943 ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
944 }
945
946 /* Some outputs may be artificially added, to match the inputs of the fragment program.
947 * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
948 * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
949 */
950 if (count > 0) {
951 struct prog_instruction *inst;
952
953 _mesa_insert_instructions(compiler->program, compiler->program->NumInstructions - 1, count);
954 inst = &compiler->program->Instructions[compiler->program->NumInstructions - 1 - count];
955
956 for (i = 0; i < VERT_RESULT_MAX; ++i) {
957 if (OutputsAdded & (1 << i)) {
958 inst->Opcode = OPCODE_MOV;
959
960 inst->DstReg.File = PROGRAM_OUTPUT;
961 inst->DstReg.Index = i;
962 inst->DstReg.WriteMask = WRITEMASK_XYZW;
963 inst->DstReg.CondMask = COND_TR;
964
965 inst->SrcReg[0].File = PROGRAM_CONSTANT;
966 inst->SrcReg[0].Index = 0;
967 inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
968
969 ++inst;
970 }
971 }
972
973 compiler->program->OutputsWritten |= OutputsAdded;
974 }
975 }
976
977 #undef ADD_OUTPUT
978
979 static void nqssadceInit(struct nqssadce_state* s)
980 {
981 struct r300_vertex_program_compiler * compiler = s->UserData;
982 GLuint fp_reads;
983
984 fp_reads = compiler->state.FpReads;
985 {
986 if (fp_reads & FRAG_BIT_COL0) {
987 s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW;
988 s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW;
989 }
990
991 if (fp_reads & FRAG_BIT_COL1) {
992 s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW;
993 s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW;
994 }
995 }
996
997 {
998 int i;
999 for (i = 0; i < 8; ++i) {
1000 if (fp_reads & FRAG_BIT_TEX(i)) {
1001 s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW;
1002 }
1003 }
1004 }
1005
1006 s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
1007 if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ))
1008 s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
1009 }
1010
1011 static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
1012 {
1013 (void) opcode;
1014 (void) reg;
1015
1016 return GL_TRUE;
1017 }
1018
1019
1020
1021 GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler, GLcontext * ctx)
1022 {
1023 GLboolean success;
1024
1025 if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) {
1026 pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0);
1027 }
1028
1029 if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) {
1030 fog_as_texcoord(compiler->program, compiler->state.FogAttr - FRAG_ATTRIB_TEX0);
1031 }
1032
1033 addArtificialOutputs(compiler);
1034
1035 {
1036 struct radeon_program_transformation transformations[] = {
1037 { &r300_transform_vertex_alu, 0 },
1038 };
1039 radeonLocalTransform(compiler->program, 1, transformations);
1040 }
1041
1042 if (compiler->Base.Debug) {
1043 fprintf(stderr, "Vertex program after native rewrite:\n");
1044 _mesa_print_program(compiler->program);
1045 fflush(stdout);
1046 }
1047
1048 {
1049 struct radeon_nqssadce_descr nqssadce = {
1050 .Init = &nqssadceInit,
1051 .IsNativeSwizzle = &swizzleIsNative,
1052 .BuildSwizzle = NULL
1053 };
1054 radeonNqssaDce(compiler->program, &nqssadce, compiler);
1055
1056 /* We need this step for reusing temporary registers */
1057 _mesa_optimize_program(ctx, compiler->program);
1058
1059 if (compiler->Base.Debug) {
1060 fprintf(stderr, "Vertex program after NQSSADCE:\n");
1061 _mesa_print_program(compiler->program);
1062 fflush(stdout);
1063 }
1064 }
1065
1066 assert(compiler->program->NumInstructions);
1067 {
1068 struct prog_instruction *inst;
1069 int max, i, tmp;
1070
1071 inst = compiler->program->Instructions;
1072 max = -1;
1073 while (inst->Opcode != OPCODE_END) {
1074 tmp = _mesa_num_inst_src_regs(inst->Opcode);
1075 for (i = 0; i < tmp; ++i) {
1076 if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
1077 if ((int) inst->SrcReg[i].Index > max) {
1078 max = inst->SrcReg[i].Index;
1079 }
1080 }
1081 }
1082
1083 if (_mesa_num_inst_dst_regs(inst->Opcode)) {
1084 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
1085 if ((int) inst->DstReg.Index > max) {
1086 max = inst->DstReg.Index;
1087 }
1088 }
1089 }
1090 ++inst;
1091 }
1092
1093 /* We actually want highest index of used temporary register,
1094 * not the number of temporaries used.
1095 * These values aren't always the same.
1096 */
1097 compiler->code->num_temporaries = max + 1;
1098 }
1099
1100 success = translate_vertex_program(compiler);
1101
1102 compiler->code->InputsRead = compiler->program->InputsRead;
1103 compiler->code->OutputsWritten = compiler->program->OutputsWritten;
1104
1105 return success;
1106 }