ff6575b303c4b39f24c482d771a6ebb149c3bf10
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r3xx_vertprog.c
1 /*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "radeon_compiler.h"
24
25 #include "../r300_reg.h"
26
27 #include "radeon_nqssadce.h"
28 #include "radeon_program.h"
29 #include "radeon_program_alu.h"
30
31 #include "shader/prog_optimize.h"
32 #include "shader/prog_print.h"
33
34
/*
 * Evaluates to non-zero when source operands a and b "conflict":
 *  - their RelAddr flags differ, or
 *  - their indices differ while both map to the constant class, or both
 *    map to the input class.
 *
 * translate_vertex_program() reacts to a conflict by first copying one of
 * the operands into a scratch temporary.
 *
 * Both arguments are evaluated several times, so only pass side-effect-free
 * lvalues.  t_src_class() is only reached when the RelAddr flags match and
 * the indices differ.
 *
 * TODO: Get rid of t_src_class call
 */
#define CMP_SRCS(a, b) \
	(((a).RelAddr != (b).RelAddr) || \
	 ((a).Index != (b).Index && \
	  ((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
	    t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
	   (t_src_class((a).File) == PVS_SRC_REG_INPUT && \
	    t_src_class((b).File) == PVS_SRC_REG_INPUT))))
41
/*
 * Take an already-setup and valid source (src[x]) and re-swizzle it so that
 * all four components use selector y, which yields a constant ZERO or ONE
 * source when y is the matching swizzle selector.
 *
 * The expansion refers to `vp` and `src` by name, so both must be in scope
 * wherever the macro is used.
 */
#define __CONST(x, y) \
	((src[x].RelAddr << 4) | \
	 PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_src_class(src[x].File), \
			 NEGATE_NONE))
54
55
56
57
58 static unsigned long t_dst_mask(GLuint mask)
59 {
60 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
61 return mask & WRITEMASK_XYZW;
62 }
63
64 static unsigned long t_dst_class(gl_register_file file)
65 {
66
67 switch (file) {
68 case PROGRAM_TEMPORARY:
69 return PVS_DST_REG_TEMPORARY;
70 case PROGRAM_OUTPUT:
71 return PVS_DST_REG_OUT;
72 case PROGRAM_ADDRESS:
73 return PVS_DST_REG_A0;
74 /*
75 case PROGRAM_INPUT:
76 case PROGRAM_LOCAL_PARAM:
77 case PROGRAM_ENV_PARAM:
78 case PROGRAM_NAMED_PARAM:
79 case PROGRAM_STATE_VAR:
80 case PROGRAM_WRITE_ONLY:
81 case PROGRAM_ADDRESS:
82 */
83 default:
84 fprintf(stderr, "problem in %s", __FUNCTION__);
85 _mesa_exit(-1);
86 return -1;
87 }
88 }
89
90 static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
91 struct prog_dst_register *dst)
92 {
93 if (dst->File == PROGRAM_OUTPUT)
94 return vp->outputs[dst->Index];
95
96 return dst->Index;
97 }
98
99 static unsigned long t_src_class(gl_register_file file)
100 {
101 switch (file) {
102 case PROGRAM_TEMPORARY:
103 return PVS_SRC_REG_TEMPORARY;
104 case PROGRAM_INPUT:
105 return PVS_SRC_REG_INPUT;
106 case PROGRAM_LOCAL_PARAM:
107 case PROGRAM_ENV_PARAM:
108 case PROGRAM_NAMED_PARAM:
109 case PROGRAM_CONSTANT:
110 case PROGRAM_STATE_VAR:
111 return PVS_SRC_REG_CONSTANT;
112 /*
113 case PROGRAM_OUTPUT:
114 case PROGRAM_WRITE_ONLY:
115 case PROGRAM_ADDRESS:
116 */
117 default:
118 fprintf(stderr, "problem in %s", __FUNCTION__);
119 _mesa_exit(-1);
120 return -1;
121 }
122 }
123
124 static INLINE unsigned long t_swizzle(GLubyte swizzle)
125 {
126 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
127 return swizzle;
128 }
129
130 static unsigned long t_src_index(struct r300_vertex_program_code *vp,
131 struct prog_src_register *src)
132 {
133 if (src->File == PROGRAM_INPUT) {
134 assert(vp->inputs[src->Index] != -1);
135 return vp->inputs[src->Index];
136 } else {
137 if (src->Index < 0) {
138 fprintf(stderr,
139 "negative offsets for indirect addressing do not work.\n");
140 return 0;
141 }
142 return src->Index;
143 }
144 }
145
146 /* these two functions should probably be merged... */
147
148 static unsigned long t_src(struct r300_vertex_program_code *vp,
149 struct prog_src_register *src)
150 {
151 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
152 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
153 */
154 return PVS_SRC_OPERAND(t_src_index(vp, src),
155 t_swizzle(GET_SWZ(src->Swizzle, 0)),
156 t_swizzle(GET_SWZ(src->Swizzle, 1)),
157 t_swizzle(GET_SWZ(src->Swizzle, 2)),
158 t_swizzle(GET_SWZ(src->Swizzle, 3)),
159 t_src_class(src->File),
160 src->Negate) | (src->RelAddr << 4);
161 }
162
163 static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
164 struct prog_src_register *src)
165 {
166 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
167 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
168 */
169 return PVS_SRC_OPERAND(t_src_index(vp, src),
170 t_swizzle(GET_SWZ(src->Swizzle, 0)),
171 t_swizzle(GET_SWZ(src->Swizzle, 0)),
172 t_swizzle(GET_SWZ(src->Swizzle, 0)),
173 t_swizzle(GET_SWZ(src->Swizzle, 0)),
174 t_src_class(src->File),
175 src->Negate ? NEGATE_XYZW : NEGATE_NONE) |
176 (src->RelAddr << 4);
177 }
178
179 static GLboolean valid_dst(struct r300_vertex_program_code *vp,
180 struct prog_dst_register *dst)
181 {
182 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
183 return GL_FALSE;
184 } else if (dst->File == PROGRAM_ADDRESS) {
185 assert(dst->Index == 0);
186 }
187
188 return GL_TRUE;
189 }
190
191 static GLuint * ei_vector1(struct r300_vertex_program_code *vp,
192 GLuint hw_opcode,
193 struct prog_instruction *vpi,
194 GLuint * inst,
195 struct prog_src_register src[3])
196 {
197 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
198 GL_FALSE,
199 GL_FALSE,
200 t_dst_index(vp, &vpi->DstReg),
201 t_dst_mask(vpi->DstReg.WriteMask),
202 t_dst_class(vpi->DstReg.File));
203 inst[1] = t_src(vp, &src[0]);
204 inst[2] = __CONST(0, SWIZZLE_ZERO);
205 inst[3] = __CONST(0, SWIZZLE_ZERO);
206
207 return inst;
208 }
209
210 static GLuint * ei_vector2(struct r300_vertex_program_code *vp,
211 GLuint hw_opcode,
212 struct prog_instruction *vpi,
213 GLuint * inst,
214 struct prog_src_register src[3])
215 {
216 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
217 GL_FALSE,
218 GL_FALSE,
219 t_dst_index(vp, &vpi->DstReg),
220 t_dst_mask(vpi->DstReg.WriteMask),
221 t_dst_class(vpi->DstReg.File));
222 inst[1] = t_src(vp, &src[0]);
223 inst[2] = t_src(vp, &src[1]);
224 inst[3] = __CONST(1, SWIZZLE_ZERO);
225
226 return inst;
227 }
228
229 static GLuint *ei_math1(struct r300_vertex_program_code *vp,
230 GLuint hw_opcode,
231 struct prog_instruction *vpi,
232 GLuint * inst,
233 struct prog_src_register src[3])
234 {
235 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
236 GL_TRUE,
237 GL_FALSE,
238 t_dst_index(vp, &vpi->DstReg),
239 t_dst_mask(vpi->DstReg.WriteMask),
240 t_dst_class(vpi->DstReg.File));
241 inst[1] = t_src_scalar(vp, &src[0]);
242 inst[2] = __CONST(0, SWIZZLE_ZERO);
243 inst[3] = __CONST(0, SWIZZLE_ZERO);
244
245 return inst;
246 }
247
248 static GLuint *ei_lit(struct r300_vertex_program_code *vp,
249 struct prog_instruction *vpi,
250 GLuint * inst,
251 struct prog_src_register src[3])
252 {
253 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
254
255 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
256 GL_TRUE,
257 GL_FALSE,
258 t_dst_index(vp, &vpi->DstReg),
259 t_dst_mask(vpi->DstReg.WriteMask),
260 t_dst_class(vpi->DstReg.File));
261 /* NOTE: Users swizzling might not work. */
262 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
263 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
264 PVS_SRC_SELECT_FORCE_0, // Z
265 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
266 t_src_class(src[0].File),
267 src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
268 (src[0].RelAddr << 4);
269 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
270 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
271 PVS_SRC_SELECT_FORCE_0, // Z
272 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
273 t_src_class(src[0].File),
274 src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
275 (src[0].RelAddr << 4);
276 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y
277 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X
278 PVS_SRC_SELECT_FORCE_0, // Z
279 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W
280 t_src_class(src[0].File),
281 src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
282 (src[0].RelAddr << 4);
283
284 return inst;
285 }
286
287 static GLuint *ei_mad(struct r300_vertex_program_code *vp,
288 struct prog_instruction *vpi,
289 GLuint * inst,
290 struct prog_src_register src[3])
291 {
292 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
293 GL_FALSE,
294 GL_TRUE,
295 t_dst_index(vp, &vpi->DstReg),
296 t_dst_mask(vpi->DstReg.WriteMask),
297 t_dst_class(vpi->DstReg.File));
298 inst[1] = t_src(vp, &src[0]);
299 inst[2] = t_src(vp, &src[1]);
300 inst[3] = t_src(vp, &src[2]);
301
302 return inst;
303 }
304
305 static GLuint *ei_pow(struct r300_vertex_program_code *vp,
306 struct prog_instruction *vpi,
307 GLuint * inst,
308 struct prog_src_register src[3])
309 {
310 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
311 GL_TRUE,
312 GL_FALSE,
313 t_dst_index(vp, &vpi->DstReg),
314 t_dst_mask(vpi->DstReg.WriteMask),
315 t_dst_class(vpi->DstReg.File));
316 inst[1] = t_src_scalar(vp, &src[0]);
317 inst[2] = __CONST(0, SWIZZLE_ZERO);
318 inst[3] = t_src_scalar(vp, &src[1]);
319
320 return inst;
321 }
322
323 static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp)
324 {
325 int i;
326 int cur_reg;
327 GLuint OutputsWritten, InputsRead;
328
329 OutputsWritten = glvp->OutputsWritten;
330 InputsRead = glvp->InputsRead;
331
332 cur_reg = -1;
333 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
334 if (InputsRead & (1 << i))
335 vp->inputs[i] = ++cur_reg;
336 else
337 vp->inputs[i] = -1;
338 }
339
340 cur_reg = 0;
341 for (i = 0; i < VERT_RESULT_MAX; i++)
342 vp->outputs[i] = -1;
343
344 assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
345
346 if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
347 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
348 }
349
350 if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
351 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
352 }
353
354 /* If we're writing back facing colors we need to send
355 * four colors to make front/back face colors selection work.
356 * If the vertex program doesn't write all 4 colors, lets
357 * pretend it does by skipping output index reg so the colors
358 * get written into appropriate output vectors.
359 */
360 if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
361 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
362 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
363 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
364 cur_reg++;
365 }
366
367 if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
368 vp->outputs[VERT_RESULT_COL1] = cur_reg++;
369 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
370 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
371 cur_reg++;
372 }
373
374 if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
375 vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
376 } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
377 cur_reg++;
378 }
379
380 if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
381 vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
382 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
383 cur_reg++;
384 }
385
386 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
387 if (OutputsWritten & (1 << i)) {
388 vp->outputs[i] = cur_reg++;
389 }
390 }
391
392 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
393 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
394 }
395 }
396
397 static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * compiler)
398 {
399 struct prog_instruction *vpi = compiler->program->Instructions;
400 int i;
401 GLuint *inst;
402 unsigned long num_operands;
403 /* Initial value should be last tmp reg that hw supports.
404 Strangely enough r300 doesnt mind even though these would be out of range.
405 Smart enough to realize that it doesnt need it? */
406 int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
407 struct prog_src_register src[3];
408 struct r300_vertex_program_code * vp = compiler->code;
409
410 compiler->code->pos_end = 0; /* Not supported yet */
411 compiler->code->length = 0;
412
413 t_inputs_outputs(compiler->code, compiler->program);
414
415 for (inst = compiler->code->body.d; vpi->Opcode != OPCODE_END;
416 vpi++, inst += 4) {
417
418 {
419 int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i;
420 if((compiler->code->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) {
421 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", compiler->code->num_temporaries, u_temp_used);
422 return GL_FALSE;
423 }
424 u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1;
425 }
426
427 if (!valid_dst(compiler->code, &vpi->DstReg)) {
428 /* redirect result to unused temp */
429 vpi->DstReg.File = PROGRAM_TEMPORARY;
430 vpi->DstReg.Index = u_temp_i;
431 }
432
433 num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
434
435 /* copy the sources (src) from mesa into a local variable... is this needed? */
436 for (i = 0; i < num_operands; i++) {
437 src[i] = vpi->SrcReg[i];
438 }
439
440 if (num_operands == 3) { /* TODO: scalars */
441 if (CMP_SRCS(src[1], src[2])
442 || CMP_SRCS(src[0], src[2])) {
443 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
444 GL_FALSE,
445 GL_FALSE,
446 u_temp_i,
447 WRITEMASK_XYZW,
448 PVS_DST_REG_TEMPORARY);
449 inst[1] =
450 PVS_SRC_OPERAND(t_src_index(compiler->code, &src[2]),
451 SWIZZLE_X,
452 SWIZZLE_Y,
453 SWIZZLE_Z,
454 SWIZZLE_W,
455 t_src_class(src[2].File),
456 NEGATE_NONE) | (src[2].
457 RelAddr <<
458 4);
459 inst[2] = __CONST(2, SWIZZLE_ZERO);
460 inst[3] = __CONST(2, SWIZZLE_ZERO);
461 inst += 4;
462
463 src[2].File = PROGRAM_TEMPORARY;
464 src[2].Index = u_temp_i;
465 src[2].RelAddr = 0;
466 u_temp_i--;
467 }
468 }
469
470 if (num_operands >= 2) {
471 if (CMP_SRCS(src[1], src[0])) {
472 inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
473 GL_FALSE,
474 GL_FALSE,
475 u_temp_i,
476 WRITEMASK_XYZW,
477 PVS_DST_REG_TEMPORARY);
478 inst[1] =
479 PVS_SRC_OPERAND(t_src_index(compiler->code, &src[0]),
480 SWIZZLE_X,
481 SWIZZLE_Y,
482 SWIZZLE_Z,
483 SWIZZLE_W,
484 t_src_class(src[0].File),
485 NEGATE_NONE) | (src[0].
486 RelAddr <<
487 4);
488 inst[2] = __CONST(0, SWIZZLE_ZERO);
489 inst[3] = __CONST(0, SWIZZLE_ZERO);
490 inst += 4;
491
492 src[0].File = PROGRAM_TEMPORARY;
493 src[0].Index = u_temp_i;
494 src[0].RelAddr = 0;
495 u_temp_i--;
496 }
497 }
498
499 switch (vpi->Opcode) {
500 case OPCODE_ADD: inst = ei_vector2(compiler->code, VE_ADD, vpi, inst, src); break;
501 case OPCODE_ARL: inst = ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst, src); break;
502 case OPCODE_DP4: inst = ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst, src); break;
503 case OPCODE_DST: inst = ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst, src); break;
504 case OPCODE_EX2: inst = ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst, src); break;
505 case OPCODE_EXP: inst = ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst, src); break;
506 case OPCODE_FRC: inst = ei_vector1(compiler->code, VE_FRACTION, vpi, inst, src); break;
507 case OPCODE_LG2: inst = ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst, src); break;
508 case OPCODE_LIT: inst = ei_lit(compiler->code, vpi, inst, src); break;
509 case OPCODE_LOG: inst = ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst, src); break;
510 case OPCODE_MAD: inst = ei_mad(compiler->code, vpi, inst, src); break;
511 case OPCODE_MAX: inst = ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst, src); break;
512 case OPCODE_MIN: inst = ei_vector2(compiler->code, VE_MINIMUM, vpi, inst, src); break;
513 case OPCODE_MOV: inst = ei_vector1(compiler->code, VE_ADD, vpi, inst, src); break;
514 case OPCODE_MUL: inst = ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst, src); break;
515 case OPCODE_POW: inst = ei_pow(compiler->code, vpi, inst, src); break;
516 case OPCODE_RCP: inst = ei_math1(compiler->code, ME_RECIP_DX, vpi, inst, src); break;
517 case OPCODE_RSQ: inst = ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst, src); break;
518 case OPCODE_SGE: inst = ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst, src); break;
519 case OPCODE_SLT: inst = ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst, src); break;
520 default:
521 fprintf(stderr, "Unknown opcode %i\n", vpi->Opcode);
522 return GL_FALSE;
523 }
524 }
525
526 compiler->code->length = (inst - compiler->code->body.d);
527 if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
528 return GL_FALSE;
529 }
530
531 return GL_TRUE;
532 }
533
534 static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id)
535 {
536 struct prog_instruction *vpi;
537
538 _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2);
539
540 vpi = &prog->Instructions[prog->NumInstructions - 3];
541
542 vpi->Opcode = OPCODE_MOV;
543
544 vpi->DstReg.File = PROGRAM_OUTPUT;
545 vpi->DstReg.Index = VERT_RESULT_HPOS;
546 vpi->DstReg.WriteMask = WRITEMASK_XYZW;
547 vpi->DstReg.CondMask = COND_TR;
548
549 vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
550 vpi->SrcReg[0].Index = temp_index;
551 vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
552
553 ++vpi;
554
555 vpi->Opcode = OPCODE_MOV;
556
557 vpi->DstReg.File = PROGRAM_OUTPUT;
558 vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
559 vpi->DstReg.WriteMask = WRITEMASK_XYZW;
560 vpi->DstReg.CondMask = COND_TR;
561
562 vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
563 vpi->SrcReg[0].Index = temp_index;
564 vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
565
566 ++vpi;
567
568 vpi->Opcode = OPCODE_END;
569 }
570
571 static void pos_as_texcoord(struct gl_program *prog, int tex_id)
572 {
573 struct prog_instruction *vpi;
574 GLuint tempregi = prog->NumTemporaries;
575
576 prog->NumTemporaries++;
577
578 for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
579 if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) {
580 vpi->DstReg.File = PROGRAM_TEMPORARY;
581 vpi->DstReg.Index = tempregi;
582 }
583 }
584
585 insert_wpos(prog, tempregi, tex_id);
586
587 prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
588 }
589
590 /**
591 * The fogcoord attribute is special in that only the first component
592 * is relevant, and the remaining components are always fixed (when read
593 * from by the fragment program) to yield an X001 pattern.
594 *
595 * We need to enforce this either in the vertex program or in the fragment
596 * program, and this code chooses not to enforce it in the vertex program.
597 * This is slightly cheaper, as long as the fragment program does not use
598 * weird swizzles.
599 *
600 * And it seems that usually, weird swizzles are not used, so...
601 *
602 * See also the counterpart rewriting for fragment programs.
603 */
604 static void fog_as_texcoord(struct gl_program *prog, int tex_id)
605 {
606 struct prog_instruction *vpi;
607
608 vpi = prog->Instructions;
609 while (vpi->Opcode != OPCODE_END) {
610 if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) {
611 vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
612 vpi->DstReg.WriteMask = WRITEMASK_X;
613 }
614
615 ++vpi;
616 }
617
618 prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC);
619 prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
620 }
621
622
623 #define ADD_OUTPUT(fp_attr, vp_result) \
624 do { \
625 if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \
626 OutputsAdded |= 1 << (vp_result); \
627 count++; \
628 } \
629 } while (0)
630
631 static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
632 {
633 GLuint OutputsAdded, FpReads;
634 int i, count;
635
636 OutputsAdded = 0;
637 count = 0;
638 FpReads = compiler->state.FpReads;
639
640 ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
641 ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
642
643 for (i = 0; i < 7; ++i) {
644 ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
645 }
646
647 /* Some outputs may be artificially added, to match the inputs of the fragment program.
648 * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
649 * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
650 */
651 if (count > 0) {
652 struct prog_instruction *inst;
653
654 _mesa_insert_instructions(compiler->program, compiler->program->NumInstructions - 1, count);
655 inst = &compiler->program->Instructions[compiler->program->NumInstructions - 1 - count];
656
657 for (i = 0; i < VERT_RESULT_MAX; ++i) {
658 if (OutputsAdded & (1 << i)) {
659 inst->Opcode = OPCODE_MOV;
660
661 inst->DstReg.File = PROGRAM_OUTPUT;
662 inst->DstReg.Index = i;
663 inst->DstReg.WriteMask = WRITEMASK_XYZW;
664 inst->DstReg.CondMask = COND_TR;
665
666 inst->SrcReg[0].File = PROGRAM_CONSTANT;
667 inst->SrcReg[0].Index = 0;
668 inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
669
670 ++inst;
671 }
672 }
673
674 compiler->program->OutputsWritten |= OutputsAdded;
675 }
676 }
677
678 #undef ADD_OUTPUT
679
680 static void nqssadceInit(struct nqssadce_state* s)
681 {
682 struct r300_vertex_program_compiler * compiler = s->UserData;
683 GLuint fp_reads;
684
685 fp_reads = compiler->state.FpReads;
686 {
687 if (fp_reads & FRAG_BIT_COL0) {
688 s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW;
689 s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW;
690 }
691
692 if (fp_reads & FRAG_BIT_COL1) {
693 s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW;
694 s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW;
695 }
696 }
697
698 {
699 int i;
700 for (i = 0; i < 8; ++i) {
701 if (fp_reads & FRAG_BIT_TEX(i)) {
702 s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW;
703 }
704 }
705 }
706
707 s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
708 if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ))
709 s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
710 }
711
712 static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
713 {
714 (void) opcode;
715 (void) reg;
716
717 return GL_TRUE;
718 }
719
720
721
722 GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler, GLcontext * ctx)
723 {
724 GLboolean success;
725
726 if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) {
727 pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0);
728 }
729
730 if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) {
731 fog_as_texcoord(compiler->program, compiler->state.FogAttr - FRAG_ATTRIB_TEX0);
732 }
733
734 addArtificialOutputs(compiler);
735
736 {
737 struct radeon_program_transformation transformations[] = {
738 { &r300_transform_vertex_alu, 0 },
739 };
740 radeonLocalTransform(compiler->program, 1, transformations);
741 }
742
743 if (compiler->Base.Debug) {
744 fprintf(stderr, "Vertex program after native rewrite:\n");
745 _mesa_print_program(compiler->program);
746 fflush(stdout);
747 }
748
749 {
750 struct radeon_nqssadce_descr nqssadce = {
751 .Init = &nqssadceInit,
752 .IsNativeSwizzle = &swizzleIsNative,
753 .BuildSwizzle = NULL
754 };
755 radeonNqssaDce(compiler->program, &nqssadce, compiler);
756
757 /* We need this step for reusing temporary registers */
758 _mesa_optimize_program(ctx, compiler->program);
759
760 if (compiler->Base.Debug) {
761 fprintf(stderr, "Vertex program after NQSSADCE:\n");
762 _mesa_print_program(compiler->program);
763 fflush(stdout);
764 }
765 }
766
767 assert(compiler->program->NumInstructions);
768 {
769 struct prog_instruction *inst;
770 int max, i, tmp;
771
772 inst = compiler->program->Instructions;
773 max = -1;
774 while (inst->Opcode != OPCODE_END) {
775 tmp = _mesa_num_inst_src_regs(inst->Opcode);
776 for (i = 0; i < tmp; ++i) {
777 if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
778 if ((int) inst->SrcReg[i].Index > max) {
779 max = inst->SrcReg[i].Index;
780 }
781 }
782 }
783
784 if (_mesa_num_inst_dst_regs(inst->Opcode)) {
785 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
786 if ((int) inst->DstReg.Index > max) {
787 max = inst->DstReg.Index;
788 }
789 }
790 }
791 ++inst;
792 }
793
794 /* We actually want highest index of used temporary register,
795 * not the number of temporaries used.
796 * These values aren't always the same.
797 */
798 compiler->code->num_temporaries = max + 1;
799 }
800
801 success = translate_vertex_program(compiler);
802
803 compiler->code->InputsRead = compiler->program->InputsRead;
804 compiler->code->OutputsWritten = compiler->program->OutputsWritten;
805
806 return success;
807 }