Merge branch 'softpipe-opt'
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r3xx_vertprog.c
1 /*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "radeon_compiler.h"
24
25 #include "../r300_reg.h"
26
27 #include "radeon_nqssadce.h"
28 #include "radeon_program.h"
29 #include "radeon_program_alu.h"
30
31 #include "shader/prog_print.h"
32
33
/*
 * Build a source operand that replicates a single swizzle selector `y`
 * (typically a constant ZERO or ONE selector) into all four components.
 *
 * The register index, register class and relative-addressing bit are taken
 * from vpi->SrcReg[x], which must already be set up and valid.  The macro
 * expects `vp` and `vpi` to be in scope where it is expanded.
 */
#define __CONST(x, y) \
	(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \
			 t_swizzle(y), t_swizzle(y), \
			 t_swizzle(y), t_swizzle(y), \
			 t_src_class(vpi->SrcReg[x].File), \
			 NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4))
46
47
48 static unsigned long t_dst_mask(GLuint mask)
49 {
50 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
51 return mask & WRITEMASK_XYZW;
52 }
53
54 static unsigned long t_dst_class(gl_register_file file)
55 {
56
57 switch (file) {
58 case PROGRAM_TEMPORARY:
59 return PVS_DST_REG_TEMPORARY;
60 case PROGRAM_OUTPUT:
61 return PVS_DST_REG_OUT;
62 case PROGRAM_ADDRESS:
63 return PVS_DST_REG_A0;
64 /*
65 case PROGRAM_INPUT:
66 case PROGRAM_LOCAL_PARAM:
67 case PROGRAM_ENV_PARAM:
68 case PROGRAM_NAMED_PARAM:
69 case PROGRAM_STATE_VAR:
70 case PROGRAM_WRITE_ONLY:
71 case PROGRAM_ADDRESS:
72 */
73 default:
74 fprintf(stderr, "problem in %s", __FUNCTION__);
75 _mesa_exit(-1);
76 return -1;
77 }
78 }
79
80 static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
81 struct prog_dst_register *dst)
82 {
83 if (dst->File == PROGRAM_OUTPUT)
84 return vp->outputs[dst->Index];
85
86 return dst->Index;
87 }
88
89 static unsigned long t_src_class(gl_register_file file)
90 {
91 switch (file) {
92 case PROGRAM_BUILTIN:
93 case PROGRAM_TEMPORARY:
94 return PVS_SRC_REG_TEMPORARY;
95 case PROGRAM_INPUT:
96 return PVS_SRC_REG_INPUT;
97 case PROGRAM_LOCAL_PARAM:
98 case PROGRAM_ENV_PARAM:
99 case PROGRAM_NAMED_PARAM:
100 case PROGRAM_CONSTANT:
101 case PROGRAM_STATE_VAR:
102 return PVS_SRC_REG_CONSTANT;
103 /*
104 case PROGRAM_OUTPUT:
105 case PROGRAM_WRITE_ONLY:
106 case PROGRAM_ADDRESS:
107 */
108 default:
109 fprintf(stderr, "problem in %s", __FUNCTION__);
110 _mesa_exit(-1);
111 return -1;
112 }
113 }
114
115 static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b)
116 {
117 unsigned long aclass = t_src_class(a.File);
118 unsigned long bclass = t_src_class(b.File);
119
120 if (aclass != bclass)
121 return GL_FALSE;
122 if (aclass == PVS_SRC_REG_TEMPORARY)
123 return GL_FALSE;
124
125 if (a.RelAddr || b.RelAddr)
126 return GL_TRUE;
127 if (a.Index != b.Index)
128 return GL_TRUE;
129
130 return GL_FALSE;
131 }
132
133 static INLINE unsigned long t_swizzle(GLubyte swizzle)
134 {
135 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
136 return swizzle;
137 }
138
139 static unsigned long t_src_index(struct r300_vertex_program_code *vp,
140 struct prog_src_register *src)
141 {
142 if (src->File == PROGRAM_INPUT) {
143 assert(vp->inputs[src->Index] != -1);
144 return vp->inputs[src->Index];
145 } else {
146 if (src->Index < 0) {
147 fprintf(stderr,
148 "negative offsets for indirect addressing do not work.\n");
149 return 0;
150 }
151 return src->Index;
152 }
153 }
154
155 /* these two functions should probably be merged... */
156
157 static unsigned long t_src(struct r300_vertex_program_code *vp,
158 struct prog_src_register *src)
159 {
160 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
161 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
162 */
163 return PVS_SRC_OPERAND(t_src_index(vp, src),
164 t_swizzle(GET_SWZ(src->Swizzle, 0)),
165 t_swizzle(GET_SWZ(src->Swizzle, 1)),
166 t_swizzle(GET_SWZ(src->Swizzle, 2)),
167 t_swizzle(GET_SWZ(src->Swizzle, 3)),
168 t_src_class(src->File),
169 src->Negate) | (src->RelAddr << 4);
170 }
171
172 static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
173 struct prog_src_register *src)
174 {
175 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
176 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
177 */
178 return PVS_SRC_OPERAND(t_src_index(vp, src),
179 t_swizzle(GET_SWZ(src->Swizzle, 0)),
180 t_swizzle(GET_SWZ(src->Swizzle, 0)),
181 t_swizzle(GET_SWZ(src->Swizzle, 0)),
182 t_swizzle(GET_SWZ(src->Swizzle, 0)),
183 t_src_class(src->File),
184 src->Negate ? NEGATE_XYZW : NEGATE_NONE) |
185 (src->RelAddr << 4);
186 }
187
188 static GLboolean valid_dst(struct r300_vertex_program_code *vp,
189 struct prog_dst_register *dst)
190 {
191 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
192 return GL_FALSE;
193 } else if (dst->File == PROGRAM_ADDRESS) {
194 assert(dst->Index == 0);
195 }
196
197 return GL_TRUE;
198 }
199
200 static void ei_vector1(struct r300_vertex_program_code *vp,
201 GLuint hw_opcode,
202 struct prog_instruction *vpi,
203 GLuint * inst)
204 {
205 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
206 GL_FALSE,
207 GL_FALSE,
208 t_dst_index(vp, &vpi->DstReg),
209 t_dst_mask(vpi->DstReg.WriteMask),
210 t_dst_class(vpi->DstReg.File));
211 inst[1] = t_src(vp, &vpi->SrcReg[0]);
212 inst[2] = __CONST(0, SWIZZLE_ZERO);
213 inst[3] = __CONST(0, SWIZZLE_ZERO);
214 }
215
216 static void ei_vector2(struct r300_vertex_program_code *vp,
217 GLuint hw_opcode,
218 struct prog_instruction *vpi,
219 GLuint * inst)
220 {
221 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
222 GL_FALSE,
223 GL_FALSE,
224 t_dst_index(vp, &vpi->DstReg),
225 t_dst_mask(vpi->DstReg.WriteMask),
226 t_dst_class(vpi->DstReg.File));
227 inst[1] = t_src(vp, &vpi->SrcReg[0]);
228 inst[2] = t_src(vp, &vpi->SrcReg[1]);
229 inst[3] = __CONST(1, SWIZZLE_ZERO);
230 }
231
232 static void ei_math1(struct r300_vertex_program_code *vp,
233 GLuint hw_opcode,
234 struct prog_instruction *vpi,
235 GLuint * inst)
236 {
237 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
238 GL_TRUE,
239 GL_FALSE,
240 t_dst_index(vp, &vpi->DstReg),
241 t_dst_mask(vpi->DstReg.WriteMask),
242 t_dst_class(vpi->DstReg.File));
243 inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
244 inst[2] = __CONST(0, SWIZZLE_ZERO);
245 inst[3] = __CONST(0, SWIZZLE_ZERO);
246 }
247
248 static void ei_lit(struct r300_vertex_program_code *vp,
249 struct prog_instruction *vpi,
250 GLuint * inst)
251 {
252 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
253
254 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
255 GL_TRUE,
256 GL_FALSE,
257 t_dst_index(vp, &vpi->DstReg),
258 t_dst_mask(vpi->DstReg.WriteMask),
259 t_dst_class(vpi->DstReg.File));
260 /* NOTE: Users swizzling might not work. */
261 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
262 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
263 PVS_SRC_SELECT_FORCE_0, // Z
264 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
265 t_src_class(vpi->SrcReg[0].File),
266 vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
267 (vpi->SrcReg[0].RelAddr << 4);
268 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
269 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
270 PVS_SRC_SELECT_FORCE_0, // Z
271 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
272 t_src_class(vpi->SrcReg[0].File),
273 vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
274 (vpi->SrcReg[0].RelAddr << 4);
275 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
276 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
277 PVS_SRC_SELECT_FORCE_0, // Z
278 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
279 t_src_class(vpi->SrcReg[0].File),
280 vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
281 (vpi->SrcReg[0].RelAddr << 4);
282 }
283
284 static void ei_mad(struct r300_vertex_program_code *vp,
285 struct prog_instruction *vpi,
286 GLuint * inst)
287 {
288 /* Remarks about hardware limitations of MAD
289 * (please preserve this comment, as this information is _NOT_
290 * in the documentation provided by AMD).
291 *
292 * As described in the documentation, MAD with three unique temporary
293 * source registers requires the use of the macro version.
294 *
295 * However (and this is not mentioned in the documentation), apparently
296 * the macro version is _NOT_ a full superset of the normal version.
297 * In particular, the macro version does not always work when relative
298 * addressing is used in the source operands.
299 *
300 * This limitation caused incorrect rendering in Sauerbraten's OpenGL
301 * assembly shader path when using medium quality animations
302 * (i.e. animations with matrix blending instead of quaternion blending).
303 *
304 * Unfortunately, I (nha) have been unable to extract a Piglit regression
305 * test for this issue - for some reason, it is possible to have vertex
306 * programs whose prefix is *exactly* the same as the prefix of the
307 * offending program in Sauerbraten up to the offending instruction
308 * without causing any trouble.
309 *
310 * Bottom line: Only use the macro version only when really necessary;
311 * according to AMD docs, this should improve performance by one clock
312 * as a nice side bonus.
313 */
314 if (vpi->SrcReg[0].File == PROGRAM_TEMPORARY &&
315 vpi->SrcReg[1].File == PROGRAM_TEMPORARY &&
316 vpi->SrcReg[2].File == PROGRAM_TEMPORARY &&
317 vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
318 vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
319 vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
320 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
321 GL_FALSE,
322 GL_TRUE,
323 t_dst_index(vp, &vpi->DstReg),
324 t_dst_mask(vpi->DstReg.WriteMask),
325 t_dst_class(vpi->DstReg.File));
326 } else {
327 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
328 GL_FALSE,
329 GL_FALSE,
330 t_dst_index(vp, &vpi->DstReg),
331 t_dst_mask(vpi->DstReg.WriteMask),
332 t_dst_class(vpi->DstReg.File));
333 }
334 inst[1] = t_src(vp, &vpi->SrcReg[0]);
335 inst[2] = t_src(vp, &vpi->SrcReg[1]);
336 inst[3] = t_src(vp, &vpi->SrcReg[2]);
337 }
338
339 static void ei_pow(struct r300_vertex_program_code *vp,
340 struct prog_instruction *vpi,
341 GLuint * inst)
342 {
343 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
344 GL_TRUE,
345 GL_FALSE,
346 t_dst_index(vp, &vpi->DstReg),
347 t_dst_mask(vpi->DstReg.WriteMask),
348 t_dst_class(vpi->DstReg.File));
349 inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
350 inst[2] = __CONST(0, SWIZZLE_ZERO);
351 inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
352 }
353
354
355 static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
356 {
357 struct rc_instruction *rci;
358
359 compiler->code->pos_end = 0; /* Not supported yet */
360 compiler->code->length = 0;
361
362 compiler->SetHwInputOutput(compiler);
363
364 for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
365 struct prog_instruction *vpi = &rci->I;
366 GLuint *inst = compiler->code->body.d + compiler->code->length;
367
368 /* Skip instructions writing to non-existing destination */
369 if (!valid_dst(compiler->code, &vpi->DstReg))
370 continue;
371
372 if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
373 rc_error(&compiler->Base, "Vertex program has too many instructions\n");
374 return;
375 }
376
377 switch (vpi->Opcode) {
378 case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
379 case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
380 case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
381 case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
382 case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
383 case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
384 case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
385 case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
386 case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
387 case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
388 case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
389 case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
390 case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
391 case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
392 case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
393 case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
394 case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
395 case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
396 case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
397 case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
398 default:
399 rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode);
400 return;
401 }
402
403 compiler->code->length += 4;
404
405 if (compiler->Base.Error)
406 return;
407 }
408 }
409
410 struct temporary_allocation {
411 GLuint Allocated:1;
412 GLuint HwTemp:15;
413 struct rc_instruction * LastRead;
414 };
415
416 static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
417 {
418 struct rc_instruction *inst;
419 GLuint num_orig_temps = 0;
420 GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS];
421 struct temporary_allocation * ta;
422 GLuint i, j;
423
424 compiler->code->num_temporaries = 0;
425 memset(hwtemps, 0, sizeof(hwtemps));
426
427 /* Pass 1: Count original temporaries and allocate structures */
428 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
429 GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
430 GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
431
432 for (i = 0; i < numsrcs; ++i) {
433 if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
434 if (inst->I.SrcReg[i].Index >= num_orig_temps)
435 num_orig_temps = inst->I.SrcReg[i].Index + 1;
436 }
437 }
438
439 if (numdsts) {
440 if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
441 if (inst->I.DstReg.Index >= num_orig_temps)
442 num_orig_temps = inst->I.DstReg.Index + 1;
443 }
444 }
445 }
446
447 ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
448 sizeof(struct temporary_allocation) * num_orig_temps);
449 memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
450
451 /* Pass 2: Determine original temporary lifetimes */
452 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
453 GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
454
455 for (i = 0; i < numsrcs; ++i) {
456 if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY)
457 ta[inst->I.SrcReg[i].Index].LastRead = inst;
458 }
459 }
460
461 /* Pass 3: Register allocation */
462 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
463 GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
464 GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
465
466 for (i = 0; i < numsrcs; ++i) {
467 if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
468 GLuint orig = inst->I.SrcReg[i].Index;
469 inst->I.SrcReg[i].Index = ta[orig].HwTemp;
470
471 if (ta[orig].Allocated && inst == ta[orig].LastRead)
472 hwtemps[ta[orig].HwTemp] = GL_FALSE;
473 }
474 }
475
476 if (numdsts) {
477 if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
478 GLuint orig = inst->I.DstReg.Index;
479
480 if (!ta[orig].Allocated) {
481 for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
482 if (!hwtemps[j])
483 break;
484 }
485 if (j >= VSF_MAX_FRAGMENT_TEMPS) {
486 fprintf(stderr, "Out of hw temporaries\n");
487 } else {
488 ta[orig].Allocated = GL_TRUE;
489 ta[orig].HwTemp = j;
490 hwtemps[j] = GL_TRUE;
491
492 if (j >= compiler->code->num_temporaries)
493 compiler->code->num_temporaries = j + 1;
494 }
495 }
496
497 inst->I.DstReg.Index = ta[orig].HwTemp;
498 }
499 }
500 }
501 }
502
503
504 /**
505 * Vertex engine cannot read two inputs or two constants at the same time.
506 * Introduce intermediate MOVs to temporary registers to account for this.
507 */
508 static GLboolean transform_source_conflicts(
509 struct radeon_compiler *c,
510 struct rc_instruction* inst,
511 void* unused)
512 {
513 GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode);
514
515 if (num_operands == 3) {
516 if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
517 || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
518 int tmpreg = rc_find_free_temporary(c);
519 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
520 inst_mov->I.Opcode = OPCODE_MOV;
521 inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
522 inst_mov->I.DstReg.Index = tmpreg;
523 inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
524
525 reset_srcreg(&inst->I.SrcReg[2]);
526 inst->I.SrcReg[2].File = PROGRAM_TEMPORARY;
527 inst->I.SrcReg[2].Index = tmpreg;
528 }
529 }
530
531 if (num_operands >= 2) {
532 if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
533 int tmpreg = rc_find_free_temporary(c);
534 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
535 inst_mov->I.Opcode = OPCODE_MOV;
536 inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
537 inst_mov->I.DstReg.Index = tmpreg;
538 inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
539
540 reset_srcreg(&inst->I.SrcReg[1]);
541 inst->I.SrcReg[1].File = PROGRAM_TEMPORARY;
542 inst->I.SrcReg[1].Index = tmpreg;
543 }
544 }
545
546 return GL_TRUE;
547 }
548
549 static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
550 {
551 int i;
552
553 for(i = 0; i < 32; ++i) {
554 if ((compiler->RequiredOutputs & (1 << i)) &&
555 !(compiler->Base.Program.OutputsWritten & (1 << i))) {
556 struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
557 inst->I.Opcode = OPCODE_MOV;
558
559 inst->I.DstReg.File = PROGRAM_OUTPUT;
560 inst->I.DstReg.Index = i;
561 inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
562
563 inst->I.SrcReg[0].File = PROGRAM_CONSTANT;
564 inst->I.SrcReg[0].Index = 0;
565 inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
566
567 compiler->Base.Program.OutputsWritten |= 1 << i;
568 }
569 }
570 }
571
572 static void nqssadceInit(struct nqssadce_state* s)
573 {
574 struct r300_vertex_program_compiler * compiler = s->UserData;
575 int i;
576
577 for(i = 0; i < VERT_RESULT_MAX; ++i) {
578 if (compiler->RequiredOutputs & (1 << i))
579 s->Outputs[i].Sourced = WRITEMASK_XYZW;
580 }
581 }
582
583 static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
584 {
585 (void) opcode;
586 (void) reg;
587
588 return GL_TRUE;
589 }
590
591
592
593 void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
594 {
595 addArtificialOutputs(compiler);
596
597 {
598 struct radeon_program_transformation transformations[] = {
599 { &r300_transform_vertex_alu, 0 },
600 };
601 radeonLocalTransform(&compiler->Base, 1, transformations);
602 }
603
604 if (compiler->Base.Debug) {
605 fprintf(stderr, "Vertex program after native rewrite:\n");
606 rc_print_program(&compiler->Base.Program);
607 fflush(stderr);
608 }
609
610 {
611 /* Note: This pass has to be done seperately from ALU rewrite,
612 * otherwise non-native ALU instructions with source conflits
613 * will not be treated properly.
614 */
615 struct radeon_program_transformation transformations[] = {
616 { &transform_source_conflicts, 0 },
617 };
618 radeonLocalTransform(&compiler->Base, 1, transformations);
619 }
620
621 if (compiler->Base.Debug) {
622 fprintf(stderr, "Vertex program after source conflict resolve:\n");
623 rc_print_program(&compiler->Base.Program);
624 fflush(stderr);
625 }
626
627 {
628 struct radeon_nqssadce_descr nqssadce = {
629 .Init = &nqssadceInit,
630 .IsNativeSwizzle = &swizzleIsNative,
631 .BuildSwizzle = NULL
632 };
633 radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
634
635 /* We need this step for reusing temporary registers */
636 allocate_temporary_registers(compiler);
637
638 if (compiler->Base.Debug) {
639 fprintf(stderr, "Vertex program after NQSSADCE:\n");
640 rc_print_program(&compiler->Base.Program);
641 fflush(stderr);
642 }
643 }
644
645 translate_vertex_program(compiler);
646
647 rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
648
649 compiler->code->InputsRead = compiler->Base.Program.InputsRead;
650 compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
651
652 if (compiler->Base.Debug) {
653 fprintf(stderr, "Final vertex program code:\n");
654 r300_vertex_program_dump(compiler->code);
655 }
656 }