r300/vertprog: Refactor wpos rewrite using rc_program
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r3xx_vertprog.c
1 /*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "radeon_compiler.h"
24
25 #include "../r300_reg.h"
26
27 #include "radeon_nqssadce.h"
28 #include "radeon_program.h"
29 #include "radeon_program_alu.h"
30
31 #include "shader/prog_print.h"
32
33
/*
 * Derive a constant source operand from an existing, valid source.
 *
 * The register index, register class and relative-addressing bit are taken
 * from vpi->SrcReg[x]; all four components use swizzle selector y, so
 * passing SWIZZLE_ZERO or SWIZZLE_ONE yields a constant ZERO or ONE source.
 *
 * Expands in terms of `vp` and `vpi`, which must be in scope at the use site.
 */
#define __CONST(x, y) \
	(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \
			 t_swizzle(y), t_swizzle(y), \
			 t_swizzle(y), t_swizzle(y), \
			 t_src_class(vpi->SrcReg[x].File), \
			 NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4))
46
47
48 static unsigned long t_dst_mask(GLuint mask)
49 {
50 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
51 return mask & WRITEMASK_XYZW;
52 }
53
54 static unsigned long t_dst_class(gl_register_file file)
55 {
56
57 switch (file) {
58 case PROGRAM_TEMPORARY:
59 return PVS_DST_REG_TEMPORARY;
60 case PROGRAM_OUTPUT:
61 return PVS_DST_REG_OUT;
62 case PROGRAM_ADDRESS:
63 return PVS_DST_REG_A0;
64 /*
65 case PROGRAM_INPUT:
66 case PROGRAM_LOCAL_PARAM:
67 case PROGRAM_ENV_PARAM:
68 case PROGRAM_NAMED_PARAM:
69 case PROGRAM_STATE_VAR:
70 case PROGRAM_WRITE_ONLY:
71 case PROGRAM_ADDRESS:
72 */
73 default:
74 fprintf(stderr, "problem in %s", __FUNCTION__);
75 _mesa_exit(-1);
76 return -1;
77 }
78 }
79
80 static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
81 struct prog_dst_register *dst)
82 {
83 if (dst->File == PROGRAM_OUTPUT)
84 return vp->outputs[dst->Index];
85
86 return dst->Index;
87 }
88
89 static unsigned long t_src_class(gl_register_file file)
90 {
91 switch (file) {
92 case PROGRAM_TEMPORARY:
93 return PVS_SRC_REG_TEMPORARY;
94 case PROGRAM_INPUT:
95 return PVS_SRC_REG_INPUT;
96 case PROGRAM_LOCAL_PARAM:
97 case PROGRAM_ENV_PARAM:
98 case PROGRAM_NAMED_PARAM:
99 case PROGRAM_CONSTANT:
100 case PROGRAM_STATE_VAR:
101 return PVS_SRC_REG_CONSTANT;
102 /*
103 case PROGRAM_OUTPUT:
104 case PROGRAM_WRITE_ONLY:
105 case PROGRAM_ADDRESS:
106 */
107 default:
108 fprintf(stderr, "problem in %s", __FUNCTION__);
109 _mesa_exit(-1);
110 return -1;
111 }
112 }
113
114 static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b)
115 {
116 unsigned long aclass = t_src_class(a.File);
117 unsigned long bclass = t_src_class(b.File);
118
119 if (aclass != bclass)
120 return GL_FALSE;
121 if (aclass == PVS_SRC_REG_TEMPORARY)
122 return GL_FALSE;
123
124 if (a.RelAddr || b.RelAddr)
125 return GL_TRUE;
126 if (a.Index != b.Index)
127 return GL_TRUE;
128
129 return GL_FALSE;
130 }
131
132 static INLINE unsigned long t_swizzle(GLubyte swizzle)
133 {
134 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
135 return swizzle;
136 }
137
138 static unsigned long t_src_index(struct r300_vertex_program_code *vp,
139 struct prog_src_register *src)
140 {
141 if (src->File == PROGRAM_INPUT) {
142 assert(vp->inputs[src->Index] != -1);
143 return vp->inputs[src->Index];
144 } else {
145 if (src->Index < 0) {
146 fprintf(stderr,
147 "negative offsets for indirect addressing do not work.\n");
148 return 0;
149 }
150 return src->Index;
151 }
152 }
153
154 /* these two functions should probably be merged... */
155
156 static unsigned long t_src(struct r300_vertex_program_code *vp,
157 struct prog_src_register *src)
158 {
159 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
160 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
161 */
162 return PVS_SRC_OPERAND(t_src_index(vp, src),
163 t_swizzle(GET_SWZ(src->Swizzle, 0)),
164 t_swizzle(GET_SWZ(src->Swizzle, 1)),
165 t_swizzle(GET_SWZ(src->Swizzle, 2)),
166 t_swizzle(GET_SWZ(src->Swizzle, 3)),
167 t_src_class(src->File),
168 src->Negate) | (src->RelAddr << 4);
169 }
170
171 static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
172 struct prog_src_register *src)
173 {
174 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
175 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
176 */
177 return PVS_SRC_OPERAND(t_src_index(vp, src),
178 t_swizzle(GET_SWZ(src->Swizzle, 0)),
179 t_swizzle(GET_SWZ(src->Swizzle, 0)),
180 t_swizzle(GET_SWZ(src->Swizzle, 0)),
181 t_swizzle(GET_SWZ(src->Swizzle, 0)),
182 t_src_class(src->File),
183 src->Negate ? NEGATE_XYZW : NEGATE_NONE) |
184 (src->RelAddr << 4);
185 }
186
187 static GLboolean valid_dst(struct r300_vertex_program_code *vp,
188 struct prog_dst_register *dst)
189 {
190 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
191 return GL_FALSE;
192 } else if (dst->File == PROGRAM_ADDRESS) {
193 assert(dst->Index == 0);
194 }
195
196 return GL_TRUE;
197 }
198
199 static void ei_vector1(struct r300_vertex_program_code *vp,
200 GLuint hw_opcode,
201 struct prog_instruction *vpi,
202 GLuint * inst)
203 {
204 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
205 GL_FALSE,
206 GL_FALSE,
207 t_dst_index(vp, &vpi->DstReg),
208 t_dst_mask(vpi->DstReg.WriteMask),
209 t_dst_class(vpi->DstReg.File));
210 inst[1] = t_src(vp, &vpi->SrcReg[0]);
211 inst[2] = __CONST(0, SWIZZLE_ZERO);
212 inst[3] = __CONST(0, SWIZZLE_ZERO);
213 }
214
215 static void ei_vector2(struct r300_vertex_program_code *vp,
216 GLuint hw_opcode,
217 struct prog_instruction *vpi,
218 GLuint * inst)
219 {
220 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
221 GL_FALSE,
222 GL_FALSE,
223 t_dst_index(vp, &vpi->DstReg),
224 t_dst_mask(vpi->DstReg.WriteMask),
225 t_dst_class(vpi->DstReg.File));
226 inst[1] = t_src(vp, &vpi->SrcReg[0]);
227 inst[2] = t_src(vp, &vpi->SrcReg[1]);
228 inst[3] = __CONST(1, SWIZZLE_ZERO);
229 }
230
231 static void ei_math1(struct r300_vertex_program_code *vp,
232 GLuint hw_opcode,
233 struct prog_instruction *vpi,
234 GLuint * inst)
235 {
236 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
237 GL_TRUE,
238 GL_FALSE,
239 t_dst_index(vp, &vpi->DstReg),
240 t_dst_mask(vpi->DstReg.WriteMask),
241 t_dst_class(vpi->DstReg.File));
242 inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
243 inst[2] = __CONST(0, SWIZZLE_ZERO);
244 inst[3] = __CONST(0, SWIZZLE_ZERO);
245 }
246
247 static void ei_lit(struct r300_vertex_program_code *vp,
248 struct prog_instruction *vpi,
249 GLuint * inst)
250 {
251 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
252
253 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
254 GL_TRUE,
255 GL_FALSE,
256 t_dst_index(vp, &vpi->DstReg),
257 t_dst_mask(vpi->DstReg.WriteMask),
258 t_dst_class(vpi->DstReg.File));
259 /* NOTE: Users swizzling might not work. */
260 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
261 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
262 PVS_SRC_SELECT_FORCE_0, // Z
263 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
264 t_src_class(vpi->SrcReg[0].File),
265 vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
266 (vpi->SrcReg[0].RelAddr << 4);
267 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
268 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
269 PVS_SRC_SELECT_FORCE_0, // Z
270 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
271 t_src_class(vpi->SrcReg[0].File),
272 vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
273 (vpi->SrcReg[0].RelAddr << 4);
274 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
275 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
276 PVS_SRC_SELECT_FORCE_0, // Z
277 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
278 t_src_class(vpi->SrcReg[0].File),
279 vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
280 (vpi->SrcReg[0].RelAddr << 4);
281 }
282
283 static void ei_mad(struct r300_vertex_program_code *vp,
284 struct prog_instruction *vpi,
285 GLuint * inst)
286 {
287 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
288 GL_FALSE,
289 GL_TRUE,
290 t_dst_index(vp, &vpi->DstReg),
291 t_dst_mask(vpi->DstReg.WriteMask),
292 t_dst_class(vpi->DstReg.File));
293 inst[1] = t_src(vp, &vpi->SrcReg[0]);
294 inst[2] = t_src(vp, &vpi->SrcReg[1]);
295 inst[3] = t_src(vp, &vpi->SrcReg[2]);
296 }
297
298 static void ei_pow(struct r300_vertex_program_code *vp,
299 struct prog_instruction *vpi,
300 GLuint * inst)
301 {
302 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
303 GL_TRUE,
304 GL_FALSE,
305 t_dst_index(vp, &vpi->DstReg),
306 t_dst_mask(vpi->DstReg.WriteMask),
307 t_dst_class(vpi->DstReg.File));
308 inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
309 inst[2] = __CONST(0, SWIZZLE_ZERO);
310 inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
311 }
312
313 static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
314 {
315 int i;
316 int cur_reg;
317 GLuint OutputsWritten, InputsRead;
318
319 OutputsWritten = c->Base.Program.OutputsWritten;
320 InputsRead = c->Base.Program.InputsRead;
321
322 cur_reg = -1;
323 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
324 if (InputsRead & (1 << i))
325 c->code->inputs[i] = ++cur_reg;
326 else
327 c->code->inputs[i] = -1;
328 }
329
330 cur_reg = 0;
331 for (i = 0; i < VERT_RESULT_MAX; i++)
332 c->code->outputs[i] = -1;
333
334 assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
335
336 if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
337 c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
338 }
339
340 if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
341 c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
342 }
343
344 /* If we're writing back facing colors we need to send
345 * four colors to make front/back face colors selection work.
346 * If the vertex program doesn't write all 4 colors, lets
347 * pretend it does by skipping output index reg so the colors
348 * get written into appropriate output vectors.
349 */
350 if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
351 c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
352 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
353 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
354 cur_reg++;
355 }
356
357 if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
358 c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
359 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
360 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
361 cur_reg++;
362 }
363
364 if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
365 c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
366 } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
367 cur_reg++;
368 }
369
370 if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
371 c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
372 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
373 cur_reg++;
374 }
375
376 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
377 if (OutputsWritten & (1 << i)) {
378 c->code->outputs[i] = cur_reg++;
379 }
380 }
381
382 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
383 c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
384 }
385 }
386
387 static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
388 {
389 struct rc_instruction *rci;
390
391 compiler->code->pos_end = 0; /* Not supported yet */
392 compiler->code->length = 0;
393
394 t_inputs_outputs(compiler);
395
396 for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
397 struct prog_instruction *vpi = &rci->I;
398 GLuint *inst = compiler->code->body.d + compiler->code->length;
399
400 /* Skip instructions writing to non-existing destination */
401 if (!valid_dst(compiler->code, &vpi->DstReg))
402 continue;
403
404 if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
405 rc_error(&compiler->Base, "Vertex program has too many instructions\n");
406 return;
407 }
408
409 switch (vpi->Opcode) {
410 case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
411 case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
412 case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
413 case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
414 case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
415 case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
416 case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
417 case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
418 case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
419 case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
420 case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
421 case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
422 case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
423 case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
424 case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
425 case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
426 case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
427 case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
428 case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
429 case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
430 default:
431 rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode);
432 return;
433 }
434
435 compiler->code->length += 4;
436
437 if (compiler->Base.Error)
438 return;
439 }
440 }
441
442 struct temporary_allocation {
443 GLuint Allocated:1;
444 GLuint HwTemp:15;
445 struct rc_instruction * LastRead;
446 };
447
448 static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
449 {
450 struct rc_instruction *inst;
451 GLuint num_orig_temps = 0;
452 GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS];
453 struct temporary_allocation * ta;
454 GLuint i, j;
455
456 compiler->code->num_temporaries = 0;
457 memset(hwtemps, 0, sizeof(hwtemps));
458
459 /* Pass 1: Count original temporaries and allocate structures */
460 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
461 GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
462 GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
463
464 for (i = 0; i < numsrcs; ++i) {
465 if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
466 if (inst->I.SrcReg[i].Index >= num_orig_temps)
467 num_orig_temps = inst->I.SrcReg[i].Index + 1;
468 }
469 }
470
471 if (numdsts) {
472 if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
473 if (inst->I.DstReg.Index >= num_orig_temps)
474 num_orig_temps = inst->I.DstReg.Index + 1;
475 }
476 }
477 }
478
479 ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
480 sizeof(struct temporary_allocation) * num_orig_temps);
481 memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
482
483 /* Pass 2: Determine original temporary lifetimes */
484 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
485 GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
486
487 for (i = 0; i < numsrcs; ++i) {
488 if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY)
489 ta[inst->I.SrcReg[i].Index].LastRead = inst;
490 }
491 }
492
493 /* Pass 3: Register allocation */
494 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
495 GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
496 GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
497
498 for (i = 0; i < numsrcs; ++i) {
499 if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
500 GLuint orig = inst->I.SrcReg[i].Index;
501 inst->I.SrcReg[i].Index = ta[orig].HwTemp;
502
503 if (ta[orig].Allocated && inst == ta[orig].LastRead)
504 hwtemps[ta[orig].HwTemp] = GL_FALSE;
505 }
506 }
507
508 if (numdsts) {
509 if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
510 GLuint orig = inst->I.DstReg.Index;
511
512 if (!ta[orig].Allocated) {
513 for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
514 if (!hwtemps[j])
515 break;
516 }
517 if (j >= VSF_MAX_FRAGMENT_TEMPS) {
518 fprintf(stderr, "Out of hw temporaries\n");
519 } else {
520 ta[orig].Allocated = GL_TRUE;
521 ta[orig].HwTemp = j;
522 hwtemps[j] = GL_TRUE;
523
524 if (j >= compiler->code->num_temporaries)
525 compiler->code->num_temporaries = j + 1;
526 }
527 }
528
529 inst->I.DstReg.Index = ta[orig].HwTemp;
530 }
531 }
532 }
533 }
534
535
536 /**
537 * Vertex engine cannot read two inputs or two constants at the same time.
538 * Introduce intermediate MOVs to temporary registers to account for this.
539 */
540 static GLboolean transform_source_conflicts(
541 struct radeon_compiler *c,
542 struct rc_instruction* inst,
543 void* unused)
544 {
545 GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode);
546
547 if (num_operands == 3) {
548 if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
549 || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
550 int tmpreg = rc_find_free_temporary(c);
551 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
552 inst_mov->I.Opcode = OPCODE_MOV;
553 inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
554 inst_mov->I.DstReg.Index = tmpreg;
555 inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
556
557 reset_srcreg(&inst->I.SrcReg[2]);
558 inst->I.SrcReg[2].File = PROGRAM_TEMPORARY;
559 inst->I.SrcReg[2].Index = tmpreg;
560 }
561 }
562
563 if (num_operands >= 2) {
564 if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
565 int tmpreg = rc_find_free_temporary(c);
566 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
567 inst_mov->I.Opcode = OPCODE_MOV;
568 inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
569 inst_mov->I.DstReg.Index = tmpreg;
570 inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
571
572 reset_srcreg(&inst->I.SrcReg[1]);
573 inst->I.SrcReg[1].File = PROGRAM_TEMPORARY;
574 inst->I.SrcReg[1].Index = tmpreg;
575 }
576 }
577
578 return GL_TRUE;
579 }
580
581 static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
582 {
583 int i;
584
585 for(i = 0; i < 32; ++i) {
586 if ((compiler->RequiredOutputs & (1 << i)) &&
587 !(compiler->Base.Program.OutputsWritten & (1 << i))) {
588 struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
589 inst->I.Opcode = OPCODE_MOV;
590
591 inst->I.DstReg.File = PROGRAM_OUTPUT;
592 inst->I.DstReg.Index = i;
593 inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
594
595 inst->I.SrcReg[0].File = PROGRAM_CONSTANT;
596 inst->I.SrcReg[0].Index = 0;
597 inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
598
599 compiler->Base.Program.OutputsWritten |= 1 << i;
600 }
601 }
602 }
603
604 static void nqssadceInit(struct nqssadce_state* s)
605 {
606 struct r300_vertex_program_compiler * compiler = s->UserData;
607 int i;
608
609 for(i = 0; i < VERT_RESULT_MAX; ++i) {
610 if (compiler->RequiredOutputs & (1 << i)) {
611 if (i != VERT_RESULT_PSIZ)
612 s->Outputs[i].Sourced = WRITEMASK_XYZW;
613 else
614 s->Outputs[i].Sourced = WRITEMASK_X; /* ugly hack! */
615 }
616 }
617 }
618
619 static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
620 {
621 (void) opcode;
622 (void) reg;
623
624 return GL_TRUE;
625 }
626
627
628
629 void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
630 {
631 rc_mesa_to_rc_program(&compiler->Base, compiler->program);
632 compiler->program = 0;
633
634 if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) {
635 rc_copy_output(&compiler->Base,
636 VERT_RESULT_HPOS,
637 compiler->state.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
638 }
639
640 if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) {
641 rc_move_output(&compiler->Base,
642 VERT_RESULT_FOGC,
643 compiler->state.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
644 }
645
646 addArtificialOutputs(compiler);
647
648 {
649 struct radeon_program_transformation transformations[] = {
650 { &r300_transform_vertex_alu, 0 },
651 };
652 radeonLocalTransform(&compiler->Base, 1, transformations);
653 }
654
655 if (compiler->Base.Debug) {
656 fprintf(stderr, "Vertex program after native rewrite:\n");
657 rc_print_program(&compiler->Base.Program);
658 fflush(stdout);
659 }
660
661 {
662 /* Note: This pass has to be done seperately from ALU rewrite,
663 * otherwise non-native ALU instructions with source conflits
664 * will not be treated properly.
665 */
666 struct radeon_program_transformation transformations[] = {
667 { &transform_source_conflicts, 0 },
668 };
669 radeonLocalTransform(&compiler->Base, 1, transformations);
670 }
671
672 if (compiler->Base.Debug) {
673 fprintf(stderr, "Vertex program after source conflict resolve:\n");
674 rc_print_program(&compiler->Base.Program);
675 fflush(stdout);
676 }
677
678 {
679 struct radeon_nqssadce_descr nqssadce = {
680 .Init = &nqssadceInit,
681 .IsNativeSwizzle = &swizzleIsNative,
682 .BuildSwizzle = NULL
683 };
684 radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
685
686 /* We need this step for reusing temporary registers */
687 allocate_temporary_registers(compiler);
688
689 if (compiler->Base.Debug) {
690 fprintf(stderr, "Vertex program after NQSSADCE:\n");
691 rc_print_program(&compiler->Base.Program);
692 fflush(stdout);
693 }
694 }
695
696 translate_vertex_program(compiler);
697
698 rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
699
700 compiler->code->InputsRead = compiler->Base.Program.InputsRead;
701 compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
702 }