r300/compiler: Introduce control flow instructions and refactor dataflow
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r3xx_vertprog.c
1 /*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "radeon_compiler.h"
24
25 #include <stdio.h>
26
27 #include "../r300_reg.h"
28
29 #include "radeon_dataflow.h"
30 #include "radeon_program_alu.h"
31 #include "radeon_swizzle.h"
32
33
/*
 * Build a PVS source operand that yields a constant (e.g. ZERO or ONE).
 *
 * The operand reuses the register index, register class and RelAddr bit of
 * source operand number `x` of the current instruction, which must already
 * be set up and valid, and replicates swizzle selector `y` into all four
 * channels with no negation.
 *
 * The macro expects `vp` and `vpi` to be in scope at the point of use.
 */
#define __CONST(x, y) \
	(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[(x)]), \
			 t_swizzle((y)), \
			 t_swizzle((y)), \
			 t_swizzle((y)), \
			 t_swizzle((y)), \
			 t_src_class(vpi->SrcReg[(x)].File), \
			 RC_MASK_NONE) | (vpi->SrcReg[(x)].RelAddr << 4))
46
47
48 static unsigned long t_dst_mask(unsigned int mask)
49 {
50 /* RC_MASK_* is equivalent to VSF_FLAG_* */
51 return mask & RC_MASK_XYZW;
52 }
53
54 static unsigned long t_dst_class(rc_register_file file)
55 {
56 switch (file) {
57 default:
58 fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
59 /* fall-through */
60 case RC_FILE_TEMPORARY:
61 return PVS_DST_REG_TEMPORARY;
62 case RC_FILE_OUTPUT:
63 return PVS_DST_REG_OUT;
64 case RC_FILE_ADDRESS:
65 return PVS_DST_REG_A0;
66 }
67 }
68
69 static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
70 struct rc_dst_register *dst)
71 {
72 if (dst->File == RC_FILE_OUTPUT)
73 return vp->outputs[dst->Index];
74
75 return dst->Index;
76 }
77
78 static unsigned long t_src_class(rc_register_file file)
79 {
80 switch (file) {
81 default:
82 fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
83 /* fall-through */
84 case RC_FILE_NONE:
85 case RC_FILE_TEMPORARY:
86 return PVS_SRC_REG_TEMPORARY;
87 case RC_FILE_INPUT:
88 return PVS_SRC_REG_INPUT;
89 case RC_FILE_CONSTANT:
90 return PVS_SRC_REG_CONSTANT;
91 }
92 }
93
94 static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
95 {
96 unsigned long aclass = t_src_class(a.File);
97 unsigned long bclass = t_src_class(b.File);
98
99 if (aclass != bclass)
100 return 0;
101 if (aclass == PVS_SRC_REG_TEMPORARY)
102 return 0;
103
104 if (a.RelAddr || b.RelAddr)
105 return 1;
106 if (a.Index != b.Index)
107 return 1;
108
109 return 0;
110 }
111
/**
 * Translate a compiler swizzle selector into the hardware encoding.
 *
 * This is an identity mapping: the RC_SWIZZLE_* values are all identical to
 * the VSF_IN_COMPONENT_* values.
 */
static inline unsigned long t_swizzle(unsigned int swizzle)
{
	return (unsigned long)swizzle;
}
117
118 static unsigned long t_src_index(struct r300_vertex_program_code *vp,
119 struct rc_src_register *src)
120 {
121 if (src->File == RC_FILE_INPUT) {
122 assert(vp->inputs[src->Index] != -1);
123 return vp->inputs[src->Index];
124 } else {
125 if (src->Index < 0) {
126 fprintf(stderr,
127 "negative offsets for indirect addressing do not work.\n");
128 return 0;
129 }
130 return src->Index;
131 }
132 }
133
134 /* these two functions should probably be merged... */
135
136 static unsigned long t_src(struct r300_vertex_program_code *vp,
137 struct rc_src_register *src)
138 {
139 /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
140 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
141 */
142 return PVS_SRC_OPERAND(t_src_index(vp, src),
143 t_swizzle(GET_SWZ(src->Swizzle, 0)),
144 t_swizzle(GET_SWZ(src->Swizzle, 1)),
145 t_swizzle(GET_SWZ(src->Swizzle, 2)),
146 t_swizzle(GET_SWZ(src->Swizzle, 3)),
147 t_src_class(src->File),
148 src->Negate) | (src->RelAddr << 4);
149 }
150
151 static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
152 struct rc_src_register *src)
153 {
154 /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
155 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
156 */
157 return PVS_SRC_OPERAND(t_src_index(vp, src),
158 t_swizzle(GET_SWZ(src->Swizzle, 0)),
159 t_swizzle(GET_SWZ(src->Swizzle, 0)),
160 t_swizzle(GET_SWZ(src->Swizzle, 0)),
161 t_swizzle(GET_SWZ(src->Swizzle, 0)),
162 t_src_class(src->File),
163 src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
164 (src->RelAddr << 4);
165 }
166
167 static int valid_dst(struct r300_vertex_program_code *vp,
168 struct rc_dst_register *dst)
169 {
170 if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) {
171 return 0;
172 } else if (dst->File == RC_FILE_ADDRESS) {
173 assert(dst->Index == 0);
174 }
175
176 return 1;
177 }
178
179 static void ei_vector1(struct r300_vertex_program_code *vp,
180 unsigned int hw_opcode,
181 struct rc_sub_instruction *vpi,
182 unsigned int * inst)
183 {
184 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
185 0,
186 0,
187 t_dst_index(vp, &vpi->DstReg),
188 t_dst_mask(vpi->DstReg.WriteMask),
189 t_dst_class(vpi->DstReg.File));
190 inst[1] = t_src(vp, &vpi->SrcReg[0]);
191 inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
192 inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
193 }
194
195 static void ei_vector2(struct r300_vertex_program_code *vp,
196 unsigned int hw_opcode,
197 struct rc_sub_instruction *vpi,
198 unsigned int * inst)
199 {
200 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
201 0,
202 0,
203 t_dst_index(vp, &vpi->DstReg),
204 t_dst_mask(vpi->DstReg.WriteMask),
205 t_dst_class(vpi->DstReg.File));
206 inst[1] = t_src(vp, &vpi->SrcReg[0]);
207 inst[2] = t_src(vp, &vpi->SrcReg[1]);
208 inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
209 }
210
211 static void ei_math1(struct r300_vertex_program_code *vp,
212 unsigned int hw_opcode,
213 struct rc_sub_instruction *vpi,
214 unsigned int * inst)
215 {
216 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
217 1,
218 0,
219 t_dst_index(vp, &vpi->DstReg),
220 t_dst_mask(vpi->DstReg.WriteMask),
221 t_dst_class(vpi->DstReg.File));
222 inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
223 inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
224 inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
225 }
226
227 static void ei_lit(struct r300_vertex_program_code *vp,
228 struct rc_sub_instruction *vpi,
229 unsigned int * inst)
230 {
231 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
232
233 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
234 1,
235 0,
236 t_dst_index(vp, &vpi->DstReg),
237 t_dst_mask(vpi->DstReg.WriteMask),
238 t_dst_class(vpi->DstReg.File));
239 /* NOTE: Users swizzling might not work. */
240 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
241 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
242 PVS_SRC_SELECT_FORCE_0, // Z
243 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
244 t_src_class(vpi->SrcReg[0].File),
245 vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
246 (vpi->SrcReg[0].RelAddr << 4);
247 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
248 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
249 PVS_SRC_SELECT_FORCE_0, // Z
250 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
251 t_src_class(vpi->SrcReg[0].File),
252 vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
253 (vpi->SrcReg[0].RelAddr << 4);
254 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
255 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
256 PVS_SRC_SELECT_FORCE_0, // Z
257 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
258 t_src_class(vpi->SrcReg[0].File),
259 vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
260 (vpi->SrcReg[0].RelAddr << 4);
261 }
262
263 static void ei_mad(struct r300_vertex_program_code *vp,
264 struct rc_sub_instruction *vpi,
265 unsigned int * inst)
266 {
267 /* Remarks about hardware limitations of MAD
268 * (please preserve this comment, as this information is _NOT_
269 * in the documentation provided by AMD).
270 *
271 * As described in the documentation, MAD with three unique temporary
272 * source registers requires the use of the macro version.
273 *
274 * However (and this is not mentioned in the documentation), apparently
275 * the macro version is _NOT_ a full superset of the normal version.
276 * In particular, the macro version does not always work when relative
277 * addressing is used in the source operands.
278 *
279 * This limitation caused incorrect rendering in Sauerbraten's OpenGL
280 * assembly shader path when using medium quality animations
281 * (i.e. animations with matrix blending instead of quaternion blending).
282 *
283 * Unfortunately, I (nha) have been unable to extract a Piglit regression
284 * test for this issue - for some reason, it is possible to have vertex
285 * programs whose prefix is *exactly* the same as the prefix of the
286 * offending program in Sauerbraten up to the offending instruction
287 * without causing any trouble.
288 *
289 * Bottom line: Only use the macro version only when really necessary;
290 * according to AMD docs, this should improve performance by one clock
291 * as a nice side bonus.
292 */
293 if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
294 vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
295 vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
296 vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
297 vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
298 vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
299 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
300 0,
301 1,
302 t_dst_index(vp, &vpi->DstReg),
303 t_dst_mask(vpi->DstReg.WriteMask),
304 t_dst_class(vpi->DstReg.File));
305 } else {
306 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
307 0,
308 0,
309 t_dst_index(vp, &vpi->DstReg),
310 t_dst_mask(vpi->DstReg.WriteMask),
311 t_dst_class(vpi->DstReg.File));
312 }
313 inst[1] = t_src(vp, &vpi->SrcReg[0]);
314 inst[2] = t_src(vp, &vpi->SrcReg[1]);
315 inst[3] = t_src(vp, &vpi->SrcReg[2]);
316 }
317
318 static void ei_pow(struct r300_vertex_program_code *vp,
319 struct rc_sub_instruction *vpi,
320 unsigned int * inst)
321 {
322 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
323 1,
324 0,
325 t_dst_index(vp, &vpi->DstReg),
326 t_dst_mask(vpi->DstReg.WriteMask),
327 t_dst_class(vpi->DstReg.File));
328 inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
329 inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
330 inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
331 }
332
333
334 static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
335 {
336 struct rc_instruction *rci;
337
338 compiler->code->pos_end = 0; /* Not supported yet */
339 compiler->code->length = 0;
340
341 compiler->SetHwInputOutput(compiler);
342
343 for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
344 struct rc_sub_instruction *vpi = &rci->I;
345 unsigned int *inst = compiler->code->body.d + compiler->code->length;
346
347 /* Skip instructions writing to non-existing destination */
348 if (!valid_dst(compiler->code, &vpi->DstReg))
349 continue;
350
351 if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
352 rc_error(&compiler->Base, "Vertex program has too many instructions\n");
353 return;
354 }
355
356 switch (vpi->Opcode) {
357 case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
358 case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
359 case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
360 case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
361 case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
362 case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
363 case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
364 case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
365 case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
366 case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
367 case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
368 case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
369 case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
370 case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
371 case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
372 case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
373 case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
374 case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
375 case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
376 case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
377 default:
378 rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode);
379 return;
380 }
381
382 compiler->code->length += 4;
383
384 if (compiler->Base.Error)
385 return;
386 }
387 }
388
/**
 * Per-original-temporary bookkeeping used by allocate_temporary_registers.
 */
struct temporary_allocation {
	/* set once a hardware temporary has been assigned */
	unsigned int Allocated : 1;
	/* index of the assigned hardware temporary */
	unsigned int HwTemp : 15;
	/* last instruction that reads this temporary */
	struct rc_instruction *LastRead;
};
394
395 static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
396 {
397 struct rc_instruction *inst;
398 unsigned int num_orig_temps = 0;
399 char hwtemps[VSF_MAX_FRAGMENT_TEMPS];
400 struct temporary_allocation * ta;
401 unsigned int i, j;
402
403 compiler->code->num_temporaries = 0;
404 memset(hwtemps, 0, sizeof(hwtemps));
405
406 /* Pass 1: Count original temporaries and allocate structures */
407 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
408 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
409
410 for (i = 0; i < opcode->NumSrcRegs; ++i) {
411 if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY) {
412 if (inst->I.SrcReg[i].Index >= num_orig_temps)
413 num_orig_temps = inst->I.SrcReg[i].Index + 1;
414 }
415 }
416
417 if (opcode->HasDstReg) {
418 if (inst->I.DstReg.File == RC_FILE_TEMPORARY) {
419 if (inst->I.DstReg.Index >= num_orig_temps)
420 num_orig_temps = inst->I.DstReg.Index + 1;
421 }
422 }
423 }
424
425 ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
426 sizeof(struct temporary_allocation) * num_orig_temps);
427 memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
428
429 /* Pass 2: Determine original temporary lifetimes */
430 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
431 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
432
433 for (i = 0; i < opcode->NumSrcRegs; ++i) {
434 if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY)
435 ta[inst->I.SrcReg[i].Index].LastRead = inst;
436 }
437 }
438
439 /* Pass 3: Register allocation */
440 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
441 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
442
443 for (i = 0; i < opcode->NumSrcRegs; ++i) {
444 if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY) {
445 unsigned int orig = inst->I.SrcReg[i].Index;
446 inst->I.SrcReg[i].Index = ta[orig].HwTemp;
447
448 if (ta[orig].Allocated && inst == ta[orig].LastRead)
449 hwtemps[ta[orig].HwTemp] = 0;
450 }
451 }
452
453 if (opcode->HasDstReg) {
454 if (inst->I.DstReg.File == RC_FILE_TEMPORARY) {
455 unsigned int orig = inst->I.DstReg.Index;
456
457 if (!ta[orig].Allocated) {
458 for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
459 if (!hwtemps[j])
460 break;
461 }
462 if (j >= VSF_MAX_FRAGMENT_TEMPS) {
463 fprintf(stderr, "Out of hw temporaries\n");
464 } else {
465 ta[orig].Allocated = 1;
466 ta[orig].HwTemp = j;
467 hwtemps[j] = 1;
468
469 if (j >= compiler->code->num_temporaries)
470 compiler->code->num_temporaries = j + 1;
471 }
472 }
473
474 inst->I.DstReg.Index = ta[orig].HwTemp;
475 }
476 }
477 }
478 }
479
480
481 /**
482 * Vertex engine cannot read two inputs or two constants at the same time.
483 * Introduce intermediate MOVs to temporary registers to account for this.
484 */
485 static int transform_source_conflicts(
486 struct radeon_compiler *c,
487 struct rc_instruction* inst,
488 void* unused)
489 {
490 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
491
492 if (opcode->NumSrcRegs == 3) {
493 if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
494 || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
495 int tmpreg = rc_find_free_temporary(c);
496 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
497 inst_mov->I.Opcode = RC_OPCODE_MOV;
498 inst_mov->I.DstReg.File = RC_FILE_TEMPORARY;
499 inst_mov->I.DstReg.Index = tmpreg;
500 inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
501
502 reset_srcreg(&inst->I.SrcReg[2]);
503 inst->I.SrcReg[2].File = RC_FILE_TEMPORARY;
504 inst->I.SrcReg[2].Index = tmpreg;
505 }
506 }
507
508 if (opcode->NumSrcRegs >= 2) {
509 if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
510 int tmpreg = rc_find_free_temporary(c);
511 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
512 inst_mov->I.Opcode = RC_OPCODE_MOV;
513 inst_mov->I.DstReg.File = RC_FILE_TEMPORARY;
514 inst_mov->I.DstReg.Index = tmpreg;
515 inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
516
517 reset_srcreg(&inst->I.SrcReg[1]);
518 inst->I.SrcReg[1].File = RC_FILE_TEMPORARY;
519 inst->I.SrcReg[1].Index = tmpreg;
520 }
521 }
522
523 return 1;
524 }
525
526 static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
527 {
528 int i;
529
530 for(i = 0; i < 32; ++i) {
531 if ((compiler->RequiredOutputs & (1 << i)) &&
532 !(compiler->Base.Program.OutputsWritten & (1 << i))) {
533 struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
534 inst->I.Opcode = RC_OPCODE_MOV;
535
536 inst->I.DstReg.File = RC_FILE_OUTPUT;
537 inst->I.DstReg.Index = i;
538 inst->I.DstReg.WriteMask = RC_MASK_XYZW;
539
540 inst->I.SrcReg[0].File = RC_FILE_CONSTANT;
541 inst->I.SrcReg[0].Index = 0;
542 inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
543
544 compiler->Base.Program.OutputsWritten |= 1 << i;
545 }
546 }
547 }
548
549 static void dataflow_outputs_mark_used(void * userdata, void * data,
550 void (*callback)(void *, unsigned int, unsigned int))
551 {
552 struct r300_vertex_program_compiler * c = userdata;
553 int i;
554
555 for(i = 0; i < 32; ++i) {
556 if (c->RequiredOutputs & (1 << i))
557 callback(data, i, RC_MASK_XYZW);
558 }
559 }
560
561 static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
562 {
563 (void) opcode;
564 (void) reg;
565
566 return 1;
567 }
568
569
570 static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
571 .IsNative = &swizzle_is_native,
572 .Split = 0 /* should never be called */
573 };
574
575
576 void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
577 {
578 compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
579
580 addArtificialOutputs(compiler);
581
582 {
583 struct radeon_program_transformation transformations[] = {
584 { &r300_transform_vertex_alu, 0 },
585 };
586 radeonLocalTransform(&compiler->Base, 1, transformations);
587 }
588
589 if (compiler->Base.Debug) {
590 fprintf(stderr, "Vertex program after native rewrite:\n");
591 rc_print_program(&compiler->Base.Program);
592 fflush(stderr);
593 }
594
595 {
596 /* Note: This pass has to be done seperately from ALU rewrite,
597 * otherwise non-native ALU instructions with source conflits
598 * will not be treated properly.
599 */
600 struct radeon_program_transformation transformations[] = {
601 { &transform_source_conflicts, 0 },
602 };
603 radeonLocalTransform(&compiler->Base, 1, transformations);
604 }
605
606 if (compiler->Base.Debug) {
607 fprintf(stderr, "Vertex program after source conflict resolve:\n");
608 rc_print_program(&compiler->Base.Program);
609 fflush(stderr);
610 }
611
612 rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler);
613
614 if (compiler->Base.Debug) {
615 fprintf(stderr, "Vertex program after deadcode:\n");
616 rc_print_program(&compiler->Base.Program);
617 fflush(stderr);
618 }
619
620 rc_dataflow_swizzles(&compiler->Base);
621
622 allocate_temporary_registers(compiler);
623
624 if (compiler->Base.Debug) {
625 fprintf(stderr, "Vertex program after dataflow:\n");
626 rc_print_program(&compiler->Base.Program);
627 fflush(stderr);
628 }
629
630 translate_vertex_program(compiler);
631
632 rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
633
634 compiler->code->InputsRead = compiler->Base.Program.InputsRead;
635 compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
636
637 if (compiler->Base.Debug) {
638 fprintf(stderr, "Final vertex program code:\n");
639 r300_vertex_program_dump(compiler->code);
640 }
641 }