r300/compiler: New dataflow structures and passes
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r3xx_vertprog.c
1 /*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "radeon_compiler.h"
24
25 #include <stdio.h>
26
27 #include "../r300_reg.h"
28
29 #include "radeon_dataflow.h"
30 #include "radeon_program_alu.h"
31 #include "radeon_swizzle.h"
32
33
/*
 * Build a source operand that reads a constant ZERO or ONE.
 *
 * The operand reuses source register `x` of the current instruction (which
 * must already be set up and valid) and replaces all four component
 * selects with the swizzle `y`. No component is negated, and the RelAddr
 * flag of the register is carried over into bit 4.
 *
 * The macro expects `vp` and `vpi` to be in scope at the point of use.
 */
#define __CONST(x, y) \
	(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_src_class(vpi->SrcReg[x].File), \
			 RC_MASK_NONE) \
	 | (vpi->SrcReg[x].RelAddr << 4))
46
47
48 static unsigned long t_dst_mask(unsigned int mask)
49 {
50 /* RC_MASK_* is equivalent to VSF_FLAG_* */
51 return mask & RC_MASK_XYZW;
52 }
53
54 static unsigned long t_dst_class(rc_register_file file)
55 {
56 switch (file) {
57 default:
58 fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
59 /* fall-through */
60 case RC_FILE_TEMPORARY:
61 return PVS_DST_REG_TEMPORARY;
62 case RC_FILE_OUTPUT:
63 return PVS_DST_REG_OUT;
64 case RC_FILE_ADDRESS:
65 return PVS_DST_REG_A0;
66 }
67 }
68
69 static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
70 struct rc_dst_register *dst)
71 {
72 if (dst->File == RC_FILE_OUTPUT)
73 return vp->outputs[dst->Index];
74
75 return dst->Index;
76 }
77
78 static unsigned long t_src_class(rc_register_file file)
79 {
80 switch (file) {
81 default:
82 fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
83 /* fall-through */
84 case RC_FILE_TEMPORARY:
85 return PVS_SRC_REG_TEMPORARY;
86 case RC_FILE_INPUT:
87 return PVS_SRC_REG_INPUT;
88 case RC_FILE_CONSTANT:
89 return PVS_SRC_REG_CONSTANT;
90 }
91 }
92
93 static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
94 {
95 unsigned long aclass = t_src_class(a.File);
96 unsigned long bclass = t_src_class(b.File);
97
98 if (aclass != bclass)
99 return 0;
100 if (aclass == PVS_SRC_REG_TEMPORARY)
101 return 0;
102
103 if (a.RelAddr || b.RelAddr)
104 return 1;
105 if (a.Index != b.Index)
106 return 1;
107
108 return 0;
109 }
110
/*
 * Translate a compiler swizzle selector into the hardware encoding.
 */
static inline unsigned long t_swizzle(unsigned int swizzle)
{
	/* The RC_SWIZZLE_* values are identical to VSF_IN_COMPONENT_*, so
	 * the value is passed through unchanged; only the type widens. */
	return (unsigned long)swizzle;
}
116
117 static unsigned long t_src_index(struct r300_vertex_program_code *vp,
118 struct rc_src_register *src)
119 {
120 if (src->File == RC_FILE_INPUT) {
121 assert(vp->inputs[src->Index] != -1);
122 return vp->inputs[src->Index];
123 } else {
124 if (src->Index < 0) {
125 fprintf(stderr,
126 "negative offsets for indirect addressing do not work.\n");
127 return 0;
128 }
129 return src->Index;
130 }
131 }
132
133 /* these two functions should probably be merged... */
134
135 static unsigned long t_src(struct r300_vertex_program_code *vp,
136 struct rc_src_register *src)
137 {
138 /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
139 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
140 */
141 return PVS_SRC_OPERAND(t_src_index(vp, src),
142 t_swizzle(GET_SWZ(src->Swizzle, 0)),
143 t_swizzle(GET_SWZ(src->Swizzle, 1)),
144 t_swizzle(GET_SWZ(src->Swizzle, 2)),
145 t_swizzle(GET_SWZ(src->Swizzle, 3)),
146 t_src_class(src->File),
147 src->Negate) | (src->RelAddr << 4);
148 }
149
150 static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
151 struct rc_src_register *src)
152 {
153 /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
154 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
155 */
156 return PVS_SRC_OPERAND(t_src_index(vp, src),
157 t_swizzle(GET_SWZ(src->Swizzle, 0)),
158 t_swizzle(GET_SWZ(src->Swizzle, 0)),
159 t_swizzle(GET_SWZ(src->Swizzle, 0)),
160 t_swizzle(GET_SWZ(src->Swizzle, 0)),
161 t_src_class(src->File),
162 src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
163 (src->RelAddr << 4);
164 }
165
166 static int valid_dst(struct r300_vertex_program_code *vp,
167 struct rc_dst_register *dst)
168 {
169 if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) {
170 return 0;
171 } else if (dst->File == RC_FILE_ADDRESS) {
172 assert(dst->Index == 0);
173 }
174
175 return 1;
176 }
177
178 static void ei_vector1(struct r300_vertex_program_code *vp,
179 unsigned int hw_opcode,
180 struct rc_sub_instruction *vpi,
181 unsigned int * inst)
182 {
183 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
184 0,
185 0,
186 t_dst_index(vp, &vpi->DstReg),
187 t_dst_mask(vpi->DstReg.WriteMask),
188 t_dst_class(vpi->DstReg.File));
189 inst[1] = t_src(vp, &vpi->SrcReg[0]);
190 inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
191 inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
192 }
193
194 static void ei_vector2(struct r300_vertex_program_code *vp,
195 unsigned int hw_opcode,
196 struct rc_sub_instruction *vpi,
197 unsigned int * inst)
198 {
199 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
200 0,
201 0,
202 t_dst_index(vp, &vpi->DstReg),
203 t_dst_mask(vpi->DstReg.WriteMask),
204 t_dst_class(vpi->DstReg.File));
205 inst[1] = t_src(vp, &vpi->SrcReg[0]);
206 inst[2] = t_src(vp, &vpi->SrcReg[1]);
207 inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
208 }
209
210 static void ei_math1(struct r300_vertex_program_code *vp,
211 unsigned int hw_opcode,
212 struct rc_sub_instruction *vpi,
213 unsigned int * inst)
214 {
215 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
216 1,
217 0,
218 t_dst_index(vp, &vpi->DstReg),
219 t_dst_mask(vpi->DstReg.WriteMask),
220 t_dst_class(vpi->DstReg.File));
221 inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
222 inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
223 inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
224 }
225
226 static void ei_lit(struct r300_vertex_program_code *vp,
227 struct rc_sub_instruction *vpi,
228 unsigned int * inst)
229 {
230 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
231
232 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
233 1,
234 0,
235 t_dst_index(vp, &vpi->DstReg),
236 t_dst_mask(vpi->DstReg.WriteMask),
237 t_dst_class(vpi->DstReg.File));
238 /* NOTE: Users swizzling might not work. */
239 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
240 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
241 PVS_SRC_SELECT_FORCE_0, // Z
242 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
243 t_src_class(vpi->SrcReg[0].File),
244 vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
245 (vpi->SrcReg[0].RelAddr << 4);
246 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
247 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
248 PVS_SRC_SELECT_FORCE_0, // Z
249 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
250 t_src_class(vpi->SrcReg[0].File),
251 vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
252 (vpi->SrcReg[0].RelAddr << 4);
253 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
254 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
255 PVS_SRC_SELECT_FORCE_0, // Z
256 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
257 t_src_class(vpi->SrcReg[0].File),
258 vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
259 (vpi->SrcReg[0].RelAddr << 4);
260 }
261
262 static void ei_mad(struct r300_vertex_program_code *vp,
263 struct rc_sub_instruction *vpi,
264 unsigned int * inst)
265 {
266 /* Remarks about hardware limitations of MAD
267 * (please preserve this comment, as this information is _NOT_
268 * in the documentation provided by AMD).
269 *
270 * As described in the documentation, MAD with three unique temporary
271 * source registers requires the use of the macro version.
272 *
273 * However (and this is not mentioned in the documentation), apparently
274 * the macro version is _NOT_ a full superset of the normal version.
275 * In particular, the macro version does not always work when relative
276 * addressing is used in the source operands.
277 *
278 * This limitation caused incorrect rendering in Sauerbraten's OpenGL
279 * assembly shader path when using medium quality animations
280 * (i.e. animations with matrix blending instead of quaternion blending).
281 *
282 * Unfortunately, I (nha) have been unable to extract a Piglit regression
283 * test for this issue - for some reason, it is possible to have vertex
284 * programs whose prefix is *exactly* the same as the prefix of the
285 * offending program in Sauerbraten up to the offending instruction
286 * without causing any trouble.
287 *
288 * Bottom line: Only use the macro version only when really necessary;
289 * according to AMD docs, this should improve performance by one clock
290 * as a nice side bonus.
291 */
292 if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
293 vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
294 vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
295 vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
296 vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
297 vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
298 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
299 0,
300 1,
301 t_dst_index(vp, &vpi->DstReg),
302 t_dst_mask(vpi->DstReg.WriteMask),
303 t_dst_class(vpi->DstReg.File));
304 } else {
305 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
306 0,
307 0,
308 t_dst_index(vp, &vpi->DstReg),
309 t_dst_mask(vpi->DstReg.WriteMask),
310 t_dst_class(vpi->DstReg.File));
311 }
312 inst[1] = t_src(vp, &vpi->SrcReg[0]);
313 inst[2] = t_src(vp, &vpi->SrcReg[1]);
314 inst[3] = t_src(vp, &vpi->SrcReg[2]);
315 }
316
317 static void ei_pow(struct r300_vertex_program_code *vp,
318 struct rc_sub_instruction *vpi,
319 unsigned int * inst)
320 {
321 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
322 1,
323 0,
324 t_dst_index(vp, &vpi->DstReg),
325 t_dst_mask(vpi->DstReg.WriteMask),
326 t_dst_class(vpi->DstReg.File));
327 inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
328 inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
329 inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
330 }
331
332
333 static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
334 {
335 struct rc_instruction *rci;
336
337 compiler->code->pos_end = 0; /* Not supported yet */
338 compiler->code->length = 0;
339
340 compiler->SetHwInputOutput(compiler);
341
342 for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
343 struct rc_sub_instruction *vpi = &rci->I;
344 unsigned int *inst = compiler->code->body.d + compiler->code->length;
345
346 /* Skip instructions writing to non-existing destination */
347 if (!valid_dst(compiler->code, &vpi->DstReg))
348 continue;
349
350 if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
351 rc_error(&compiler->Base, "Vertex program has too many instructions\n");
352 return;
353 }
354
355 switch (vpi->Opcode) {
356 case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
357 case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
358 case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
359 case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
360 case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
361 case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
362 case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
363 case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
364 case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
365 case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
366 case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
367 case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
368 case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
369 case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
370 case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
371 case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
372 case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
373 case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
374 case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
375 case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
376 default:
377 rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode);
378 return;
379 }
380
381 compiler->code->length += 4;
382
383 if (compiler->Base.Error)
384 return;
385 }
386 }
387
388 struct temporary_allocation {
389 unsigned int Allocated:1;
390 unsigned int HwTemp:15;
391 struct rc_instruction * LastRead;
392 };
393
394 static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
395 {
396 struct rc_instruction *inst;
397 unsigned int num_orig_temps = 0;
398 char hwtemps[VSF_MAX_FRAGMENT_TEMPS];
399 struct temporary_allocation * ta;
400 unsigned int i, j;
401
402 compiler->code->num_temporaries = 0;
403 memset(hwtemps, 0, sizeof(hwtemps));
404
405 /* Pass 1: Count original temporaries and allocate structures */
406 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
407 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
408
409 for (i = 0; i < opcode->NumSrcRegs; ++i) {
410 if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY) {
411 if (inst->I.SrcReg[i].Index >= num_orig_temps)
412 num_orig_temps = inst->I.SrcReg[i].Index + 1;
413 }
414 }
415
416 if (opcode->HasDstReg) {
417 if (inst->I.DstReg.File == RC_FILE_TEMPORARY) {
418 if (inst->I.DstReg.Index >= num_orig_temps)
419 num_orig_temps = inst->I.DstReg.Index + 1;
420 }
421 }
422 }
423
424 ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
425 sizeof(struct temporary_allocation) * num_orig_temps);
426 memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
427
428 /* Pass 2: Determine original temporary lifetimes */
429 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
430 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
431
432 for (i = 0; i < opcode->NumSrcRegs; ++i) {
433 if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY)
434 ta[inst->I.SrcReg[i].Index].LastRead = inst;
435 }
436 }
437
438 /* Pass 3: Register allocation */
439 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
440 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
441
442 for (i = 0; i < opcode->NumSrcRegs; ++i) {
443 if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY) {
444 unsigned int orig = inst->I.SrcReg[i].Index;
445 inst->I.SrcReg[i].Index = ta[orig].HwTemp;
446
447 if (ta[orig].Allocated && inst == ta[orig].LastRead)
448 hwtemps[ta[orig].HwTemp] = 0;
449 }
450 }
451
452 if (opcode->HasDstReg) {
453 if (inst->I.DstReg.File == RC_FILE_TEMPORARY) {
454 unsigned int orig = inst->I.DstReg.Index;
455
456 if (!ta[orig].Allocated) {
457 for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
458 if (!hwtemps[j])
459 break;
460 }
461 if (j >= VSF_MAX_FRAGMENT_TEMPS) {
462 fprintf(stderr, "Out of hw temporaries\n");
463 } else {
464 ta[orig].Allocated = 1;
465 ta[orig].HwTemp = j;
466 hwtemps[j] = 1;
467
468 if (j >= compiler->code->num_temporaries)
469 compiler->code->num_temporaries = j + 1;
470 }
471 }
472
473 inst->I.DstReg.Index = ta[orig].HwTemp;
474 }
475 }
476 }
477 }
478
479
480 /**
481 * Vertex engine cannot read two inputs or two constants at the same time.
482 * Introduce intermediate MOVs to temporary registers to account for this.
483 */
484 static int transform_source_conflicts(
485 struct radeon_compiler *c,
486 struct rc_instruction* inst,
487 void* unused)
488 {
489 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
490
491 if (opcode->NumSrcRegs == 3) {
492 if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
493 || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
494 int tmpreg = rc_find_free_temporary(c);
495 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
496 inst_mov->I.Opcode = RC_OPCODE_MOV;
497 inst_mov->I.DstReg.File = RC_FILE_TEMPORARY;
498 inst_mov->I.DstReg.Index = tmpreg;
499 inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
500
501 reset_srcreg(&inst->I.SrcReg[2]);
502 inst->I.SrcReg[2].File = RC_FILE_TEMPORARY;
503 inst->I.SrcReg[2].Index = tmpreg;
504 }
505 }
506
507 if (opcode->NumSrcRegs >= 2) {
508 if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
509 int tmpreg = rc_find_free_temporary(c);
510 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
511 inst_mov->I.Opcode = RC_OPCODE_MOV;
512 inst_mov->I.DstReg.File = RC_FILE_TEMPORARY;
513 inst_mov->I.DstReg.Index = tmpreg;
514 inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
515
516 reset_srcreg(&inst->I.SrcReg[1]);
517 inst->I.SrcReg[1].File = RC_FILE_TEMPORARY;
518 inst->I.SrcReg[1].Index = tmpreg;
519 }
520 }
521
522 return 1;
523 }
524
525 static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
526 {
527 int i;
528
529 for(i = 0; i < 32; ++i) {
530 if ((compiler->RequiredOutputs & (1 << i)) &&
531 !(compiler->Base.Program.OutputsWritten & (1 << i))) {
532 struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
533 inst->I.Opcode = RC_OPCODE_MOV;
534
535 inst->I.DstReg.File = RC_FILE_OUTPUT;
536 inst->I.DstReg.Index = i;
537 inst->I.DstReg.WriteMask = RC_MASK_XYZW;
538
539 inst->I.SrcReg[0].File = RC_FILE_CONSTANT;
540 inst->I.SrcReg[0].Index = 0;
541 inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
542
543 compiler->Base.Program.OutputsWritten |= 1 << i;
544 }
545 }
546 }
547
548 static void dataflow_outputs_mark_used(void * userdata, void * data,
549 void (*callback)(void *, unsigned int, unsigned int))
550 {
551 struct r300_vertex_program_compiler * c = userdata;
552 int i;
553
554 for(i = 0; i < 32; ++i) {
555 if (c->RequiredOutputs & (1 << i))
556 callback(data, i, RC_MASK_XYZW);
557 }
558 }
559
560 static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
561 {
562 (void) opcode;
563 (void) reg;
564
565 return 1;
566 }
567
568
569 static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
570 .IsNative = &swizzle_is_native,
571 .Split = 0 /* should never be called */
572 };
573
574
575 void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
576 {
577 compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
578
579 addArtificialOutputs(compiler);
580
581 {
582 struct radeon_program_transformation transformations[] = {
583 { &r300_transform_vertex_alu, 0 },
584 };
585 radeonLocalTransform(&compiler->Base, 1, transformations);
586 }
587
588 if (compiler->Base.Debug) {
589 fprintf(stderr, "Vertex program after native rewrite:\n");
590 rc_print_program(&compiler->Base.Program, 0);
591 fflush(stderr);
592 }
593
594 {
595 /* Note: This pass has to be done seperately from ALU rewrite,
596 * otherwise non-native ALU instructions with source conflits
597 * will not be treated properly.
598 */
599 struct radeon_program_transformation transformations[] = {
600 { &transform_source_conflicts, 0 },
601 };
602 radeonLocalTransform(&compiler->Base, 1, transformations);
603 }
604
605 if (compiler->Base.Debug) {
606 fprintf(stderr, "Vertex program after source conflict resolve:\n");
607 rc_print_program(&compiler->Base.Program, 0);
608 fflush(stderr);
609 }
610
611 rc_dataflow_annotate(&compiler->Base, &dataflow_outputs_mark_used, compiler);
612 rc_dataflow_dealias(&compiler->Base);
613 rc_dataflow_swizzles(&compiler->Base);
614
615 /* This invalidates dataflow annotations and should be replaced
616 * by a future generic register allocation pass. */
617 allocate_temporary_registers(compiler);
618
619 if (compiler->Base.Debug) {
620 fprintf(stderr, "Vertex program after dataflow:\n");
621 rc_print_program(&compiler->Base.Program, 0);
622 fflush(stderr);
623 }
624
625 translate_vertex_program(compiler);
626
627 rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
628
629 compiler->code->InputsRead = compiler->Base.Program.InputsRead;
630 compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
631
632 if (compiler->Base.Debug) {
633 fprintf(stderr, "Final vertex program code:\n");
634 r300_vertex_program_dump(compiler->code);
635 }
636 }