r300/compiler: terminate vertex shader compilation immediately after an error
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r3xx_vertprog.c
1 /*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include "radeon_compiler.h"
24
25 #include <stdio.h>
26
27 #include "../r300_reg.h"
28
29 #include "radeon_dataflow.h"
30 #include "radeon_program_alu.h"
31 #include "radeon_swizzle.h"
32 #include "radeon_emulate_branches.h"
33 #include "radeon_emulate_loops.h"
34
/**
 * Bookkeeping for one loop that is open while the vertex program is
 * being translated to hardware code.
 */
struct loop {
	/* Set at BGNLOOP to the current code length divided by four. */
	int BgnLoop;
};
39
/*
 * Build a constant ZERO or ONE source operand out of an already set-up,
 * valid source: the operand addresses the same register as
 * vpi->SrcReg[x], but all four channels use the swizzle y and nothing
 * is negated.
 *
 * The expansion refers to `vp` and `vpi`, so both must be in scope
 * wherever the macro is used.
 */
#define __CONST(x, y)						\
	(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]),	\
			 t_swizzle(y),				\
			 t_swizzle(y),				\
			 t_swizzle(y),				\
			 t_swizzle(y),				\
			 t_src_class(vpi->SrcReg[x].File),	\
			 RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
52
53
54 static unsigned long t_dst_mask(unsigned int mask)
55 {
56 /* RC_MASK_* is equivalent to VSF_FLAG_* */
57 return mask & RC_MASK_XYZW;
58 }
59
60 static unsigned long t_dst_class(rc_register_file file)
61 {
62 switch (file) {
63 default:
64 fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
65 /* fall-through */
66 case RC_FILE_TEMPORARY:
67 return PVS_DST_REG_TEMPORARY;
68 case RC_FILE_OUTPUT:
69 return PVS_DST_REG_OUT;
70 case RC_FILE_ADDRESS:
71 return PVS_DST_REG_A0;
72 }
73 }
74
75 static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
76 struct rc_dst_register *dst)
77 {
78 if (dst->File == RC_FILE_OUTPUT)
79 return vp->outputs[dst->Index];
80
81 return dst->Index;
82 }
83
84 static unsigned long t_src_class(rc_register_file file)
85 {
86 switch (file) {
87 default:
88 fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
89 /* fall-through */
90 case RC_FILE_NONE:
91 case RC_FILE_TEMPORARY:
92 return PVS_SRC_REG_TEMPORARY;
93 case RC_FILE_INPUT:
94 return PVS_SRC_REG_INPUT;
95 case RC_FILE_CONSTANT:
96 return PVS_SRC_REG_CONSTANT;
97 }
98 }
99
100 static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
101 {
102 unsigned long aclass = t_src_class(a.File);
103 unsigned long bclass = t_src_class(b.File);
104
105 if (aclass != bclass)
106 return 0;
107 if (aclass == PVS_SRC_REG_TEMPORARY)
108 return 0;
109
110 if (a.RelAddr || b.RelAddr)
111 return 1;
112 if (a.Index != b.Index)
113 return 1;
114
115 return 0;
116 }
117
/**
 * Translate a swizzle selector to the hardware encoding.
 *
 * The RC_SWIZZLE_* values are identical to VSF_IN_COMPONENT_*, so the
 * value is passed through unchanged.
 */
static inline unsigned long t_swizzle(unsigned int swizzle)
{
	return (unsigned long)swizzle;
}
123
124 static unsigned long t_src_index(struct r300_vertex_program_code *vp,
125 struct rc_src_register *src)
126 {
127 if (src->File == RC_FILE_INPUT) {
128 assert(vp->inputs[src->Index] != -1);
129 return vp->inputs[src->Index];
130 } else {
131 if (src->Index < 0) {
132 fprintf(stderr,
133 "negative offsets for indirect addressing do not work.\n");
134 return 0;
135 }
136 return src->Index;
137 }
138 }
139
140 /* these two functions should probably be merged... */
141
142 static unsigned long t_src(struct r300_vertex_program_code *vp,
143 struct rc_src_register *src)
144 {
145 /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
146 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
147 */
148 return PVS_SRC_OPERAND(t_src_index(vp, src),
149 t_swizzle(GET_SWZ(src->Swizzle, 0)),
150 t_swizzle(GET_SWZ(src->Swizzle, 1)),
151 t_swizzle(GET_SWZ(src->Swizzle, 2)),
152 t_swizzle(GET_SWZ(src->Swizzle, 3)),
153 t_src_class(src->File),
154 src->Negate) |
155 (src->RelAddr << 4) | (src->Abs << 3);
156 }
157
158 static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
159 struct rc_src_register *src)
160 {
161 /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
162 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
163 */
164 return PVS_SRC_OPERAND(t_src_index(vp, src),
165 t_swizzle(GET_SWZ(src->Swizzle, 0)),
166 t_swizzle(GET_SWZ(src->Swizzle, 0)),
167 t_swizzle(GET_SWZ(src->Swizzle, 0)),
168 t_swizzle(GET_SWZ(src->Swizzle, 0)),
169 t_src_class(src->File),
170 src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
171 (src->RelAddr << 4) | (src->Abs << 3);
172 }
173
174 static int valid_dst(struct r300_vertex_program_code *vp,
175 struct rc_dst_register *dst)
176 {
177 if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) {
178 return 0;
179 } else if (dst->File == RC_FILE_ADDRESS) {
180 assert(dst->Index == 0);
181 }
182
183 return 1;
184 }
185
186 static void ei_vector1(struct r300_vertex_program_code *vp,
187 unsigned int hw_opcode,
188 struct rc_sub_instruction *vpi,
189 unsigned int * inst)
190 {
191 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
192 0,
193 0,
194 t_dst_index(vp, &vpi->DstReg),
195 t_dst_mask(vpi->DstReg.WriteMask),
196 t_dst_class(vpi->DstReg.File));
197 inst[1] = t_src(vp, &vpi->SrcReg[0]);
198 inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
199 inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
200 }
201
202 static void ei_vector2(struct r300_vertex_program_code *vp,
203 unsigned int hw_opcode,
204 struct rc_sub_instruction *vpi,
205 unsigned int * inst)
206 {
207 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
208 0,
209 0,
210 t_dst_index(vp, &vpi->DstReg),
211 t_dst_mask(vpi->DstReg.WriteMask),
212 t_dst_class(vpi->DstReg.File));
213 inst[1] = t_src(vp, &vpi->SrcReg[0]);
214 inst[2] = t_src(vp, &vpi->SrcReg[1]);
215 inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
216 }
217
218 static void ei_math1(struct r300_vertex_program_code *vp,
219 unsigned int hw_opcode,
220 struct rc_sub_instruction *vpi,
221 unsigned int * inst)
222 {
223 inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
224 1,
225 0,
226 t_dst_index(vp, &vpi->DstReg),
227 t_dst_mask(vpi->DstReg.WriteMask),
228 t_dst_class(vpi->DstReg.File));
229 inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
230 inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
231 inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
232 }
233
234 static void ei_lit(struct r300_vertex_program_code *vp,
235 struct rc_sub_instruction *vpi,
236 unsigned int * inst)
237 {
238 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
239
240 inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
241 1,
242 0,
243 t_dst_index(vp, &vpi->DstReg),
244 t_dst_mask(vpi->DstReg.WriteMask),
245 t_dst_class(vpi->DstReg.File));
246 /* NOTE: Users swizzling might not work. */
247 inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
248 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
249 PVS_SRC_SELECT_FORCE_0, // Z
250 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
251 t_src_class(vpi->SrcReg[0].File),
252 vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
253 (vpi->SrcReg[0].RelAddr << 4);
254 inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
255 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
256 PVS_SRC_SELECT_FORCE_0, // Z
257 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
258 t_src_class(vpi->SrcReg[0].File),
259 vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
260 (vpi->SrcReg[0].RelAddr << 4);
261 inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
262 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
263 PVS_SRC_SELECT_FORCE_0, // Z
264 t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
265 t_src_class(vpi->SrcReg[0].File),
266 vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
267 (vpi->SrcReg[0].RelAddr << 4);
268 }
269
270 static void ei_mad(struct r300_vertex_program_code *vp,
271 struct rc_sub_instruction *vpi,
272 unsigned int * inst)
273 {
274 /* Remarks about hardware limitations of MAD
275 * (please preserve this comment, as this information is _NOT_
276 * in the documentation provided by AMD).
277 *
278 * As described in the documentation, MAD with three unique temporary
279 * source registers requires the use of the macro version.
280 *
281 * However (and this is not mentioned in the documentation), apparently
282 * the macro version is _NOT_ a full superset of the normal version.
283 * In particular, the macro version does not always work when relative
284 * addressing is used in the source operands.
285 *
286 * This limitation caused incorrect rendering in Sauerbraten's OpenGL
287 * assembly shader path when using medium quality animations
288 * (i.e. animations with matrix blending instead of quaternion blending).
289 *
290 * Unfortunately, I (nha) have been unable to extract a Piglit regression
291 * test for this issue - for some reason, it is possible to have vertex
292 * programs whose prefix is *exactly* the same as the prefix of the
293 * offending program in Sauerbraten up to the offending instruction
294 * without causing any trouble.
295 *
296 * Bottom line: Only use the macro version only when really necessary;
297 * according to AMD docs, this should improve performance by one clock
298 * as a nice side bonus.
299 */
300 if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
301 vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
302 vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
303 vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
304 vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
305 vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
306 inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
307 0,
308 1,
309 t_dst_index(vp, &vpi->DstReg),
310 t_dst_mask(vpi->DstReg.WriteMask),
311 t_dst_class(vpi->DstReg.File));
312 } else {
313 inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
314 0,
315 0,
316 t_dst_index(vp, &vpi->DstReg),
317 t_dst_mask(vpi->DstReg.WriteMask),
318 t_dst_class(vpi->DstReg.File));
319 }
320 inst[1] = t_src(vp, &vpi->SrcReg[0]);
321 inst[2] = t_src(vp, &vpi->SrcReg[1]);
322 inst[3] = t_src(vp, &vpi->SrcReg[2]);
323 }
324
325 static void ei_pow(struct r300_vertex_program_code *vp,
326 struct rc_sub_instruction *vpi,
327 unsigned int * inst)
328 {
329 inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
330 1,
331 0,
332 t_dst_index(vp, &vpi->DstReg),
333 t_dst_mask(vpi->DstReg.WriteMask),
334 t_dst_class(vpi->DstReg.File));
335 inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
336 inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
337 inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
338 }
339
340 static void mark_write(void * userdata, struct rc_instruction * inst,
341 rc_register_file file, unsigned int index, unsigned int mask)
342 {
343 unsigned int * writemasks = userdata;
344
345 if (file != RC_FILE_TEMPORARY)
346 return;
347
348 if (index >= R300_VS_MAX_TEMPS)
349 return;
350
351 writemasks[index] |= mask;
352 }
353
354 static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
355 {
356 return PVS_SRC_OPERAND(compiler->PredicateIndex,
357 t_swizzle(RC_SWIZZLE_ZERO),
358 t_swizzle(RC_SWIZZLE_ZERO),
359 t_swizzle(RC_SWIZZLE_ZERO),
360 t_swizzle(RC_SWIZZLE_W),
361 t_src_class(RC_FILE_TEMPORARY),
362 0);
363 }
364
365 static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
366 unsigned int hw_opcode, int is_math)
367 {
368 return PVS_OP_DST_OPERAND(hw_opcode,
369 is_math,
370 0,
371 compiler->PredicateIndex,
372 RC_MASK_W,
373 t_dst_class(RC_FILE_TEMPORARY));
374
375 }
376
377 static void ei_if(struct r300_vertex_program_compiler * compiler,
378 struct rc_instruction *rci,
379 unsigned int * inst,
380 unsigned int branch_depth)
381 {
382 unsigned int predicate_opcode;
383 int is_math = 0;
384
385 if (!compiler->Base.is_r500) {
386 rc_error(&compiler->Base,"Opcode IF not supported\n");
387 return;
388 }
389
390 /* Reserve a temporary to use as our predicate stack counter, if we
391 * don't already have one. */
392 if (!compiler->PredicateMask) {
393 unsigned int writemasks[R300_VS_MAX_TEMPS];
394 memset(writemasks, 0, sizeof(writemasks));
395 struct rc_instruction * inst;
396 unsigned int i;
397 for(inst = compiler->Base.Program.Instructions.Next;
398 inst != &compiler->Base.Program.Instructions;
399 inst = inst->Next) {
400 rc_for_all_writes_mask(inst, mark_write, writemasks);
401 }
402 for(i = 0; i < R300_VS_MAX_TEMPS; i++) {
403 unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
404 /* Only the W component can be used fo the predicate
405 * stack counter. */
406 if (mask & RC_MASK_W) {
407 compiler->PredicateMask = RC_MASK_W;
408 compiler->PredicateIndex = i;
409 break;
410 }
411 }
412 if (i == R300_VS_MAX_TEMPS) {
413 rc_error(&compiler->Base, "No free temporary to use for"
414 " predicate stack counter.\n");
415 return;
416 }
417 }
418 predicate_opcode =
419 branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ;
420
421 rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
422 if (branch_depth == 0) {
423 is_math = 1;
424 predicate_opcode = ME_PRED_SET_NEQ;
425 inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
426 inst[2] = 0;
427 } else {
428 predicate_opcode = VE_PRED_SET_NEQ_PUSH;
429 inst[1] = t_pred_src(compiler);
430 inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
431 }
432
433 inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
434 inst[3] = 0;
435
436 }
437
438 static void ei_else(struct r300_vertex_program_compiler * compiler,
439 unsigned int * inst)
440 {
441 if (!compiler->Base.is_r500) {
442 rc_error(&compiler->Base,"Opcode ELSE not supported\n");
443 return;
444 }
445 inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
446 inst[1] = t_pred_src(compiler);
447 inst[2] = 0;
448 inst[3] = 0;
449 }
450
451 static void ei_endif(struct r300_vertex_program_compiler *compiler,
452 unsigned int * inst)
453 {
454 if (!compiler->Base.is_r500) {
455 rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
456 return;
457 }
458 inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
459 inst[1] = t_pred_src(compiler);
460 inst[2] = 0;
461 inst[3] = 0;
462 }
463
464 static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
465 {
466 struct rc_instruction *rci;
467
468 struct loop * loops;
469 int current_loop_depth = 0;
470 int loops_reserved = 0;
471
472 unsigned int branch_depth = 0;
473
474 compiler->code->pos_end = 0; /* Not supported yet */
475 compiler->code->length = 0;
476
477 compiler->SetHwInputOutput(compiler);
478
479 for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
480 struct rc_sub_instruction *vpi = &rci->U.I;
481 unsigned int *inst = compiler->code->body.d + compiler->code->length;
482
483 /* Skip instructions writing to non-existing destination */
484 if (!valid_dst(compiler->code, &vpi->DstReg))
485 continue;
486
487 if (rc_get_opcode_info(vpi->Opcode)->HasDstReg) {
488 /* Relative addressing of destination operands is not supported yet. */
489 if (vpi->DstReg.RelAddr) {
490 rc_error(&compiler->Base, "Vertex program does not support relative "
491 "addressing of destination operands (yet).\n");
492 return;
493 }
494
495 /* Neither is Saturate. */
496 if (vpi->SaturateMode != RC_SATURATE_NONE) {
497 rc_error(&compiler->Base, "Vertex program does not support the Saturate "
498 "modifier (yet).\n");
499 }
500 }
501
502 if (compiler->code->length >= R500_VS_MAX_ALU_DWORDS ||
503 (compiler->code->length >= R300_VS_MAX_ALU_DWORDS && !compiler->Base.is_r500)) {
504 rc_error(&compiler->Base, "Vertex program has too many instructions\n");
505 return;
506 }
507
508 assert(compiler->Base.is_r500 ||
509 (vpi->Opcode != RC_OPCODE_SEQ &&
510 vpi->Opcode != RC_OPCODE_SNE));
511
512 switch (vpi->Opcode) {
513 case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
514 case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
515 case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
516 case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
517 case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
518 case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
519 case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
520 case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
521 case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
522 case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
523 case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
524 case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
525 case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
526 case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
527 case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
528 case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
529 case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
530 case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
531 case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
532 case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
533 case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
534 case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
535 case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
536 case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
537 case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
538 case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
539 case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
540 case RC_OPCODE_BGNLOOP:
541 {
542 struct loop * l;
543
544 if ((!compiler->Base.is_r500
545 && loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
546 || loops_reserved >= R500_VS_MAX_FC_DEPTH) {
547 rc_error(&compiler->Base,
548 "Loops are nested too deep.");
549 return;
550 }
551 memory_pool_array_reserve(&compiler->Base.Pool,
552 struct loop, loops, current_loop_depth,
553 loops_reserved, 1);
554 l = &loops[current_loop_depth++];
555 memset(l , 0, sizeof(struct loop));
556 l->BgnLoop = (compiler->code->length / 4);
557 continue;
558 }
559 case RC_OPCODE_ENDLOOP:
560 {
561 struct loop * l = &loops[current_loop_depth - 1];
562 unsigned int act_addr = l->BgnLoop - 1;
563 unsigned int last_addr = (compiler->code->length / 4) - 1;
564 unsigned int ret_addr = l->BgnLoop;
565
566 if (loops_reserved >= R300_VS_MAX_FC_OPS) {
567 rc_error(&compiler->Base,
568 "Too many flow control instructions.");
569 return;
570 }
571 if (compiler->Base.is_r500) {
572 compiler->code->fc_op_addrs.r500
573 [compiler->code->num_fc_ops].lw =
574 R500_PVS_FC_ACT_ADRS(act_addr)
575 | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
576 ;
577 compiler->code->fc_op_addrs.r500
578 [compiler->code->num_fc_ops].uw =
579 R500_PVS_FC_LAST_INST(last_addr)
580 | R500_PVS_FC_RTN_INST(ret_addr)
581 ;
582 } else {
583 compiler->code->fc_op_addrs.r300
584 [compiler->code->num_fc_ops] =
585 R300_PVS_FC_ACT_ADRS(act_addr)
586 | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
587 | R300_PVS_FC_LAST_INST(last_addr)
588 | R300_PVS_FC_RTN_INST(ret_addr)
589 ;
590 }
591 compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
592 R300_PVS_FC_LOOP_INIT_VAL(0x0)
593 | R300_PVS_FC_LOOP_STEP_VAL(0x1)
594 ;
595 compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
596 compiler->code->num_fc_ops);
597 compiler->code->num_fc_ops++;
598 current_loop_depth--;
599 continue;
600 }
601
602 default:
603 rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
604 return;
605 }
606
607 /* Non-flow control instructions that are inside an if statement
608 * need to pay attention to the predicate bit. */
609 if (branch_depth
610 && vpi->Opcode != RC_OPCODE_IF
611 && vpi->Opcode != RC_OPCODE_ELSE
612 && vpi->Opcode != RC_OPCODE_ENDIF) {
613
614 inst[0] |= (PVS_DST_PRED_ENABLE_MASK
615 << PVS_DST_PRED_ENABLE_SHIFT);
616 inst[0] |= (PVS_DST_PRED_SENSE_MASK
617 << PVS_DST_PRED_SENSE_SHIFT);
618 }
619
620 compiler->code->length += 4;
621
622 if (compiler->Base.Error)
623 return;
624 }
625 }
626
627 struct temporary_allocation {
628 unsigned int Allocated:1;
629 unsigned int HwTemp:15;
630 struct rc_instruction * LastRead;
631 };
632
633 static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
634 {
635 struct rc_instruction *inst;
636 struct rc_instruction *end_loop = NULL;
637 unsigned int num_orig_temps = 0;
638 char hwtemps[R300_VS_MAX_TEMPS];
639 struct temporary_allocation * ta;
640 unsigned int i, j;
641
642 memset(hwtemps, 0, sizeof(hwtemps));
643
644 /* Pass 1: Count original temporaries. */
645 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
646 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
647
648 for (i = 0; i < opcode->NumSrcRegs; ++i) {
649 if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
650 if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
651 num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
652 }
653 }
654
655 if (opcode->HasDstReg) {
656 if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
657 if (inst->U.I.DstReg.Index >= num_orig_temps)
658 num_orig_temps = inst->U.I.DstReg.Index + 1;
659 }
660 }
661 }
662 compiler->code->num_temporaries = num_orig_temps;
663
664 /* Pass 2: If there is relative addressing of temporaries, we cannot change register indices. Give up. */
665 for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
666 const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
667
668 if (opcode->HasDstReg)
669 if (inst->U.I.DstReg.RelAddr)
670 return;
671
672 for (i = 0; i < opcode->NumSrcRegs; ++i) {
673 if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
674 inst->U.I.SrcReg[i].RelAddr) {
675 return;
676 }
677 }
678 }
679
680 compiler->code->num_temporaries = 0;
681 ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
682 sizeof(struct temporary_allocation) * num_orig_temps);
683 memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
684
685 /* Pass 3: Determine original temporary lifetimes */
686 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
687 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
688 /* Instructions inside of loops need to use the ENDLOOP
689 * instruction as their LastRead. */
690 if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
691 int endloops = 1;
692 struct rc_instruction * ptr;
693 for(ptr = inst->Next;
694 ptr != &compiler->Base.Program.Instructions;
695 ptr = ptr->Next){
696 if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
697 endloops++;
698 } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
699 endloops--;
700 if (endloops <= 0) {
701 end_loop = ptr;
702 break;
703 }
704 }
705 }
706 }
707
708 if (inst == end_loop) {
709 end_loop = NULL;
710 continue;
711 }
712
713 for (i = 0; i < opcode->NumSrcRegs; ++i) {
714 if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
715 ta[inst->U.I.SrcReg[i].Index].LastRead =
716 end_loop ? end_loop : inst;
717 }
718 }
719
720 /* Pass 4: Register allocation */
721 for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
722 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
723
724 for (i = 0; i < opcode->NumSrcRegs; ++i) {
725 if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
726 unsigned int orig = inst->U.I.SrcReg[i].Index;
727 inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
728
729 if (ta[orig].Allocated && inst == ta[orig].LastRead)
730 hwtemps[ta[orig].HwTemp] = 0;
731 }
732 }
733
734 if (opcode->HasDstReg) {
735 if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
736 unsigned int orig = inst->U.I.DstReg.Index;
737
738 if (!ta[orig].Allocated) {
739 for(j = 0; j < R300_VS_MAX_TEMPS; ++j) {
740 if (!hwtemps[j])
741 break;
742 }
743 if (j >= R300_VS_MAX_TEMPS) {
744 fprintf(stderr, "Out of hw temporaries\n");
745 } else {
746 ta[orig].Allocated = 1;
747 ta[orig].HwTemp = j;
748 hwtemps[j] = 1;
749
750 if (j >= compiler->code->num_temporaries)
751 compiler->code->num_temporaries = j + 1;
752 }
753 }
754
755 inst->U.I.DstReg.Index = ta[orig].HwTemp;
756 }
757 }
758 }
759 }
760
761 /**
762 * R3xx-R4xx vertex engine does not support the Absolute source operand modifier
763 * and the Saturate opcode modifier. Only Absolute is currently transformed.
764 */
765 static int transform_nonnative_modifiers(
766 struct radeon_compiler *c,
767 struct rc_instruction *inst,
768 void* unused)
769 {
770 const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
771 unsigned i;
772
773 /* Transform ABS(a) to MAX(a, -a). */
774 for (i = 0; i < opcode->NumSrcRegs; i++) {
775 if (inst->U.I.SrcReg[i].Abs) {
776 struct rc_instruction *new_inst;
777 unsigned temp;
778
779 inst->U.I.SrcReg[i].Abs = 0;
780
781 temp = rc_find_free_temporary(c);
782
783 new_inst = rc_insert_new_instruction(c, inst->Prev);
784 new_inst->U.I.Opcode = RC_OPCODE_MAX;
785 new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
786 new_inst->U.I.DstReg.Index = temp;
787 new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i];
788 new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i];
789 new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
790
791 memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i]));
792 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
793 inst->U.I.SrcReg[i].Index = temp;
794 inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW;
795 }
796 }
797 return 1;
798 }
799
800 /**
801 * Vertex engine cannot read two inputs or two constants at the same time.
802 * Introduce intermediate MOVs to temporary registers to account for this.
803 */
804 static int transform_source_conflicts(
805 struct radeon_compiler *c,
806 struct rc_instruction* inst,
807 void* unused)
808 {
809 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
810
811 if (opcode->NumSrcRegs == 3) {
812 if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])
813 || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) {
814 int tmpreg = rc_find_free_temporary(c);
815 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
816 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
817 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
818 inst_mov->U.I.DstReg.Index = tmpreg;
819 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
820
821 reset_srcreg(&inst->U.I.SrcReg[2]);
822 inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
823 inst->U.I.SrcReg[2].Index = tmpreg;
824 }
825 }
826
827 if (opcode->NumSrcRegs >= 2) {
828 if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) {
829 int tmpreg = rc_find_free_temporary(c);
830 struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
831 inst_mov->U.I.Opcode = RC_OPCODE_MOV;
832 inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
833 inst_mov->U.I.DstReg.Index = tmpreg;
834 inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
835
836 reset_srcreg(&inst->U.I.SrcReg[1]);
837 inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
838 inst->U.I.SrcReg[1].Index = tmpreg;
839 }
840 }
841
842 return 1;
843 }
844
845 static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
846 {
847 int i;
848
849 for(i = 0; i < 32; ++i) {
850 if ((compiler->RequiredOutputs & (1 << i)) &&
851 !(compiler->Base.Program.OutputsWritten & (1 << i))) {
852 struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
853 inst->U.I.Opcode = RC_OPCODE_MOV;
854
855 inst->U.I.DstReg.File = RC_FILE_OUTPUT;
856 inst->U.I.DstReg.Index = i;
857 inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
858
859 inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT;
860 inst->U.I.SrcReg[0].Index = 0;
861 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
862
863 compiler->Base.Program.OutputsWritten |= 1 << i;
864 }
865 }
866 }
867
868 static void dataflow_outputs_mark_used(void * userdata, void * data,
869 void (*callback)(void *, unsigned int, unsigned int))
870 {
871 struct r300_vertex_program_compiler * c = userdata;
872 int i;
873
874 for(i = 0; i < 32; ++i) {
875 if (c->RequiredOutputs & (1 << i))
876 callback(data, i, RC_MASK_XYZW);
877 }
878 }
879
880 static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
881 {
882 (void) opcode;
883 (void) reg;
884
885 return 1;
886 }
887
888 static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where)
889 {
890 if (c->Base.Debug) {
891 fprintf(stderr, "Vertex Program: %s\n", where);
892 rc_print_program(&c->Base.Program);
893 }
894 }
895
896
897 static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
898 .IsNative = &swizzle_is_native,
899 .Split = 0 /* should never be called */
900 };
901
902
903 void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
904 {
905 struct emulate_loop_state loop_state;
906
907 c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
908
909 addArtificialOutputs(c);
910
911 debug_program_log(c, "before compilation");
912
913 if (c->Base.is_r500)
914 rc_transform_loops(&c->Base, &loop_state, R500_VS_MAX_ALU);
915 else
916 rc_transform_loops(&c->Base, &loop_state, R300_VS_MAX_ALU);
917 if (c->Base.Error)
918 return;
919
920 debug_program_log(c, "after emulate loops");
921
922 if (!c->Base.is_r500) {
923 rc_emulate_branches(&c->Base);
924 if (c->Base.Error)
925 return;
926 debug_program_log(c, "after emulate branches");
927 }
928
929 if (c->Base.is_r500) {
930 struct radeon_program_transformation transformations[] = {
931 { &r300_transform_vertex_alu, 0 },
932 { &r300_transform_trig_scale_vertex, 0 }
933 };
934 radeonLocalTransform(&c->Base, 2, transformations);
935 if (c->Base.Error)
936 return;
937
938 debug_program_log(c, "after native rewrite");
939 } else {
940 struct radeon_program_transformation transformations[] = {
941 { &r300_transform_vertex_alu, 0 },
942 { &radeonTransformTrigSimple, 0 }
943 };
944 radeonLocalTransform(&c->Base, 2, transformations);
945 if (c->Base.Error)
946 return;
947
948 debug_program_log(c, "after native rewrite");
949
950 /* Note: This pass has to be done seperately from ALU rewrite,
951 * because it needs to check every instruction.
952 */
953 struct radeon_program_transformation transformations2[] = {
954 { &transform_nonnative_modifiers, 0 },
955 };
956 radeonLocalTransform(&c->Base, 1, transformations2);
957 if (c->Base.Error)
958 return;
959
960 debug_program_log(c, "after emulate modifiers");
961 }
962
963 {
964 /* Note: This pass has to be done seperately from ALU rewrite,
965 * otherwise non-native ALU instructions with source conflits
966 * will not be treated properly.
967 */
968 struct radeon_program_transformation transformations[] = {
969 { &transform_source_conflicts, 0 },
970 };
971 radeonLocalTransform(&c->Base, 1, transformations);
972 if (c->Base.Error)
973 return;
974 }
975
976 debug_program_log(c, "after source conflict resolve");
977
978 rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_used, c);
979 if (c->Base.Error)
980 return;
981
982 debug_program_log(c, "after deadcode");
983
984 rc_dataflow_swizzles(&c->Base);
985 if (c->Base.Error)
986 return;
987
988 debug_program_log(c, "after dataflow");
989
990 allocate_temporary_registers(c);
991 if (c->Base.Error)
992 return;
993
994 debug_program_log(c, "after register allocation");
995
996
997 translate_vertex_program(c);
998 if (c->Base.Error)
999 return;
1000
1001 rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
1002
1003 c->code->InputsRead = c->Base.Program.InputsRead;
1004 c->code->OutputsWritten = c->Base.Program.OutputsWritten;
1005
1006 if (c->Base.Debug) {
1007 fprintf(stderr, "Final vertex program code:\n");
1008 r300_vertex_program_dump(c);
1009 }
1010
1011 /* Check the number of constants. */
1012 if (!c->Base.Error &&
1013 c->Base.Program.Constants.Count > 256) {
1014 rc_error(&c->Base, "Too many constants. Max: 256, Got: %i\n",
1015 c->Base.Program.Constants.Count);
1016 }
1017 }