r300/compiler: r500 hw support for break and continue in loops.
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r500_fragprog_emit.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 *
7 * All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 *
29 */
30
31 /**
32 * \file
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39 *
40 */
41
42 #include "r500_fragprog.h"
43
44 #include "../r300_reg.h"
45
46 #include "radeon_program_pair.h"
47
/* Deepest IF nesting accepted by emit_flowcontrol() before it reports an error. */
#define MAX_BRANCH_DEPTH_FULL 32
/* NOTE(review): not referenced in the visible code; presumably the nesting
 * limit of the partial flow control mode -- confirm. */
#define MAX_BRANCH_DEPTH_PARTIAL 4

/* Declares a local `code` pointing at the R500 program under construction.
 * Requires a compiler pointer named `c` in the enclosing scope. */
#define PROG_CODE \
	struct r500_fragment_program_code *code = &c->code->code.r500

/* Reports a compiler error through rc_error(), prefixing the message with the
 * current file and function name and appending a newline.
 * Requires a compiler pointer named `c` in the enclosing scope. */
#define error(fmt, ...) do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while(0)
58
59
/**
 * Instruction slots belonging to one IF / ELSE / ENDIF construct.
 *
 * Else and Endif hold -1 until the corresponding opcode has been emitted.
 */
struct branch_info {
	int If;    /* instruction index reserved for the IF */
	int Else;  /* instruction index reserved for the ELSE, or -1 */
	int Endif; /* instruction index reserved for the ENDIF, or -1 */
};
65
/**
 * Bookkeeping for one loop that is currently being emitted.
 */
struct loop_info {
	/* instruction index of the BGNLOOP instruction */
	int BgnLoop;

	/* value of emit_state::CurrentBranchDepth when the loop was opened */
	int BranchDepth;
	/* instruction indices of the BRK instructions seen inside the loop */
	int *Brks;
	/* number of valid entries in Brks */
	int BrkCount;
	/* capacity counter handed to memory_pool_array_reserve() for Brks */
	int BrkReserved;
};
74
/**
 * State carried across calls to emit_flowcontrol() while one program is
 * being translated.
 */
struct emit_state {
	/* compiler used for error reporting and pool allocation */
	struct radeon_compiler *C;
	/* hardware program receiving the emitted instructions */
	struct r500_fragment_program_code *Code;

	/* stack of open IF constructs */
	struct branch_info *Branches;
	unsigned int CurrentBranchDepth;
	unsigned int BranchesReserved;

	/* stack of open loops */
	struct loop_info *Loops;
	unsigned int CurrentLoopDepth;
	unsigned int LoopsReserved;

	/* largest value CurrentBranchDepth has reached so far */
	unsigned int MaxBranchDepth;

};
90
91 static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
92 {
93 switch(opcode) {
94 case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
95 case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
96 case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
97 case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
98 case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
99 case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
100 default:
101 error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
102 /* fall through */
103 case RC_OPCODE_NOP:
104 /* fall through */
105 case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
106 case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
107 case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
108 case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
109 }
110 }
111
112 static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
113 {
114 switch(opcode) {
115 case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
116 case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
117 case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
118 case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
119 case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
120 case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
121 case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
122 case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
123 case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
124 default:
125 error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
126 /* fall through */
127 case RC_OPCODE_NOP:
128 /* fall through */
129 case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
130 case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
131 case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
132 case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
133 case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
134 case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
135 }
136 }
137
138 static unsigned int fix_hw_swizzle(unsigned int swz)
139 {
140 switch (swz) {
141 case RC_SWIZZLE_ZERO:
142 case RC_SWIZZLE_UNUSED:
143 swz = 4;
144 break;
145 case RC_SWIZZLE_HALF:
146 swz = 5;
147 break;
148 case RC_SWIZZLE_ONE:
149 swz = 6;
150 break;
151 }
152
153 return swz;
154 }
155
156 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
157 {
158 unsigned int t = inst->RGB.Arg[arg].Source;
159 int comp;
160 t |= inst->RGB.Arg[arg].Negate << 11;
161 t |= inst->RGB.Arg[arg].Abs << 12;
162
163 for(comp = 0; comp < 3; ++comp)
164 t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
165
166 return t;
167 }
168
169 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
170 {
171 unsigned int t = inst->Alpha.Arg[i].Source;
172 t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
173 t |= inst->Alpha.Arg[i].Negate << 5;
174 t |= inst->Alpha.Arg[i].Abs << 6;
175 return t;
176 }
177
178 static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
179 {
180 switch(func) {
181 case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
182 case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
183 case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
184 case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
185 default:
186 rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
187 return 0;
188 }
189 }
190
191 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
192 {
193 if (index > code->max_temp_idx)
194 code->max_temp_idx = index;
195 }
196
197 static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
198 {
199 if (src.File == RC_FILE_CONSTANT) {
200 return src.Index | 0x100;
201 } else if (src.File == RC_FILE_TEMPORARY) {
202 use_temporary(code, src.Index);
203 return src.Index;
204 }
205
206 return 0;
207 }
208
209 /**
210 * NOP the specified instruction if it is not a texture lookup.
211 */
212 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
213 {
214 PROG_CODE;
215
216 if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
217 code->inst[ip].inst0 |= R500_INST_NOP;
218 }
219 }
220
221 /**
222 * Emit a paired ALU instruction.
223 */
224 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
225 {
226 PROG_CODE;
227
228 if (code->inst_end >= 511) {
229 error("emit_alu: Too many instructions");
230 return;
231 }
232
233 int ip = ++code->inst_end;
234
235 /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
236 if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
237 inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
238 if (ip > 0) {
239 alu_nop(c, ip - 1);
240 }
241 }
242
243 code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
244 code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
245
246 if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
247 code->inst[ip].inst0 = R500_INST_TYPE_OUT;
248 if (inst->WriteALUResult) {
249 error("%s: cannot write output and ALU result at the same time");
250 return;
251 }
252 } else {
253 code->inst[ip].inst0 = R500_INST_TYPE_ALU;
254 }
255 code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
256
257 code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
258 code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
259 if (inst->Alpha.DepthWriteMask) {
260 code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
261 c->code->writes_depth = 1;
262 }
263
264 code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
265 code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
266 use_temporary(code, inst->Alpha.DestIndex);
267 use_temporary(code, inst->RGB.DestIndex);
268
269 if (inst->RGB.Saturate)
270 code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
271 if (inst->Alpha.Saturate)
272 code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
273
274 code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
275 code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
276 code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
277
278 code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
279 code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
280 code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
281
282 code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
283 code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
284 code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
285
286 code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
287 code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
288 code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
289
290 code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
291 code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
292
293 if (inst->WriteALUResult) {
294 code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
295
296 if (inst->WriteALUResult == RC_ALURESULT_X)
297 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
298 else
299 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
300
301 code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
302 }
303 }
304
/**
 * Pack the four components of a texture-coordinate swizzle into two bits
 * each, with component 0 in the lowest bits.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan;

	for (chan = 3; chan >= 0; chan--) {
		packed |= (GET_SWZ(swizzle, chan) & 0x3) << (chan * 2);
	}

	return packed;
}
313
314 /**
315 * Emit a single TEX instruction
316 */
317 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
318 {
319 PROG_CODE;
320
321 if (code->inst_end >= 511) {
322 error("emit_tex: Too many instructions");
323 return 0;
324 }
325
326 int ip = ++code->inst_end;
327
328 code->inst[ip].inst0 = R500_INST_TYPE_TEX
329 | (inst->DstReg.WriteMask << 11)
330 | R500_INST_TEX_SEM_WAIT;
331 code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
332 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
333
334 if (inst->TexSrcTarget == RC_TEXTURE_RECT)
335 code->inst[ip].inst1 |= R500_TEX_UNSCALED;
336
337 switch (inst->Opcode) {
338 case RC_OPCODE_KIL:
339 code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
340 break;
341 case RC_OPCODE_TEX:
342 code->inst[ip].inst1 |= R500_TEX_INST_LD;
343 break;
344 case RC_OPCODE_TXB:
345 code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
346 break;
347 case RC_OPCODE_TXP:
348 code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
349 break;
350 default:
351 error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
352 }
353
354 use_temporary(code, inst->SrcReg[0].Index);
355 if (inst->Opcode != RC_OPCODE_KIL)
356 use_temporary(code, inst->DstReg.Index);
357
358 code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
359 | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
360 | R500_TEX_DST_ADDR(inst->DstReg.Index)
361 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
362 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
363
364 return 1;
365 }
366
367 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
368 {
369 if (s->Code->inst_end >= 511) {
370 rc_error(s->C, "emit_tex: Too many instructions");
371 return;
372 }
373
374 unsigned int newip = ++s->Code->inst_end;
375
376 /* Currently all loops use the same integer constant to intialize
377 * the loop variables. */
378 if(!s->Code->int_constants[0]) {
379 s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
380 s->Code->int_constant_count = 1;
381 }
382 s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
383
384 switch(inst->U.I.Opcode){
385 struct branch_info * branch;
386 struct loop_info * loop;
387 case RC_OPCODE_BGNLOOP:
388 memory_pool_array_reserve(&s->C->Pool, struct loop_info,
389 s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
390
391 loop = &s->Loops[s->CurrentLoopDepth++];
392 memset(loop, 0, sizeof(struct loop_info));
393 loop->BranchDepth = s->CurrentBranchDepth;
394 loop->BgnLoop = newip;
395
396 s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
397 | R500_FC_JUMP_FUNC(0x00)
398 | R500_FC_IGNORE_UNCOVERED
399 ;
400 break;
401 case RC_OPCODE_BRK:
402 loop = &s->Loops[s->CurrentLoopDepth - 1];
403 memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
404 loop->BrkCount, loop->BrkReserved, 1);
405
406 loop->Brks[loop->BrkCount++] = newip;
407 s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
408 | R500_FC_JUMP_FUNC(0xff)
409 | R500_FC_B_OP1_DECR
410 | R500_FC_B_POP_CNT(
411 s->CurrentBranchDepth - loop->BranchDepth)
412 | R500_FC_IGNORE_UNCOVERED
413 ;
414 break;
415
416 case RC_OPCODE_CONTINUE:
417 loop = &s->Loops[s->CurrentLoopDepth - 1];
418 s->Code->inst[newip].inst2 = R500_FC_OP_JUMP
419 | R500_FC_JUMP_FUNC(0xff)
420 | R500_FC_B_OP1_DECR
421 | R500_FC_B_POP_CNT(
422 s->CurrentBranchDepth - loop->BranchDepth)
423 ;
424 s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->BgnLoop);
425 break;
426
427 case RC_OPCODE_ENDLOOP:
428 {
429 unsigned int i;
430 loop = &s->Loops[s->CurrentLoopDepth - 1];
431 /* Emit ENDLOOP */
432 s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
433 | R500_FC_JUMP_FUNC(0xff)
434 | R500_FC_JUMP_ANY
435 | R500_FC_IGNORE_UNCOVERED
436 ;
437 /* The constant integer at index 0 is used by all loops. */
438 s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
439 | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
440 ;
441
442 /* Set jump address and int constant for BGNLOOP */
443 s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
444 | R500_FC_JUMP_ADDR(newip)
445 ;
446
447 /* Set jump address for the BRK instructions. */
448 while(loop->BrkCount--) {
449 s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
450 R500_FC_JUMP_ADDR(newip + 1);
451 }
452 s->CurrentLoopDepth--;
453 break;
454 }
455 case RC_OPCODE_IF:
456 if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) {
457 rc_error(s->C, "Branch depth exceeds hardware limit");
458 return;
459 }
460 memory_pool_array_reserve(&s->C->Pool, struct branch_info,
461 s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
462
463 branch = &s->Branches[s->CurrentBranchDepth++];
464 branch->If = newip;
465 branch->Else = -1;
466 branch->Endif = -1;
467
468 if (s->CurrentBranchDepth > s->MaxBranchDepth)
469 s->MaxBranchDepth = s->CurrentBranchDepth;
470
471 /* actual instruction is filled in at ENDIF time */
472 break;
473
474 case RC_OPCODE_ELSE:
475 if (!s->CurrentBranchDepth) {
476 rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
477 return;
478 }
479
480 branch = &s->Branches[s->CurrentBranchDepth - 1];
481 branch->Else = newip;
482
483 /* actual instruction is filled in at ENDIF time */
484 break;
485
486 case RC_OPCODE_ENDIF:
487 if (!s->CurrentBranchDepth) {
488 rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
489 return;
490 }
491
492 branch = &s->Branches[s->CurrentBranchDepth - 1];
493 branch->Endif = newip;
494
495 s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
496 | R500_FC_A_OP_NONE /* no address stack */
497 | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
498 | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
499 | R500_FC_B_OP1_NONE /* no branch counter if stay */
500 | R500_FC_B_POP_CNT(1)
501 ;
502 s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
503 s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
504 | R500_FC_A_OP_NONE /* no address stack */
505 | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
506 | R500_FC_B_OP0_INCR /* increment branch counter if stay */
507 | R500_FC_IGNORE_UNCOVERED
508 ;
509
510 if (branch->Else >= 0) {
511 /* increment branch counter also if jump */
512 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
513 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
514
515 s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
516 | R500_FC_A_OP_NONE /* no address stack */
517 | R500_FC_B_ELSE /* all active pixels want to jump */
518 | R500_FC_B_OP0_NONE /* no counter op if stay */
519 | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
520 | R500_FC_B_POP_CNT(1)
521 ;
522 s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
523 } else {
524 /* don't touch branch counter on jump */
525 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
526 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
527 }
528
529
530 s->CurrentBranchDepth--;
531 break;
532 default:
533 rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
534 }
535 }
536
537 void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
538 {
539 struct emit_state s;
540 struct r500_fragment_program_code *code = &compiler->code->code.r500;
541
542 memset(&s, 0, sizeof(s));
543 s.C = &compiler->Base;
544 s.Code = code;
545
546 memset(code, 0, sizeof(*code));
547 code->max_temp_idx = 1;
548 code->inst_end = -1;
549
550 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
551 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
552 inst = inst->Next) {
553 if (inst->Type == RC_INSTRUCTION_NORMAL) {
554 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
555
556 if (opcode->IsFlowControl) {
557 emit_flowcontrol(&s, inst);
558 } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
559 continue;
560 } else {
561 emit_tex(compiler, &inst->U.I);
562 }
563 } else {
564 emit_paired(compiler, &inst->U.P);
565 }
566 }
567
568 if (code->max_temp_idx >= 128)
569 rc_error(&compiler->Base, "Too many hardware temporaries used");
570
571 if (compiler->Base.Error)
572 return;
573
574 if (code->inst_end == -1 ||
575 (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
576 /* This may happen when dead-code elimination is disabled or
577 * when most of the fragment program logic is leading to a KIL */
578 if (code->inst_end >= 511) {
579 rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
580 return;
581 }
582
583 int ip = ++code->inst_end;
584 code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
585 }
586
587 /* Enable full flow control mode if we are using loops or have if
588 * statements nested at least four deep. */
589 if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
590 if (code->max_temp_idx < 1)
591 code->max_temp_idx = 1;
592
593 code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
594 }
595 }