draw: corrections to allow for different cliptest cases
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r500_fragprog_emit.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 *
7 * All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 *
29 */
30
31 /**
32 * \file
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39 *
40 */
41
42 #include "r500_fragprog.h"
43
44 #include "../r300_reg.h"
45
46 #include "radeon_program_pair.h"
47
/* Branch nesting limits. emit_flowcontrol() refuses a new IF once the
 * current depth has reached MAX_BRANCH_DEPTH_FULL; the PARTIAL limit is
 * not referenced by the code visible in this file. */
#define MAX_BRANCH_DEPTH_FULL    32
#define MAX_BRANCH_DEPTH_PARTIAL 4

/* Declares a local `code` pointing at the R500 program being built.
 * Requires a fragment program compiler named `c` in scope. */
#define PROG_CODE \
	struct r500_fragment_program_code *code = &c->code->code.r500

/* Report a compile error through rc_error(), prefixed with the current
 * file and function and terminated with a newline.
 * Requires a fragment program compiler named `c` in scope. */
#define error(fmt, ...) \
	do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while (0)
58
59
/**
 * Bookkeeping for one open IF/ELSE/ENDIF construct.
 *
 * Each member is the index of the corresponding flow control instruction
 * in the emitted program; Else and Endif are set to -1 when the IF is
 * first seen.
 */
struct branch_info {
	int If;    /* instruction index of the IF */
	int Else;  /* instruction index of the ELSE, or -1 */
	int Endif; /* instruction index of the ENDIF, or -1 */
};
65
/**
 * Bookkeeping for one open BGNLOOP/ENDLOOP construct.
 *
 * The BRK and CONT instruction indices are collected while the loop body
 * is emitted so that their jump addresses can be filled in at ENDLOOP.
 */
struct r500_loop_info {
	/* Instruction index of the BGNLOOP. */
	int BgnLoop;

	/* Branch depth at the point the loop was opened. */
	int BranchDepth;

	/* Instruction indices of the BRKs inside this loop. */
	int *Brks;
	int BrkCount;
	int BrkReserved;

	/* Instruction indices of the CONTs inside this loop. */
	int *Conts;
	int ContCount;
	int ContReserved;
};
78
/**
 * State carried across emit_flowcontrol() calls while a program is
 * being emitted.
 */
struct emit_state {
	struct radeon_compiler *C;
	struct r500_fragment_program_code *Code;

	/* Stack of currently open IF constructs. */
	struct branch_info *Branches;
	unsigned int CurrentBranchDepth;
	unsigned int BranchesReserved;

	/* Stack of currently open loops. */
	struct r500_loop_info *Loops;
	unsigned int CurrentLoopDepth;
	unsigned int LoopsReserved;

	/* Deepest IF nesting seen so far. */
	unsigned int MaxBranchDepth;
};
94
95 static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
96 {
97 switch(opcode) {
98 case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
99 case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
100 case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
101 case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
102 case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
103 case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
104 default:
105 error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
106 /* fall through */
107 case RC_OPCODE_NOP:
108 /* fall through */
109 case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
110 case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
111 case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
112 case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
113 }
114 }
115
116 static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
117 {
118 switch(opcode) {
119 case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
120 case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
121 case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
122 case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
123 case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
124 case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
125 case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
126 case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
127 case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
128 default:
129 error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
130 /* fall through */
131 case RC_OPCODE_NOP:
132 /* fall through */
133 case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
134 case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
135 case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
136 case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
137 case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
138 case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
139 }
140 }
141
142 static unsigned int fix_hw_swizzle(unsigned int swz)
143 {
144 switch (swz) {
145 case RC_SWIZZLE_ZERO:
146 case RC_SWIZZLE_UNUSED:
147 swz = 4;
148 break;
149 case RC_SWIZZLE_HALF:
150 swz = 5;
151 break;
152 case RC_SWIZZLE_ONE:
153 swz = 6;
154 break;
155 }
156
157 return swz;
158 }
159
160 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
161 {
162 unsigned int t = inst->RGB.Arg[arg].Source;
163 int comp;
164 t |= inst->RGB.Arg[arg].Negate << 11;
165 t |= inst->RGB.Arg[arg].Abs << 12;
166
167 for(comp = 0; comp < 3; ++comp)
168 t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
169
170 return t;
171 }
172
173 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
174 {
175 unsigned int t = inst->Alpha.Arg[i].Source;
176 t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
177 t |= inst->Alpha.Arg[i].Negate << 5;
178 t |= inst->Alpha.Arg[i].Abs << 6;
179 return t;
180 }
181
182 static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
183 {
184 switch(func) {
185 case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
186 case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
187 case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
188 case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
189 default:
190 rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
191 return 0;
192 }
193 }
194
195 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
196 {
197 if (index > code->max_temp_idx)
198 code->max_temp_idx = index;
199 }
200
201 static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
202 {
203 if (src.File == RC_FILE_CONSTANT) {
204 return src.Index | 0x100;
205 } else if (src.File == RC_FILE_TEMPORARY) {
206 use_temporary(code, src.Index);
207 return src.Index;
208 }
209
210 return 0;
211 }
212
213 /**
214 * NOP the specified instruction if it is not a texture lookup.
215 */
216 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
217 {
218 PROG_CODE;
219
220 if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
221 code->inst[ip].inst0 |= R500_INST_NOP;
222 }
223 }
224
225 /**
226 * Emit a paired ALU instruction.
227 */
228 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
229 {
230 PROG_CODE;
231
232 if (code->inst_end >= c->Base.max_alu_insts-1) {
233 error("emit_alu: Too many instructions");
234 return;
235 }
236
237 int ip = ++code->inst_end;
238
239 /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
240 if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
241 inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
242 if (ip > 0) {
243 alu_nop(c, ip - 1);
244 }
245 }
246
247 code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
248 code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
249
250 if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
251 code->inst[ip].inst0 = R500_INST_TYPE_OUT;
252 if (inst->WriteALUResult) {
253 error("%s: cannot write output and ALU result at the same time");
254 return;
255 }
256 } else {
257 code->inst[ip].inst0 = R500_INST_TYPE_ALU;
258 }
259 code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
260
261 code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
262 code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
263 if (inst->Nop) {
264 code->inst[ip].inst0 |= R500_INST_NOP;
265 }
266 if (inst->Alpha.DepthWriteMask) {
267 code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
268 c->code->writes_depth = 1;
269 }
270
271 code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
272 code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
273 use_temporary(code, inst->Alpha.DestIndex);
274 use_temporary(code, inst->RGB.DestIndex);
275
276 if (inst->RGB.Saturate)
277 code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
278 if (inst->Alpha.Saturate)
279 code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
280
281 /* Set the presubtract operation. */
282 switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
283 case RC_PRESUB_BIAS:
284 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
285 break;
286 case RC_PRESUB_SUB:
287 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
288 break;
289 case RC_PRESUB_ADD:
290 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
291 break;
292 case RC_PRESUB_INV:
293 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
294 break;
295 default:
296 break;
297 }
298 switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
299 case RC_PRESUB_BIAS:
300 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
301 break;
302 case RC_PRESUB_SUB:
303 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
304 break;
305 case RC_PRESUB_ADD:
306 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
307 break;
308 case RC_PRESUB_INV:
309 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
310 break;
311 default:
312 break;
313 }
314
315 code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
316 code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
317 code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
318
319 code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
320 code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
321 code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
322
323 code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
324 code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
325 code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
326
327 code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
328 code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
329 code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
330
331 code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
332 code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
333
334 if (inst->WriteALUResult) {
335 code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
336
337 if (inst->WriteALUResult == RC_ALURESULT_X)
338 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
339 else
340 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
341
342 code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
343 }
344 }
345
/**
 * Pack the four components of \p swizzle into consecutive 2-bit fields,
 * component 0 in the lowest bits. Only the low two bits of each
 * component are kept.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan = 0;

	while (chan < 4) {
		packed |= (GET_SWZ(swizzle, chan) & 0x3) << chan*2;
		chan++;
	}

	return packed;
}
354
355 /**
356 * Emit a single TEX instruction
357 */
358 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
359 {
360 PROG_CODE;
361
362 if (code->inst_end >= c->Base.max_alu_insts-1) {
363 error("emit_tex: Too many instructions");
364 return 0;
365 }
366
367 int ip = ++code->inst_end;
368
369 code->inst[ip].inst0 = R500_INST_TYPE_TEX
370 | (inst->DstReg.WriteMask << 11)
371 | R500_INST_TEX_SEM_WAIT;
372 code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
373 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
374
375 if (inst->TexSrcTarget == RC_TEXTURE_RECT)
376 code->inst[ip].inst1 |= R500_TEX_UNSCALED;
377
378 switch (inst->Opcode) {
379 case RC_OPCODE_KIL:
380 code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
381 break;
382 case RC_OPCODE_TEX:
383 code->inst[ip].inst1 |= R500_TEX_INST_LD;
384 break;
385 case RC_OPCODE_TXB:
386 code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
387 break;
388 case RC_OPCODE_TXP:
389 code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
390 break;
391 default:
392 error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
393 }
394
395 use_temporary(code, inst->SrcReg[0].Index);
396 if (inst->Opcode != RC_OPCODE_KIL)
397 use_temporary(code, inst->DstReg.Index);
398
399 code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
400 | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
401 | R500_TEX_DST_ADDR(inst->DstReg.Index)
402 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
403 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
404
405 return 1;
406 }
407
408 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
409 {
410 if (s->Code->inst_end >= s->C->max_alu_insts-1) {
411 rc_error(s->C, "emit_tex: Too many instructions");
412 return;
413 }
414
415 unsigned int newip = ++s->Code->inst_end;
416
417 /* Currently all loops use the same integer constant to intialize
418 * the loop variables. */
419 if(!s->Code->int_constants[0]) {
420 s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
421 s->Code->int_constant_count = 1;
422 }
423 s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
424
425 switch(inst->U.I.Opcode){
426 struct branch_info * branch;
427 struct r500_loop_info * loop;
428 case RC_OPCODE_BGNLOOP:
429 memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
430 s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
431
432 loop = &s->Loops[s->CurrentLoopDepth++];
433 memset(loop, 0, sizeof(struct r500_loop_info));
434 loop->BranchDepth = s->CurrentBranchDepth;
435 loop->BgnLoop = newip;
436
437 s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
438 | R500_FC_JUMP_FUNC(0x00)
439 | R500_FC_IGNORE_UNCOVERED
440 ;
441 break;
442 case RC_OPCODE_BRK:
443 loop = &s->Loops[s->CurrentLoopDepth - 1];
444 memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
445 loop->BrkCount, loop->BrkReserved, 1);
446
447 loop->Brks[loop->BrkCount++] = newip;
448 s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
449 | R500_FC_JUMP_FUNC(0xff)
450 | R500_FC_B_OP1_DECR
451 | R500_FC_B_POP_CNT(
452 s->CurrentBranchDepth - loop->BranchDepth)
453 | R500_FC_IGNORE_UNCOVERED
454 ;
455 break;
456
457 case RC_OPCODE_CONT:
458 loop = &s->Loops[s->CurrentLoopDepth - 1];
459 memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
460 loop->ContCount, loop->ContReserved, 1);
461 loop->Conts[loop->ContCount++] = newip;
462 s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
463 | R500_FC_JUMP_FUNC(0xff)
464 | R500_FC_B_OP1_DECR
465 | R500_FC_B_POP_CNT(
466 s->CurrentBranchDepth - loop->BranchDepth)
467 | R500_FC_IGNORE_UNCOVERED
468 ;
469 break;
470
471 case RC_OPCODE_ENDLOOP:
472 {
473 loop = &s->Loops[s->CurrentLoopDepth - 1];
474 /* Emit ENDLOOP */
475 s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
476 | R500_FC_JUMP_FUNC(0xff)
477 | R500_FC_JUMP_ANY
478 | R500_FC_IGNORE_UNCOVERED
479 ;
480 /* The constant integer at index 0 is used by all loops. */
481 s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
482 | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
483 ;
484
485 /* Set jump address and int constant for BGNLOOP */
486 s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
487 | R500_FC_JUMP_ADDR(newip)
488 ;
489
490 /* Set jump address for the BRK instructions. */
491 while(loop->BrkCount--) {
492 s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
493 R500_FC_JUMP_ADDR(newip + 1);
494 }
495
496 /* Set jump address for CONT instructions. */
497 while(loop->ContCount--) {
498 s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
499 R500_FC_JUMP_ADDR(newip);
500 }
501 s->CurrentLoopDepth--;
502 break;
503 }
504 case RC_OPCODE_IF:
505 if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) {
506 rc_error(s->C, "Branch depth exceeds hardware limit");
507 return;
508 }
509 memory_pool_array_reserve(&s->C->Pool, struct branch_info,
510 s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
511
512 branch = &s->Branches[s->CurrentBranchDepth++];
513 branch->If = newip;
514 branch->Else = -1;
515 branch->Endif = -1;
516
517 if (s->CurrentBranchDepth > s->MaxBranchDepth)
518 s->MaxBranchDepth = s->CurrentBranchDepth;
519
520 /* actual instruction is filled in at ENDIF time */
521 break;
522
523 case RC_OPCODE_ELSE:
524 if (!s->CurrentBranchDepth) {
525 rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
526 return;
527 }
528
529 branch = &s->Branches[s->CurrentBranchDepth - 1];
530 branch->Else = newip;
531
532 /* actual instruction is filled in at ENDIF time */
533 break;
534
535 case RC_OPCODE_ENDIF:
536 if (!s->CurrentBranchDepth) {
537 rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
538 return;
539 }
540
541 branch = &s->Branches[s->CurrentBranchDepth - 1];
542 branch->Endif = newip;
543
544 s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
545 | R500_FC_A_OP_NONE /* no address stack */
546 | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
547 | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
548 | R500_FC_B_OP1_NONE /* no branch counter if stay */
549 | R500_FC_B_POP_CNT(1)
550 ;
551 s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
552 s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
553 | R500_FC_A_OP_NONE /* no address stack */
554 | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
555 | R500_FC_B_OP0_INCR /* increment branch counter if stay */
556 | R500_FC_IGNORE_UNCOVERED
557 ;
558
559 if (branch->Else >= 0) {
560 /* increment branch counter also if jump */
561 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
562 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
563
564 s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
565 | R500_FC_A_OP_NONE /* no address stack */
566 | R500_FC_B_ELSE /* all active pixels want to jump */
567 | R500_FC_B_OP0_NONE /* no counter op if stay */
568 | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
569 | R500_FC_B_POP_CNT(1)
570 ;
571 s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
572 } else {
573 /* don't touch branch counter on jump */
574 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
575 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
576 }
577
578
579 s->CurrentBranchDepth--;
580 break;
581 default:
582 rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
583 }
584 }
585
586 void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
587 {
588 struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
589 struct emit_state s;
590 struct r500_fragment_program_code *code = &compiler->code->code.r500;
591
592 memset(&s, 0, sizeof(s));
593 s.C = &compiler->Base;
594 s.Code = code;
595
596 memset(code, 0, sizeof(*code));
597 code->max_temp_idx = 1;
598 code->inst_end = -1;
599
600 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
601 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
602 inst = inst->Next) {
603 if (inst->Type == RC_INSTRUCTION_NORMAL) {
604 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
605
606 if (opcode->IsFlowControl) {
607 emit_flowcontrol(&s, inst);
608 } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
609 continue;
610 } else {
611 emit_tex(compiler, &inst->U.I);
612 }
613 } else {
614 emit_paired(compiler, &inst->U.P);
615 }
616 }
617
618 if (code->max_temp_idx >= compiler->Base.max_temp_regs)
619 rc_error(&compiler->Base, "Too many hardware temporaries used");
620
621 if (compiler->Base.Error)
622 return;
623
624 if (code->inst_end == -1 ||
625 (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
626 /* This may happen when dead-code elimination is disabled or
627 * when most of the fragment program logic is leading to a KIL */
628 if (code->inst_end >= compiler->Base.max_alu_insts-1) {
629 rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
630 return;
631 }
632
633 int ip = ++code->inst_end;
634 code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
635 }
636
637 /* Enable full flow control mode if we are using loops or have if
638 * statements nested at least four deep. */
639 if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
640 if (code->max_temp_idx < 1)
641 code->max_temp_idx = 1;
642
643 code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
644 }
645 }