5da82d90f6716a117a092af86ed1b3a49d32452f
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r500_fragprog_emit.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 *
7 * All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 *
29 */
30
31 /**
32 * \file
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39 *
40 */
41
42 #include "r500_fragprog.h"
43
44 #include "../r300_reg.h"
45
46 #include "radeon_program_pair.h"
47
/*
 * Declares a local `code` pointer to the R500 hardware program of the
 * fragment program compiler `c` that is in scope at the expansion site.
 */
#define PROG_CODE struct r500_fragment_program_code *code = &c->code->code.r500
50
/*
 * Report a compile error via rc_error() on the compiler `c` that is in scope
 * at the expansion site. The message is prefixed with the current file and
 * function names and terminated with a newline.
 */
#define error(fmt, ...) do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while (0)
55
56
/**
 * Instruction slots belonging to one IF / ELSE / ENDIF construct.
 * Else and Endif are set to -1 when the IF is emitted and are filled in
 * when the matching ELSE / ENDIF is emitted.
 */
struct branch_info {
	int If;    /* slot of the IF instruction */
	int Else;  /* slot of the ELSE instruction, -1 if none was seen */
	int Endif; /* slot of the ENDIF instruction, -1 until it is seen */
};
62
/**
 * Book-keeping for one loop that is currently being emitted.
 */
struct r500_loop_info {
	/* Slot of the BGNLOOP instruction. */
	int BgnLoop;

	/* Branch depth that was current when the loop was opened. */
	int BranchDepth;

	/* Slots of the BRK instructions recorded for this loop. */
	int *Brks;
	int BrkCount;
	int BrkReserved;

	/* Slots of the CONT instructions recorded for this loop. */
	int *Conts;
	int ContCount;
	int ContReserved;
};
75
/**
 * State carried across instructions while emitting flow control.
 */
struct emit_state {
	struct radeon_compiler *C;
	struct r500_fragment_program_code *Code;

	/* Stack of currently open IF constructs. */
	struct branch_info *Branches;
	unsigned int CurrentBranchDepth;
	unsigned int BranchesReserved;

	/* Stack of currently open loops. */
	struct r500_loop_info *Loops;
	unsigned int CurrentLoopDepth;
	unsigned int LoopsReserved;

	/* Largest value CurrentBranchDepth has reached so far. */
	unsigned int MaxBranchDepth;
};
91
92 static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
93 {
94 switch(opcode) {
95 case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
96 case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
97 case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
98 case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
99 case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
100 case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
101 default:
102 error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
103 /* fall through */
104 case RC_OPCODE_NOP:
105 /* fall through */
106 case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
107 case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
108 case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
109 case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
110 }
111 }
112
113 static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
114 {
115 switch(opcode) {
116 case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
117 case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
118 case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
119 case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
120 case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
121 case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
122 case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
123 case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
124 case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
125 default:
126 error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
127 /* fall through */
128 case RC_OPCODE_NOP:
129 /* fall through */
130 case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
131 case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
132 case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
133 case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
134 case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
135 case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
136 }
137 }
138
139 static unsigned int fix_hw_swizzle(unsigned int swz)
140 {
141 switch (swz) {
142 case RC_SWIZZLE_ZERO:
143 case RC_SWIZZLE_UNUSED:
144 swz = 4;
145 break;
146 case RC_SWIZZLE_HALF:
147 swz = 5;
148 break;
149 case RC_SWIZZLE_ONE:
150 swz = 6;
151 break;
152 }
153
154 return swz;
155 }
156
157 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
158 {
159 unsigned int t = inst->RGB.Arg[arg].Source;
160 int comp;
161 t |= inst->RGB.Arg[arg].Negate << 11;
162 t |= inst->RGB.Arg[arg].Abs << 12;
163
164 for(comp = 0; comp < 3; ++comp)
165 t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
166
167 return t;
168 }
169
170 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
171 {
172 unsigned int t = inst->Alpha.Arg[i].Source;
173 t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
174 t |= inst->Alpha.Arg[i].Negate << 5;
175 t |= inst->Alpha.Arg[i].Abs << 6;
176 return t;
177 }
178
179 static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
180 {
181 switch(func) {
182 case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
183 case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
184 case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
185 case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
186 default:
187 rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
188 return 0;
189 }
190 }
191
192 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
193 {
194 if (index > code->max_temp_idx)
195 code->max_temp_idx = index;
196 }
197
198 static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
199 {
200 if (!src.Used)
201 return 0;
202
203 if (src.File == RC_FILE_CONSTANT) {
204 return src.Index | 0x100;
205 } else if (src.File == RC_FILE_TEMPORARY) {
206 use_temporary(code, src.Index);
207 return src.Index;
208 }
209
210 return 0;
211 }
212
213 /**
214 * NOP the specified instruction if it is not a texture lookup.
215 */
216 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
217 {
218 PROG_CODE;
219
220 if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
221 code->inst[ip].inst0 |= R500_INST_NOP;
222 }
223 }
224
225 /**
226 * Emit a paired ALU instruction.
227 */
228 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
229 {
230 int ip;
231 PROG_CODE;
232
233 if (code->inst_end >= c->Base.max_alu_insts-1) {
234 error("emit_alu: Too many instructions");
235 return;
236 }
237
238 ip = ++code->inst_end;
239
240 /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
241 if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
242 inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
243 if (ip > 0) {
244 alu_nop(c, ip - 1);
245 }
246 }
247
248 code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
249 code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
250
251 if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
252 code->inst[ip].inst0 = R500_INST_TYPE_OUT;
253 if (inst->WriteALUResult) {
254 error("Cannot write output and ALU result at the same time");
255 return;
256 }
257 } else {
258 code->inst[ip].inst0 = R500_INST_TYPE_ALU;
259 }
260 code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
261
262 code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
263 code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
264 if (inst->Nop) {
265 code->inst[ip].inst0 |= R500_INST_NOP;
266 }
267 if (inst->Alpha.DepthWriteMask) {
268 code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
269 c->code->writes_depth = 1;
270 }
271
272 code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
273 code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
274 use_temporary(code, inst->Alpha.DestIndex);
275 use_temporary(code, inst->RGB.DestIndex);
276
277 if (inst->RGB.Saturate)
278 code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
279 if (inst->Alpha.Saturate)
280 code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
281
282 /* Set the presubtract operation. */
283 switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
284 case RC_PRESUB_BIAS:
285 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
286 break;
287 case RC_PRESUB_SUB:
288 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
289 break;
290 case RC_PRESUB_ADD:
291 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
292 break;
293 case RC_PRESUB_INV:
294 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
295 break;
296 default:
297 break;
298 }
299 switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
300 case RC_PRESUB_BIAS:
301 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
302 break;
303 case RC_PRESUB_SUB:
304 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
305 break;
306 case RC_PRESUB_ADD:
307 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
308 break;
309 case RC_PRESUB_INV:
310 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
311 break;
312 default:
313 break;
314 }
315
316 code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
317 code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
318 code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
319
320 code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
321 code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
322 code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
323
324 code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
325 code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
326 code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
327
328 code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
329 code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
330 code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
331
332 code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
333 code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
334
335 if (inst->WriteALUResult) {
336 code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
337
338 if (inst->WriteALUResult == RC_ALURESULT_X)
339 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
340 else
341 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
342
343 code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
344 }
345 }
346
/**
 * Pack the four components of a compiler swizzle into 2 bits each, with
 * component 0 in the lowest bits. Only the low 2 bits of each selector
 * are kept.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan = 4;

	while (chan-- > 0)
		packed |= (GET_SWZ(swizzle, chan) & 0x3) << (2 * chan);

	return packed;
}
355
356 /**
357 * Emit a single TEX instruction
358 */
359 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
360 {
361 int ip;
362 PROG_CODE;
363
364 if (code->inst_end >= c->Base.max_alu_insts-1) {
365 error("emit_tex: Too many instructions");
366 return 0;
367 }
368
369 ip = ++code->inst_end;
370
371 code->inst[ip].inst0 = R500_INST_TYPE_TEX
372 | (inst->DstReg.WriteMask << 11)
373 | R500_INST_TEX_SEM_WAIT;
374 code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
375 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
376
377 if (inst->TexSrcTarget == RC_TEXTURE_RECT)
378 code->inst[ip].inst1 |= R500_TEX_UNSCALED;
379
380 switch (inst->Opcode) {
381 case RC_OPCODE_KIL:
382 code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
383 break;
384 case RC_OPCODE_TEX:
385 code->inst[ip].inst1 |= R500_TEX_INST_LD;
386 break;
387 case RC_OPCODE_TXB:
388 code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
389 break;
390 case RC_OPCODE_TXP:
391 code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
392 break;
393 default:
394 error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
395 }
396
397 use_temporary(code, inst->SrcReg[0].Index);
398 if (inst->Opcode != RC_OPCODE_KIL)
399 use_temporary(code, inst->DstReg.Index);
400
401 code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
402 | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
403 | R500_TEX_DST_ADDR(inst->DstReg.Index)
404 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
405 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
406
407 return 1;
408 }
409
410 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
411 {
412 unsigned int newip;
413
414 if (s->Code->inst_end >= s->C->max_alu_insts-1) {
415 rc_error(s->C, "emit_tex: Too many instructions");
416 return;
417 }
418
419 newip = ++s->Code->inst_end;
420
421 /* Currently all loops use the same integer constant to intialize
422 * the loop variables. */
423 if(!s->Code->int_constants[0]) {
424 s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
425 s->Code->int_constant_count = 1;
426 }
427 s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
428
429 switch(inst->U.I.Opcode){
430 struct branch_info * branch;
431 struct r500_loop_info * loop;
432 case RC_OPCODE_BGNLOOP:
433 memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
434 s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
435
436 loop = &s->Loops[s->CurrentLoopDepth++];
437 memset(loop, 0, sizeof(struct r500_loop_info));
438 loop->BranchDepth = s->CurrentBranchDepth;
439 loop->BgnLoop = newip;
440
441 s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
442 | R500_FC_JUMP_FUNC(0x00)
443 | R500_FC_IGNORE_UNCOVERED
444 ;
445 break;
446 case RC_OPCODE_BRK:
447 loop = &s->Loops[s->CurrentLoopDepth - 1];
448 memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
449 loop->BrkCount, loop->BrkReserved, 1);
450
451 loop->Brks[loop->BrkCount++] = newip;
452 s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
453 | R500_FC_JUMP_FUNC(0xff)
454 | R500_FC_B_OP1_DECR
455 | R500_FC_B_POP_CNT(
456 s->CurrentBranchDepth - loop->BranchDepth)
457 | R500_FC_IGNORE_UNCOVERED
458 ;
459 break;
460
461 case RC_OPCODE_CONT:
462 loop = &s->Loops[s->CurrentLoopDepth - 1];
463 memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
464 loop->ContCount, loop->ContReserved, 1);
465 loop->Conts[loop->ContCount++] = newip;
466 s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
467 | R500_FC_JUMP_FUNC(0xff)
468 | R500_FC_B_OP1_DECR
469 | R500_FC_B_POP_CNT(
470 s->CurrentBranchDepth - loop->BranchDepth)
471 | R500_FC_IGNORE_UNCOVERED
472 ;
473 break;
474
475 case RC_OPCODE_ENDLOOP:
476 {
477 loop = &s->Loops[s->CurrentLoopDepth - 1];
478 /* Emit ENDLOOP */
479 s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
480 | R500_FC_JUMP_FUNC(0xff)
481 | R500_FC_JUMP_ANY
482 | R500_FC_IGNORE_UNCOVERED
483 ;
484 /* The constant integer at index 0 is used by all loops. */
485 s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
486 | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
487 ;
488
489 /* Set jump address and int constant for BGNLOOP */
490 s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
491 | R500_FC_JUMP_ADDR(newip)
492 ;
493
494 /* Set jump address for the BRK instructions. */
495 while(loop->BrkCount--) {
496 s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
497 R500_FC_JUMP_ADDR(newip + 1);
498 }
499
500 /* Set jump address for CONT instructions. */
501 while(loop->ContCount--) {
502 s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
503 R500_FC_JUMP_ADDR(newip);
504 }
505 s->CurrentLoopDepth--;
506 break;
507 }
508 case RC_OPCODE_IF:
509 if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
510 rc_error(s->C, "Branch depth exceeds hardware limit");
511 return;
512 }
513 memory_pool_array_reserve(&s->C->Pool, struct branch_info,
514 s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
515
516 branch = &s->Branches[s->CurrentBranchDepth++];
517 branch->If = newip;
518 branch->Else = -1;
519 branch->Endif = -1;
520
521 if (s->CurrentBranchDepth > s->MaxBranchDepth)
522 s->MaxBranchDepth = s->CurrentBranchDepth;
523
524 /* actual instruction is filled in at ENDIF time */
525 break;
526
527 case RC_OPCODE_ELSE:
528 if (!s->CurrentBranchDepth) {
529 rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
530 return;
531 }
532
533 branch = &s->Branches[s->CurrentBranchDepth - 1];
534 branch->Else = newip;
535
536 /* actual instruction is filled in at ENDIF time */
537 break;
538
539 case RC_OPCODE_ENDIF:
540 if (!s->CurrentBranchDepth) {
541 rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
542 return;
543 }
544
545 branch = &s->Branches[s->CurrentBranchDepth - 1];
546 branch->Endif = newip;
547
548 s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
549 | R500_FC_A_OP_NONE /* no address stack */
550 | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
551 | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
552 | R500_FC_B_OP1_NONE /* no branch counter if stay */
553 | R500_FC_B_POP_CNT(1)
554 ;
555 s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
556 s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
557 | R500_FC_A_OP_NONE /* no address stack */
558 | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
559 | R500_FC_B_OP0_INCR /* increment branch counter if stay */
560 | R500_FC_IGNORE_UNCOVERED
561 ;
562
563 if (branch->Else >= 0) {
564 /* increment branch counter also if jump */
565 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
566 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
567
568 s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
569 | R500_FC_A_OP_NONE /* no address stack */
570 | R500_FC_B_ELSE /* all active pixels want to jump */
571 | R500_FC_B_OP0_NONE /* no counter op if stay */
572 | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
573 | R500_FC_B_POP_CNT(1)
574 ;
575 s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
576 } else {
577 /* don't touch branch counter on jump */
578 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
579 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
580 }
581
582
583 s->CurrentBranchDepth--;
584 break;
585 default:
586 rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
587 }
588 }
589
590 void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
591 {
592 struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
593 struct emit_state s;
594 struct r500_fragment_program_code *code = &compiler->code->code.r500;
595
596 memset(&s, 0, sizeof(s));
597 s.C = &compiler->Base;
598 s.Code = code;
599
600 memset(code, 0, sizeof(*code));
601 code->max_temp_idx = 1;
602 code->inst_end = -1;
603
604 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
605 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
606 inst = inst->Next) {
607 if (inst->Type == RC_INSTRUCTION_NORMAL) {
608 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
609
610 if (opcode->IsFlowControl) {
611 emit_flowcontrol(&s, inst);
612 } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
613 continue;
614 } else {
615 emit_tex(compiler, &inst->U.I);
616 }
617 } else {
618 emit_paired(compiler, &inst->U.P);
619 }
620 }
621
622 if (code->max_temp_idx >= compiler->Base.max_temp_regs)
623 rc_error(&compiler->Base, "Too many hardware temporaries used");
624
625 if (compiler->Base.Error)
626 return;
627
628 if (code->inst_end == -1 ||
629 (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
630 int ip;
631
632 /* This may happen when dead-code elimination is disabled or
633 * when most of the fragment program logic is leading to a KIL */
634 if (code->inst_end >= compiler->Base.max_alu_insts-1) {
635 rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
636 return;
637 }
638
639 ip = ++code->inst_end;
640 code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
641 }
642
643 /* Enable full flow control mode if we are using loops or have if
644 * statements nested at least four deep. */
645 if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
646 if (code->max_temp_idx < 1)
647 code->max_temp_idx = 1;
648
649 code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
650 }
651 }