Merge branch 'glsl-to-tgsi'
[mesa.git] / src / gallium / drivers / r300 / compiler / r500_fragprog_emit.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 *
7 * All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 *
29 */
30
31 /**
32 * \file
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39 *
40 */
41
42 #include "r500_fragprog.h"
43
44 #include "../r300_reg.h"
45
46 #include "radeon_program_pair.h"
47
/*
 * Declares a local `code` pointer to the R500 hardware program being built.
 * Expects a variable named `c` (the fragment program compiler) in scope.
 */
#define PROG_CODE \
	struct r500_fragment_program_code *code = &(c->code->code.r500)
50
/*
 * Report a compile error via rc_error(), prefixing the message with the
 * current file and function name and appending a newline.
 * Expects a variable named `c` (the fragment program compiler) in scope.
 */
#define error(fmt, ...) \
	do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while (0)
55
56
/**
 * Instruction addresses of the flow-control slots that belong to one
 * IF / ELSE / ENDIF construct.
 */
struct branch_info {
	int If;    /* ip of the IF slot */
	int Else;  /* ip of the ELSE slot; -1 while no ELSE has been seen */
	int Endif; /* ip of the ENDIF slot; -1 until ENDIF is emitted */
};
62
/**
 * Bookkeeping for one open loop: where it starts and which BRK / CONT
 * instructions still need their jump address patched at ENDLOOP time.
 */
struct r500_loop_info {
	int BgnLoop;      /* ip of the BGNLOOP instruction */

	int BranchDepth;  /* branch nesting depth at the time the loop was opened */

	int * Brks;       /* ips of the BRK instructions inside this loop */
	int BrkCount;     /* number of valid entries in Brks */
	int BrkReserved;  /* capacity bookkeeping for Brks */

	int * Conts;      /* ips of the CONT instructions inside this loop */
	int ContCount;    /* number of valid entries in Conts */
	int ContReserved; /* capacity bookkeeping for Conts */
};
75
/**
 * State carried across flow-control emission: the compiler, the program
 * being written, and the stacks of currently open branches and loops.
 */
struct emit_state {
	struct radeon_compiler * C;               /* compiler used for error reporting and pool allocation */
	struct r500_fragment_program_code * Code; /* hardware program being filled in */

	struct branch_info * Branches;            /* stack of open IF constructs */
	unsigned int CurrentBranchDepth;          /* number of entries in use in Branches */
	unsigned int BranchesReserved;            /* capacity bookkeeping for Branches */

	struct r500_loop_info * Loops;            /* stack of open loops */
	unsigned int CurrentLoopDepth;            /* number of entries in use in Loops */
	unsigned int LoopsReserved;               /* capacity bookkeeping for Loops */

	unsigned int MaxBranchDepth;              /* deepest branch nesting seen so far */

};
91
92 static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
93 {
94 switch(opcode) {
95 case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
96 case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND;
97 case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
98 case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
99 case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
100 case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
101 case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
102 default:
103 error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
104 /* fall through */
105 case RC_OPCODE_NOP:
106 /* fall through */
107 case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
108 case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
109 case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
110 case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
111 }
112 }
113
114 static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
115 {
116 switch(opcode) {
117 case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
118 case RC_OPCODE_CND: return R500_ALPHA_OP_CND;
119 case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
120 case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
121 case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
122 case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
123 case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
124 case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
125 case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
126 case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
127 default:
128 error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
129 /* fall through */
130 case RC_OPCODE_NOP:
131 /* fall through */
132 case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
133 case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
134 case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
135 case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
136 case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
137 case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
138 }
139 }
140
141 static unsigned int fix_hw_swizzle(unsigned int swz)
142 {
143 switch (swz) {
144 case RC_SWIZZLE_ZERO:
145 case RC_SWIZZLE_UNUSED:
146 swz = 4;
147 break;
148 case RC_SWIZZLE_HALF:
149 swz = 5;
150 break;
151 case RC_SWIZZLE_ONE:
152 swz = 6;
153 break;
154 }
155
156 return swz;
157 }
158
159 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
160 {
161 unsigned int t = inst->RGB.Arg[arg].Source;
162 int comp;
163 t |= inst->RGB.Arg[arg].Negate << 11;
164 t |= inst->RGB.Arg[arg].Abs << 12;
165
166 for(comp = 0; comp < 3; ++comp)
167 t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
168
169 return t;
170 }
171
172 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
173 {
174 unsigned int t = inst->Alpha.Arg[i].Source;
175 t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
176 t |= inst->Alpha.Arg[i].Negate << 5;
177 t |= inst->Alpha.Arg[i].Abs << 6;
178 return t;
179 }
180
181 static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
182 {
183 switch(func) {
184 case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
185 case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
186 case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
187 case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
188 default:
189 rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
190 return 0;
191 }
192 }
193
194 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
195 {
196 if (index > code->max_temp_idx)
197 code->max_temp_idx = index;
198 }
199
200 static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
201 {
202 /* From docs:
203 * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
204 * MSB = 1 << 7 */
205 if (!src.Used)
206 return 1 << 7;
207
208 if (src.File == RC_FILE_CONSTANT) {
209 return src.Index | R500_RGB_ADDR0_CONST;
210 } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
211 use_temporary(code, src.Index);
212 return src.Index;
213 }
214
215 return 0;
216 }
217
218 /**
219 * NOP the specified instruction if it is not a texture lookup.
220 */
221 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
222 {
223 PROG_CODE;
224
225 if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
226 code->inst[ip].inst0 |= R500_INST_NOP;
227 }
228 }
229
230 /**
231 * Emit a paired ALU instruction.
232 */
233 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
234 {
235 int ip;
236 PROG_CODE;
237
238 if (code->inst_end >= c->Base.max_alu_insts-1) {
239 error("emit_alu: Too many instructions");
240 return;
241 }
242
243 ip = ++code->inst_end;
244
245 /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
246 if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
247 inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
248 if (ip > 0) {
249 alu_nop(c, ip - 1);
250 }
251 }
252
253 code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
254 code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
255
256 if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
257 code->inst[ip].inst0 = R500_INST_TYPE_OUT;
258 if (inst->WriteALUResult) {
259 error("Cannot write output and ALU result at the same time");
260 return;
261 }
262 } else {
263 code->inst[ip].inst0 = R500_INST_TYPE_ALU;
264 }
265 code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
266
267 code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
268 code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
269 code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
270 if (inst->Nop) {
271 code->inst[ip].inst0 |= R500_INST_NOP;
272 }
273 if (inst->Alpha.DepthWriteMask) {
274 code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
275 c->code->writes_depth = 1;
276 }
277
278 code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
279 code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
280 use_temporary(code, inst->Alpha.DestIndex);
281 use_temporary(code, inst->RGB.DestIndex);
282
283 if (inst->RGB.Saturate)
284 code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
285 if (inst->Alpha.Saturate)
286 code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
287
288 /* Set the presubtract operation. */
289 switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
290 case RC_PRESUB_BIAS:
291 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
292 break;
293 case RC_PRESUB_SUB:
294 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
295 break;
296 case RC_PRESUB_ADD:
297 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
298 break;
299 case RC_PRESUB_INV:
300 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
301 break;
302 default:
303 break;
304 }
305 switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
306 case RC_PRESUB_BIAS:
307 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
308 break;
309 case RC_PRESUB_SUB:
310 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
311 break;
312 case RC_PRESUB_ADD:
313 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
314 break;
315 case RC_PRESUB_INV:
316 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
317 break;
318 default:
319 break;
320 }
321
322 code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
323 code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
324 code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
325
326 code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
327 code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
328 code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
329
330 code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
331 code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
332 code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
333
334 code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
335 code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
336 code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
337
338 code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
339 code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
340
341 if (inst->WriteALUResult) {
342 code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
343
344 if (inst->WriteALUResult == RC_ALURESULT_X)
345 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
346 else
347 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
348
349 code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
350 }
351 }
352
/**
 * Pack the four channel selectors of a compiler swizzle into an 8-bit value,
 * two bits per channel, with channel 0 in the lowest bits. Only the low two
 * bits of each selector are kept.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan = 4;

	while (chan-- > 0)
		packed |= (GET_SWZ(swizzle, chan) & 0x3) << chan*2;

	return packed;
}
361
362 /**
363 * Emit a single TEX instruction
364 */
365 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
366 {
367 int ip;
368 PROG_CODE;
369
370 if (code->inst_end >= c->Base.max_alu_insts-1) {
371 error("emit_tex: Too many instructions");
372 return 0;
373 }
374
375 ip = ++code->inst_end;
376
377 code->inst[ip].inst0 = R500_INST_TYPE_TEX
378 | (inst->DstReg.WriteMask << 11)
379 | R500_INST_TEX_SEM_WAIT;
380 code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
381 | R500_TEX_SEM_ACQUIRE;
382
383 if (inst->TexSrcTarget == RC_TEXTURE_RECT)
384 code->inst[ip].inst1 |= R500_TEX_UNSCALED;
385
386 switch (inst->Opcode) {
387 case RC_OPCODE_KIL:
388 code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
389 break;
390 case RC_OPCODE_TEX:
391 code->inst[ip].inst1 |= R500_TEX_INST_LD;
392 break;
393 case RC_OPCODE_TXB:
394 code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
395 break;
396 case RC_OPCODE_TXP:
397 code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
398 break;
399 case RC_OPCODE_TXD:
400 code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
401 break;
402 case RC_OPCODE_TXL:
403 code->inst[ip].inst1 |= R500_TEX_INST_LOD;
404 break;
405 default:
406 error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
407 }
408
409 use_temporary(code, inst->SrcReg[0].Index);
410 if (inst->Opcode != RC_OPCODE_KIL)
411 use_temporary(code, inst->DstReg.Index);
412
413 code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
414 | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
415 | R500_TEX_DST_ADDR(inst->DstReg.Index)
416 | (GET_SWZ(inst->TexSwizzle, 0) << 24)
417 | (GET_SWZ(inst->TexSwizzle, 1) << 26)
418 | (GET_SWZ(inst->TexSwizzle, 2) << 28)
419 | (GET_SWZ(inst->TexSwizzle, 3) << 30)
420 ;
421
422 if (inst->Opcode == RC_OPCODE_TXD) {
423 use_temporary(code, inst->SrcReg[1].Index);
424 use_temporary(code, inst->SrcReg[2].Index);
425
426 /* DX and DY parameters are specified in a separate register. */
427 code->inst[ip].inst3 =
428 R500_DX_ADDR(inst->SrcReg[1].Index) |
429 (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
430 R500_DY_ADDR(inst->SrcReg[2].Index) |
431 (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
432 }
433
434 return 1;
435 }
436
437 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
438 {
439 unsigned int newip;
440
441 if (s->Code->inst_end >= s->C->max_alu_insts-1) {
442 rc_error(s->C, "emit_tex: Too many instructions");
443 return;
444 }
445
446 newip = ++s->Code->inst_end;
447
448 /* Currently all loops use the same integer constant to intialize
449 * the loop variables. */
450 if(!s->Code->int_constants[0]) {
451 s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
452 s->Code->int_constant_count = 1;
453 }
454 s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
455
456 switch(inst->U.I.Opcode){
457 struct branch_info * branch;
458 struct r500_loop_info * loop;
459 case RC_OPCODE_BGNLOOP:
460 memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
461 s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
462
463 loop = &s->Loops[s->CurrentLoopDepth++];
464 memset(loop, 0, sizeof(struct r500_loop_info));
465 loop->BranchDepth = s->CurrentBranchDepth;
466 loop->BgnLoop = newip;
467
468 s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
469 | R500_FC_JUMP_FUNC(0x00)
470 | R500_FC_IGNORE_UNCOVERED
471 ;
472 break;
473 case RC_OPCODE_BRK:
474 loop = &s->Loops[s->CurrentLoopDepth - 1];
475 memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
476 loop->BrkCount, loop->BrkReserved, 1);
477
478 loop->Brks[loop->BrkCount++] = newip;
479 s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
480 | R500_FC_JUMP_FUNC(0xff)
481 | R500_FC_B_OP1_DECR
482 | R500_FC_B_POP_CNT(
483 s->CurrentBranchDepth - loop->BranchDepth)
484 | R500_FC_IGNORE_UNCOVERED
485 ;
486 break;
487
488 case RC_OPCODE_CONT:
489 loop = &s->Loops[s->CurrentLoopDepth - 1];
490 memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
491 loop->ContCount, loop->ContReserved, 1);
492 loop->Conts[loop->ContCount++] = newip;
493 s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
494 | R500_FC_JUMP_FUNC(0xff)
495 | R500_FC_B_OP1_DECR
496 | R500_FC_B_POP_CNT(
497 s->CurrentBranchDepth - loop->BranchDepth)
498 | R500_FC_IGNORE_UNCOVERED
499 ;
500 break;
501
502 case RC_OPCODE_ENDLOOP:
503 {
504 loop = &s->Loops[s->CurrentLoopDepth - 1];
505 /* Emit ENDLOOP */
506 s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
507 | R500_FC_JUMP_FUNC(0xff)
508 | R500_FC_JUMP_ANY
509 | R500_FC_IGNORE_UNCOVERED
510 ;
511 /* The constant integer at index 0 is used by all loops. */
512 s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
513 | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
514 ;
515
516 /* Set jump address and int constant for BGNLOOP */
517 s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
518 | R500_FC_JUMP_ADDR(newip)
519 ;
520
521 /* Set jump address for the BRK instructions. */
522 while(loop->BrkCount--) {
523 s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
524 R500_FC_JUMP_ADDR(newip + 1);
525 }
526
527 /* Set jump address for CONT instructions. */
528 while(loop->ContCount--) {
529 s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
530 R500_FC_JUMP_ADDR(newip);
531 }
532 s->CurrentLoopDepth--;
533 break;
534 }
535 case RC_OPCODE_IF:
536 if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
537 rc_error(s->C, "Branch depth exceeds hardware limit");
538 return;
539 }
540 memory_pool_array_reserve(&s->C->Pool, struct branch_info,
541 s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
542
543 branch = &s->Branches[s->CurrentBranchDepth++];
544 branch->If = newip;
545 branch->Else = -1;
546 branch->Endif = -1;
547
548 if (s->CurrentBranchDepth > s->MaxBranchDepth)
549 s->MaxBranchDepth = s->CurrentBranchDepth;
550
551 /* actual instruction is filled in at ENDIF time */
552 break;
553
554 case RC_OPCODE_ELSE:
555 if (!s->CurrentBranchDepth) {
556 rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
557 return;
558 }
559
560 branch = &s->Branches[s->CurrentBranchDepth - 1];
561 branch->Else = newip;
562
563 /* actual instruction is filled in at ENDIF time */
564 break;
565
566 case RC_OPCODE_ENDIF:
567 if (!s->CurrentBranchDepth) {
568 rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
569 return;
570 }
571
572 branch = &s->Branches[s->CurrentBranchDepth - 1];
573 branch->Endif = newip;
574
575 s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
576 | R500_FC_A_OP_NONE /* no address stack */
577 | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
578 | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
579 | R500_FC_B_OP1_NONE /* no branch counter if stay */
580 | R500_FC_B_POP_CNT(1)
581 ;
582 s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
583 s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
584 | R500_FC_A_OP_NONE /* no address stack */
585 | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
586 | R500_FC_B_OP0_INCR /* increment branch counter if stay */
587 | R500_FC_IGNORE_UNCOVERED
588 ;
589
590 if (branch->Else >= 0) {
591 /* increment branch counter also if jump */
592 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
593 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
594
595 s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
596 | R500_FC_A_OP_NONE /* no address stack */
597 | R500_FC_B_ELSE /* all active pixels want to jump */
598 | R500_FC_B_OP0_NONE /* no counter op if stay */
599 | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
600 | R500_FC_B_POP_CNT(1)
601 ;
602 s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
603 } else {
604 /* don't touch branch counter on jump */
605 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
606 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
607 }
608
609
610 s->CurrentBranchDepth--;
611 break;
612 default:
613 rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
614 }
615 }
616
617 void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
618 {
619 struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
620 struct emit_state s;
621 struct r500_fragment_program_code *code = &compiler->code->code.r500;
622
623 memset(&s, 0, sizeof(s));
624 s.C = &compiler->Base;
625 s.Code = code;
626
627 memset(code, 0, sizeof(*code));
628 code->max_temp_idx = 1;
629 code->inst_end = -1;
630
631 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
632 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
633 inst = inst->Next) {
634 if (inst->Type == RC_INSTRUCTION_NORMAL) {
635 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
636
637 if (opcode->IsFlowControl) {
638 emit_flowcontrol(&s, inst);
639 } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
640 continue;
641 } else {
642 emit_tex(compiler, &inst->U.I);
643 }
644 } else {
645 emit_paired(compiler, &inst->U.P);
646 }
647 }
648
649 if (code->max_temp_idx >= compiler->Base.max_temp_regs)
650 rc_error(&compiler->Base, "Too many hardware temporaries used");
651
652 if (compiler->Base.Error)
653 return;
654
655 if (code->inst_end == -1 ||
656 (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
657 int ip;
658
659 /* This may happen when dead-code elimination is disabled or
660 * when most of the fragment program logic is leading to a KIL */
661 if (code->inst_end >= compiler->Base.max_alu_insts-1) {
662 rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
663 return;
664 }
665
666 ip = ++code->inst_end;
667 code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
668 }
669
670 /* Enable full flow control mode if we are using loops or have if
671 * statements nested at least four deep. */
672 if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
673 if (code->max_temp_idx < 1)
674 code->max_temp_idx = 1;
675
676 code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
677 }
678 }