st/dri: Don't check for null when user ensures non-null
[mesa.git] / src / gallium / drivers / i965 / brw_wm_glsl.c
1 #include "util/u_math.h"
2
3
4 #include "brw_context.h"
5 #include "brw_eu.h"
6 #include "brw_wm.h"
7
8
9 static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
10 const struct brw_fp_instruction *inst,
11 GLuint component);
12
13
14 static void
15 reclaim_temps(struct brw_wm_compile *c);
16
17
18 /** Mark GRF register as used. */
19 static void
20 prealloc_grf(struct brw_wm_compile *c, int r)
21 {
22 c->used_grf[r] = GL_TRUE;
23 }
24
25
26 /** Mark given GRF register as not in use. */
27 static void
28 release_grf(struct brw_wm_compile *c, int r)
29 {
30 /*assert(c->used_grf[r]);*/
31 c->used_grf[r] = GL_FALSE;
32 c->first_free_grf = MIN2(c->first_free_grf, r);
33 }
34
35
36 /** Return index of a free GRF, mark it as used. */
37 static int
38 alloc_grf(struct brw_wm_compile *c)
39 {
40 GLuint r;
41 for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
42 if (!c->used_grf[r]) {
43 c->used_grf[r] = GL_TRUE;
44 c->first_free_grf = r + 1; /* a guess */
45 return r;
46 }
47 }
48
49 /* no free temps, try to reclaim some */
50 reclaim_temps(c);
51 c->first_free_grf = 0;
52
53 /* try alloc again */
54 for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
55 if (!c->used_grf[r]) {
56 c->used_grf[r] = GL_TRUE;
57 c->first_free_grf = r + 1; /* a guess */
58 return r;
59 }
60 }
61
62 for (r = 0; r < BRW_WM_MAX_GRF; r++) {
63 assert(c->used_grf[r]);
64 }
65
66 /* really, no free GRF regs found */
67 if (!c->out_of_regs) {
68 /* print warning once per compilation */
69 debug_printf("%s: ran out of registers for fragment program", __FUNCTION__);
70 c->out_of_regs = GL_TRUE;
71 }
72
73 return -1;
74 }
75
76
77 /** Return number of GRF registers used */
78 static int
79 num_grf_used(const struct brw_wm_compile *c)
80 {
81 int r;
82 for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--)
83 if (c->used_grf[r])
84 return r + 1;
85 return 0;
86 }
87
88
89
90 /**
91 * Record the mapping of a Mesa register to a hardware register.
92 */
93 static void set_reg(struct brw_wm_compile *c, int file, int index,
94 int component, struct brw_reg reg)
95 {
96 c->wm_regs[file][index][component].reg = reg;
97 c->wm_regs[file][index][component].inited = GL_TRUE;
98 }
99
100 static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
101 {
102 struct brw_reg reg;
103
104 /* if we need to allocate another temp, grow the tmp_regs[] array */
105 if (c->tmp_index == c->tmp_max) {
106 int r = alloc_grf(c);
107 if (r < 0) {
108 /*printf("Out of temps in %s\n", __FUNCTION__);*/
109 r = 50; /* XXX random register! */
110 }
111 c->tmp_regs[ c->tmp_max++ ] = r;
112 }
113
114 /* form the GRF register */
115 reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
116 /*printf("alloc_temp %d\n", reg.nr);*/
117 assert(reg.nr < BRW_WM_MAX_GRF);
118 return reg;
119
120 }
121
122 /**
123 * Save current temp register info.
124 * There must be a matching call to release_tmps().
125 */
126 static int mark_tmps(struct brw_wm_compile *c)
127 {
128 return c->tmp_index;
129 }
130
131 static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index )
132 {
133 return brw_vec8_grf( c->tmp_regs[ index ], 0 );
134 }
135
136 static void release_tmps(struct brw_wm_compile *c, int mark)
137 {
138 c->tmp_index = mark;
139 }
140
141 /**
142 * Convert Mesa src register to brw register.
143 *
144 * Since we're running in SOA mode each Mesa register corresponds to four
145 * hardware registers. We allocate the hardware registers as needed here.
146 *
147 * \param file register file, one of PROGRAM_x
148 * \param index register number
149 * \param component src component (X=0, Y=1, Z=2, W=3)
150 * \param nr not used?!?
151 * \param neg negate value?
152 * \param abs take absolute value?
153 */
154 static struct brw_reg
155 get_reg(struct brw_wm_compile *c, int file, int index, int component,
156 int nr, GLuint neg, GLuint abs)
157 {
158 struct brw_reg reg;
159 switch (file) {
160 case TGSI_FILE_NULL:
161 return brw_null_reg();
162
163 case TGSI_FILE_CONSTANT:
164 case TGSI_FILE_TEMPORARY:
165 case TGSI_FILE_INPUT:
166 case TGSI_FILE_OUTPUT:
167 case BRW_FILE_PAYLOAD:
168 break;
169
170 default:
171 debug_printf("%s: Unexpected file type\n", __FUNCTION__);
172 return brw_null_reg();
173 }
174
175 assert(index < 256);
176 assert(component < 4);
177
178 /* see if we've already allocated a HW register for this Mesa register */
179 if (c->wm_regs[file][index][component].inited) {
180 /* yes, re-use */
181 reg = c->wm_regs[file][index][component].reg;
182 }
183 else {
184 /* no, allocate new register */
185 int grf = alloc_grf(c);
186 /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
187 if (grf < 0) {
188 /* totally out of temps */
189 grf = 51; /* XXX random register! */
190 }
191
192 reg = brw_vec8_grf(grf, 0);
193 /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
194
195 set_reg(c, file, index, component, reg);
196 }
197
198 if (neg & (1 << component)) {
199 reg = negate(reg);
200 }
201 if (abs)
202 reg = brw_abs(reg);
203 return reg;
204 }
205
206
207
208
209 /**
210 * Find first/last instruction that references each temporary register.
211 */
212 GLboolean
213 _mesa_find_temp_intervals(const struct prog_instruction *instructions,
214 GLuint numInstructions,
215 GLint intBegin[MAX_PROGRAM_TEMPS],
216 GLint intEnd[MAX_PROGRAM_TEMPS])
217 {
218 struct loop_info
219 {
220 GLuint Start, End; /**< Start, end instructions of loop */
221 };
222 struct loop_info loopStack[MAX_LOOP_NESTING];
223 GLuint loopStackDepth = 0;
224 GLuint i;
225
226 for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
227 intBegin[i] = intEnd[i] = -1;
228 }
229
230 /* Scan instructions looking for temporary registers */
231 for (i = 0; i < numInstructions; i++) {
232 const struct prog_instruction *inst = instructions + i;
233 if (inst->Opcode == OPCODE_BGNLOOP) {
234 loopStack[loopStackDepth].Start = i;
235 loopStack[loopStackDepth].End = inst->BranchTarget;
236 loopStackDepth++;
237 }
238 else if (inst->Opcode == OPCODE_ENDLOOP) {
239 loopStackDepth--;
240 }
241 else if (inst->Opcode == OPCODE_CAL) {
242 return GL_FALSE;
243 }
244 else {
245 const GLuint numSrc = 3;
246 GLuint j;
247 for (j = 0; j < numSrc; j++) {
248 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
249 const GLuint index = inst->SrcReg[j].Index;
250 if (inst->SrcReg[j].RelAddr)
251 return GL_FALSE;
252 update_interval(intBegin, intEnd, index, i);
253 if (loopStackDepth > 0) {
254 /* extend temp register's interval to end of loop */
255 GLuint loopEnd = loopStack[loopStackDepth - 1].End;
256 update_interval(intBegin, intEnd, index, loopEnd);
257 }
258 }
259 }
260 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
261 const GLuint index = inst->DstReg.Index;
262 if (inst->DstReg.RelAddr)
263 return GL_FALSE;
264 update_interval(intBegin, intEnd, index, i);
265 if (loopStackDepth > 0) {
266 /* extend temp register's interval to end of loop */
267 GLuint loopEnd = loopStack[loopStackDepth - 1].End;
268 update_interval(intBegin, intEnd, index, loopEnd);
269 }
270 }
271 }
272 }
273
274 return GL_TRUE;
275 }
276
277
278 /**
279 * This is called if we run out of GRF registers. Examine the live intervals
280 * of temp regs in the program and free those which won't be used again.
281 */
282 static void
283 reclaim_temps(struct brw_wm_compile *c)
284 {
285 GLint intBegin[BRW_WM_MAX_TEMPS];
286 GLint intEnd[BRW_WM_MAX_TEMPS];
287 int index;
288
289 /*printf("Reclaim temps:\n");*/
290
291 _mesa_find_temp_intervals(c->fp_instructions, c->nr_fp_insns,
292 intBegin, intEnd);
293
294 for (index = 0; index < BRW_WM_MAX_TEMPS; index++) {
295 if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) {
296 /* program temp[i] can be freed */
297 int component;
298 /*printf(" temp[%d] is dead\n", index);*/
299 for (component = 0; component < 4; component++) {
300 if (c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited) {
301 int r = c->wm_regs[TGSI_FILE_TEMPORARY][index][component].reg.nr;
302 release_grf(c, r);
303 /*
304 printf(" Reclaim temp %d, reg %d at inst %d\n",
305 index, r, c->cur_inst);
306 */
307 c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited = GL_FALSE;
308 }
309 }
310 }
311 }
312 }
313
314
315
316
317 /**
318 * Preallocate registers. This sets up the Mesa to hardware register
319 * mapping for certain registers, such as constants (uniforms/state vars)
320 * and shader inputs.
321 */
322 static void prealloc_reg(struct brw_wm_compile *c)
323 {
324 int i, j;
325 struct brw_reg reg;
326 int urb_read_length = 0;
327 GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
328 GLuint reg_index = 0;
329
330 memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
331 c->first_free_grf = 0;
332
333 for (i = 0; i < 4; i++) {
334 if (i < c->key.nr_depth_regs)
335 reg = brw_vec8_grf(i * 2, 0);
336 else
337 reg = brw_vec8_grf(0, 0);
338 set_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, i, reg);
339 }
340 reg_index += 2 * c->key.nr_depth_regs;
341
342 /* constants */
343 {
344 const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
345 const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
346
347 /* use a real constant buffer, or just use a section of the GRF? */
348 /* XXX this heuristic may need adjustment... */
349 if ((nr_params + nr_temps) * 4 + reg_index > 80)
350 c->fp->use_const_buffer = GL_TRUE;
351 else
352 c->fp->use_const_buffer = GL_FALSE;
353 /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
354
355 if (c->fp->use_const_buffer) {
356 /* We'll use a real constant buffer and fetch constants from
357 * it with a dataport read message.
358 */
359
360 /* number of float constants in CURBE */
361 c->prog_data.nr_params = 0;
362 }
363 else {
364 const struct gl_program_parameter_list *plist =
365 c->fp->program.Base.Parameters;
366 int index = 0;
367
368 /* number of float constants in CURBE */
369 c->prog_data.nr_params = 4 * nr_params;
370
371 /* loop over program constants (float[4]) */
372 for (i = 0; i < nr_params; i++) {
373 /* loop over XYZW channels */
374 for (j = 0; j < 4; j++, index++) {
375 reg = brw_vec1_grf(reg_index + index / 8, index % 8);
376 /* Save pointer to parameter/constant value.
377 * Constants will be copied in prepare_constant_buffer()
378 */
379 c->prog_data.param[index] = &plist->ParameterValues[i][j];
380 set_reg(c, TGSI_FILE_STATE_VAR, i, j, reg);
381 }
382 }
383 /* number of constant regs used (each reg is float[8]) */
384 c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
385 reg_index += c->nr_creg;
386 }
387 }
388
389 /* fragment shader inputs */
390 for (i = 0; i < VERT_RESULT_MAX; i++) {
391 int fp_input;
392
393 if (i >= VERT_RESULT_VAR0)
394 fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0;
395 else if (i <= VERT_RESULT_TEX7)
396 fp_input = i;
397 else
398 fp_input = -1;
399
400 if (fp_input >= 0 && inputs & (1 << fp_input)) {
401 urb_read_length = reg_index;
402 reg = brw_vec8_grf(reg_index, 0);
403 for (j = 0; j < 4; j++)
404 set_reg(c, TGSI_FILE_PAYLOAD, fp_input, j, reg);
405 }
406 if (c->key.nr_vp_outputs > i) {
407 reg_index += 2;
408 }
409 }
410
411 c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
412 c->prog_data.urb_read_length = urb_read_length;
413 c->prog_data.curb_read_length = c->nr_creg;
414 c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
415 reg_index++;
416 c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
417 reg_index += 2;
418
419 /* mark GRF regs [0..reg_index-1] as in-use */
420 for (i = 0; i < reg_index; i++)
421 prealloc_grf(c, i);
422
423 /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
424 prealloc_grf(c, 126);
425 prealloc_grf(c, 127);
426
427 for (i = 0; i < c->nr_fp_insns; i++) {
428 const struct brw_fp_instruction *inst = &c->fp_instructions[i];
429 struct brw_reg dst[4];
430
431 switch (inst->Opcode) {
432 case OPCODE_TEX:
433 case OPCODE_TXB:
434 /* Allocate the channels of texture results contiguously,
435 * since they are written out that way by the sampler unit.
436 */
437 for (j = 0; j < 4; j++) {
438 dst[j] = get_dst_reg(c, inst, j);
439 if (j != 0)
440 assert(dst[j].nr == dst[j - 1].nr + 1);
441 }
442 break;
443 default:
444 break;
445 }
446 }
447
448 /* An instruction may reference up to three constants.
449 * They'll be found in these registers.
450 * XXX alloc these on demand!
451 */
452 if (c->fp->use_const_buffer) {
453 for (i = 0; i < 3; i++) {
454 c->current_const[i].index = -1;
455 c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
456 }
457 }
458 #if 0
459 printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
460 printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
461 #endif
462 }
463
464
465 /**
466 * Check if any of the instruction's src registers are constants, uniforms,
467 * or statevars. If so, fetch any constants that we don't already have in
468 * the three GRF slots.
469 */
470 static void fetch_constants(struct brw_wm_compile *c,
471 const struct brw_fp_instruction *inst)
472 {
473 struct brw_compile *p = &c->func;
474 GLuint i;
475
476 /* loop over instruction src regs */
477 for (i = 0; i < 3; i++) {
478 const struct prog_src_register *src = &inst->SrcReg[i];
479 if (src->File == TGSI_FILE_IMMEDIATE ||
480 src->File == TGSI_FILE_CONSTANT) {
481 c->current_const[i].index = src->Index;
482
483 #if 0
484 printf(" fetch const[%d] for arg %d into reg %d\n",
485 src->Index, i, c->current_const[i].reg.nr);
486 #endif
487
488 /* need to fetch the constant now */
489 brw_dp_READ_4(p,
490 c->current_const[i].reg, /* writeback dest */
491 src->RelAddr, /* relative indexing? */
492 16 * src->Index, /* byte offset */
493 SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */
494 );
495 }
496 }
497 }
498
499
500 /**
501 * Convert Mesa dst register to brw register.
502 */
503 static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
504 const struct brw_fp_instruction *inst,
505 GLuint component)
506 {
507 const int nr = 1;
508 return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr,
509 0, 0);
510 }
511
512
513 static struct brw_reg
514 get_src_reg_const(struct brw_wm_compile *c,
515 const struct brw_fp_instruction *inst,
516 GLuint srcRegIndex, GLuint component)
517 {
518 /* We should have already fetched the constant from the constant
519 * buffer in fetch_constants(). Now we just have to return a
520 * register description that extracts the needed component and
521 * smears it across all eight vector components.
522 */
523 const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
524 struct brw_reg const_reg;
525
526 assert(component < 4);
527 assert(srcRegIndex < 3);
528 assert(c->current_const[srcRegIndex].index != -1);
529 const_reg = c->current_const[srcRegIndex].reg;
530
531 /* extract desired float from the const_reg, and smear */
532 const_reg = stride(const_reg, 0, 1, 0);
533 const_reg.subnr = component * 4;
534
535 if (src->Negate)
536 const_reg = negate(const_reg);
537 if (src->Abs)
538 const_reg = brw_abs(const_reg);
539
540 #if 0
541 printf(" form const[%d].%d for arg %d, reg %d\n",
542 c->current_const[srcRegIndex].index,
543 component,
544 srcRegIndex,
545 const_reg.nr);
546 #endif
547
548 return const_reg;
549 }
550
551
552 /**
553 * Convert Mesa src register to brw register.
554 */
555 static struct brw_reg get_src_reg(struct brw_wm_compile *c,
556 const struct brw_fp_instruction *inst,
557 GLuint srcRegIndex, GLuint channel)
558 {
559 const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
560 const GLuint nr = 1;
561 const GLuint component = BRW_GET_SWZ(src->Swizzle, channel);
562
563 /* Extended swizzle terms */
564 if (component == SWIZZLE_ZERO) {
565 return brw_imm_f(0.0F);
566 }
567 else if (component == SWIZZLE_ONE) {
568 return brw_imm_f(1.0F);
569 }
570
571 if (c->fp->use_const_buffer &&
572 (src->File == TGSI_FILE_STATE_VAR ||
573 src->File == TGSI_FILE_CONSTANT ||
574 src->File == TGSI_FILE_UNIFORM)) {
575 return get_src_reg_const(c, inst, srcRegIndex, component);
576 }
577 else {
578 /* other type of source register */
579 return get_reg(c, src->File, src->Index, component, nr,
580 src->Negate, src->Abs);
581 }
582 }
583
584
585 /**
586 * Same as \sa get_src_reg() but if the register is a immediate, emit
587 * a brw_reg encoding the immediate.
588 * Note that a brw instruction only allows one src operand to be a immediate.
589 * For instructions with more than one operand, only the second can be a
590 * immediate. This means that we treat some immediates as constants
591 * (which why TGSI_FILE_IMMEDIATE is checked in fetch_constants()).
592 *
593 */
594 static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c,
595 const struct brw_fp_instruction *inst,
596 GLuint srcRegIndex, GLuint channel)
597 {
598 const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
599 if (src->File == TGSI_FILE_IMMEDIATE) {
600 /* an immediate */
601 const int component = BRW_GET_SWZ(src->Swizzle, channel);
602 const GLfloat *param =
603 c->fp->program.Base.Parameters->ParameterValues[src->Index];
604 GLfloat value = param[component];
605 if (src->Negate)
606 value = -value;
607 if (src->Abs)
608 value = FABSF(value);
609 #if 0
610 printf(" form immed value %f for chan %d\n", value, channel);
611 #endif
612 return brw_imm_f(value);
613 }
614 else {
615 return get_src_reg(c, inst, srcRegIndex, channel);
616 }
617 }
618
619
620 /**
621 * Subroutines are minimal support for resusable instruction sequences.
622 * They are implemented as simply as possible to minimise overhead: there
623 * is no explicit support for communication between the caller and callee
624 * other than saving the return address in a temporary register, nor is
625 * there any automatic local storage. This implies that great care is
626 * required before attempting reentrancy or any kind of nested
627 * subroutine invocations.
628 */
629 static void invoke_subroutine( struct brw_wm_compile *c,
630 enum _subroutine subroutine,
631 void (*emit)( struct brw_wm_compile * ) )
632 {
633 struct brw_compile *p = &c->func;
634
635 assert( subroutine < BRW_WM_MAX_SUBROUTINE );
636
637 if( c->subroutines[ subroutine ] ) {
638 /* subroutine previously emitted: reuse existing instructions */
639
640 int mark = mark_tmps( c );
641 struct brw_reg return_address = retype( alloc_tmp( c ),
642 BRW_REGISTER_TYPE_UD );
643 int here = p->nr_insn;
644
645 brw_push_insn_state(p);
646 brw_set_mask_control(p, BRW_MASK_DISABLE);
647 brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
648
649 brw_ADD( p, brw_ip_reg(), brw_ip_reg(),
650 brw_imm_d( ( c->subroutines[ subroutine ] -
651 here - 1 ) << 4 ) );
652 brw_pop_insn_state(p);
653
654 release_tmps( c, mark );
655 } else {
656 /* previously unused subroutine: emit, and mark for later reuse */
657
658 int mark = mark_tmps( c );
659 struct brw_reg return_address = retype( alloc_tmp( c ),
660 BRW_REGISTER_TYPE_UD );
661 struct brw_instruction *calc;
662 int base = p->nr_insn;
663
664 brw_push_insn_state(p);
665 brw_set_mask_control(p, BRW_MASK_DISABLE);
666 calc = brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 0 ) );
667 brw_pop_insn_state(p);
668
669 c->subroutines[ subroutine ] = p->nr_insn;
670
671 emit( c );
672
673 brw_push_insn_state(p);
674 brw_set_mask_control(p, BRW_MASK_DISABLE);
675 brw_MOV( p, brw_ip_reg(), return_address );
676 brw_pop_insn_state(p);
677
678 brw_set_src1( calc, brw_imm_ud( ( p->nr_insn - base ) << 4 ) );
679
680 release_tmps( c, mark );
681 }
682 }
683
684 static void emit_trunc( struct brw_wm_compile *c,
685 const struct brw_fp_instruction *inst)
686 {
687 int i;
688 struct brw_compile *p = &c->func;
689 GLuint mask = inst->DstReg.WriteMask;
690 brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
691 for (i = 0; i < 4; i++) {
692 if (mask & (1<<i)) {
693 struct brw_reg src, dst;
694 dst = get_dst_reg(c, inst, i);
695 src = get_src_reg(c, inst, 0, i);
696 brw_RNDZ(p, dst, src);
697 }
698 }
699 brw_set_saturate(p, 0);
700 }
701
702 static void emit_mov( struct brw_wm_compile *c,
703 const struct brw_fp_instruction *inst)
704 {
705 int i;
706 struct brw_compile *p = &c->func;
707 GLuint mask = inst->DstReg.WriteMask;
708 brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
709 for (i = 0; i < 4; i++) {
710 if (mask & (1<<i)) {
711 struct brw_reg src, dst;
712 dst = get_dst_reg(c, inst, i);
713 /* XXX some moves from immediate value don't work reliably!!! */
714 /*src = get_src_reg_imm(c, inst, 0, i);*/
715 src = get_src_reg(c, inst, 0, i);
716 brw_MOV(p, dst, src);
717 }
718 }
719 brw_set_saturate(p, 0);
720 }
721
722 static void emit_pixel_xy(struct brw_wm_compile *c,
723 const struct brw_fp_instruction *inst)
724 {
725 struct brw_reg r1 = brw_vec1_grf(1, 0);
726 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
727
728 struct brw_reg dst0, dst1;
729 struct brw_compile *p = &c->func;
730 GLuint mask = inst->DstReg.WriteMask;
731
732 dst0 = get_dst_reg(c, inst, 0);
733 dst1 = get_dst_reg(c, inst, 1);
734 /* Calculate pixel centers by adding 1 or 0 to each of the
735 * micro-tile coordinates passed in r1.
736 */
737 if (mask & WRITEMASK_X) {
738 brw_ADD(p,
739 vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
740 stride(suboffset(r1_uw, 4), 2, 4, 0),
741 brw_imm_v(0x10101010));
742 }
743
744 if (mask & WRITEMASK_Y) {
745 brw_ADD(p,
746 vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
747 stride(suboffset(r1_uw, 5), 2, 4, 0),
748 brw_imm_v(0x11001100));
749 }
750 }
751
752 static void emit_delta_xy(struct brw_wm_compile *c,
753 const struct brw_fp_instruction *inst)
754 {
755 struct brw_reg r1 = brw_vec1_grf(1, 0);
756 struct brw_reg dst0, dst1, src0, src1;
757 struct brw_compile *p = &c->func;
758 GLuint mask = inst->DstReg.WriteMask;
759
760 dst0 = get_dst_reg(c, inst, 0);
761 dst1 = get_dst_reg(c, inst, 1);
762 src0 = get_src_reg(c, inst, 0, 0);
763 src1 = get_src_reg(c, inst, 0, 1);
764 /* Calc delta X,Y by subtracting origin in r1 from the pixel
765 * centers.
766 */
767 if (mask & WRITEMASK_X) {
768 brw_ADD(p,
769 dst0,
770 retype(src0, BRW_REGISTER_TYPE_UW),
771 negate(r1));
772 }
773
774 if (mask & WRITEMASK_Y) {
775 brw_ADD(p,
776 dst1,
777 retype(src1, BRW_REGISTER_TYPE_UW),
778 negate(suboffset(r1,1)));
779
780 }
781 }
782
783 static void fire_fb_write( struct brw_wm_compile *c,
784 GLuint base_reg,
785 GLuint nr,
786 GLuint target,
787 GLuint eot)
788 {
789 struct brw_compile *p = &c->func;
790 /* Pass through control information:
791 */
792 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
793 {
794 brw_push_insn_state(p);
795 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
796 brw_MOV(p,
797 brw_message_reg(base_reg + 1),
798 brw_vec8_grf(1, 0));
799 brw_pop_insn_state(p);
800 }
801 /* Send framebuffer write message: */
802 brw_fb_WRITE(p,
803 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
804 base_reg,
805 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
806 target,
807 nr,
808 0,
809 eot);
810 }
811
812 static void emit_fb_write(struct brw_wm_compile *c,
813 const struct brw_fp_instruction *inst)
814 {
815 struct brw_compile *p = &c->func;
816 int nr = 2;
817 int channel;
818 GLuint target, eot;
819 struct brw_reg src0;
820
821 /* Reserve a space for AA - may not be needed:
822 */
823 if (c->key.aa_dest_stencil_reg)
824 nr += 1;
825
826 brw_push_insn_state(p);
827 for (channel = 0; channel < 4; channel++) {
828 src0 = get_src_reg(c, inst, 0, channel);
829 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
830 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
831 brw_MOV(p, brw_message_reg(nr + channel), src0);
832 }
833 /* skip over the regs populated above: */
834 nr += 8;
835 brw_pop_insn_state(p);
836
837 if (c->key.source_depth_to_render_target) {
838 if (c->key.computes_depth) {
839 src0 = get_src_reg(c, inst, 2, 2);
840 brw_MOV(p, brw_message_reg(nr), src0);
841 }
842 else {
843 src0 = get_src_reg(c, inst, 1, 1);
844 brw_MOV(p, brw_message_reg(nr), src0);
845 }
846
847 nr += 2;
848 }
849
850 if (c->key.dest_depth_reg) {
851 const GLuint comp = c->key.dest_depth_reg / 2;
852 const GLuint off = c->key.dest_depth_reg % 2;
853
854 if (off != 0) {
855 /* XXX this code needs review/testing */
856 struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp);
857 struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1);
858
859 brw_push_insn_state(p);
860 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
861
862 brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1));
863 /* 2nd half? */
864 brw_MOV(p, brw_message_reg(nr+1), arg1_1);
865 brw_pop_insn_state(p);
866 }
867 else
868 {
869 struct brw_reg src = get_src_reg(c, inst, 1, 1);
870 brw_MOV(p, brw_message_reg(nr), src);
871 }
872 nr += 2;
873 }
874
875 target = inst->Aux >> 1;
876 eot = inst->Aux & 1;
877 fire_fb_write(c, 0, nr, target, eot);
878 }
879
880 static void emit_pixel_w( struct brw_wm_compile *c,
881 const struct brw_fp_instruction *inst)
882 {
883 struct brw_compile *p = &c->func;
884 GLuint mask = inst->DstReg.WriteMask;
885 if (mask & WRITEMASK_W) {
886 struct brw_reg dst, src0, delta0, delta1;
887 struct brw_reg interp3;
888
889 dst = get_dst_reg(c, inst, 3);
890 src0 = get_src_reg(c, inst, 0, 0);
891 delta0 = get_src_reg(c, inst, 1, 0);
892 delta1 = get_src_reg(c, inst, 1, 1);
893
894 interp3 = brw_vec1_grf(src0.nr+1, 4);
895 /* Calc 1/w - just linterp wpos[3] optimized by putting the
896 * result straight into a message reg.
897 */
898 brw_LINE(p, brw_null_reg(), interp3, delta0);
899 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1);
900
901 /* Calc w */
902 brw_math_16( p, dst,
903 BRW_MATH_FUNCTION_INV,
904 BRW_MATH_SATURATE_NONE,
905 2, brw_null_reg(),
906 BRW_MATH_PRECISION_FULL);
907 }
908 }
909
910 static void emit_linterp(struct brw_wm_compile *c,
911 const struct brw_fp_instruction *inst)
912 {
913 struct brw_compile *p = &c->func;
914 GLuint mask = inst->DstReg.WriteMask;
915 struct brw_reg interp[4];
916 struct brw_reg dst, delta0, delta1;
917 struct brw_reg src0;
918 GLuint nr, i;
919
920 src0 = get_src_reg(c, inst, 0, 0);
921 delta0 = get_src_reg(c, inst, 1, 0);
922 delta1 = get_src_reg(c, inst, 1, 1);
923 nr = src0.nr;
924
925 interp[0] = brw_vec1_grf(nr, 0);
926 interp[1] = brw_vec1_grf(nr, 4);
927 interp[2] = brw_vec1_grf(nr+1, 0);
928 interp[3] = brw_vec1_grf(nr+1, 4);
929
930 for(i = 0; i < 4; i++ ) {
931 if (mask & (1<<i)) {
932 dst = get_dst_reg(c, inst, i);
933 brw_LINE(p, brw_null_reg(), interp[i], delta0);
934 brw_MAC(p, dst, suboffset(interp[i],1), delta1);
935 }
936 }
937 }
938
939 static void emit_cinterp(struct brw_wm_compile *c,
940 const struct brw_fp_instruction *inst)
941 {
942 struct brw_compile *p = &c->func;
943 GLuint mask = inst->DstReg.WriteMask;
944
945 struct brw_reg interp[4];
946 struct brw_reg dst, src0;
947 GLuint nr, i;
948
949 src0 = get_src_reg(c, inst, 0, 0);
950 nr = src0.nr;
951
952 interp[0] = brw_vec1_grf(nr, 0);
953 interp[1] = brw_vec1_grf(nr, 4);
954 interp[2] = brw_vec1_grf(nr+1, 0);
955 interp[3] = brw_vec1_grf(nr+1, 4);
956
957 for(i = 0; i < 4; i++ ) {
958 if (mask & (1<<i)) {
959 dst = get_dst_reg(c, inst, i);
960 brw_MOV(p, dst, suboffset(interp[i],3));
961 }
962 }
963 }
964
965 static void emit_pinterp(struct brw_wm_compile *c,
966 const struct brw_fp_instruction *inst)
967 {
968 struct brw_compile *p = &c->func;
969 GLuint mask = inst->DstReg.WriteMask;
970
971 struct brw_reg interp[4];
972 struct brw_reg dst, delta0, delta1;
973 struct brw_reg src0, w;
974 GLuint nr, i;
975
976 src0 = get_src_reg(c, inst, 0, 0);
977 delta0 = get_src_reg(c, inst, 1, 0);
978 delta1 = get_src_reg(c, inst, 1, 1);
979 w = get_src_reg(c, inst, 2, 3);
980 nr = src0.nr;
981
982 interp[0] = brw_vec1_grf(nr, 0);
983 interp[1] = brw_vec1_grf(nr, 4);
984 interp[2] = brw_vec1_grf(nr+1, 0);
985 interp[3] = brw_vec1_grf(nr+1, 4);
986
987 for(i = 0; i < 4; i++ ) {
988 if (mask & (1<<i)) {
989 dst = get_dst_reg(c, inst, i);
990 brw_LINE(p, brw_null_reg(), interp[i], delta0);
991 brw_MAC(p, dst, suboffset(interp[i],1),
992 delta1);
993 brw_MUL(p, dst, dst, w);
994 }
995 }
996 }
997
998 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
999 static void emit_frontfacing(struct brw_wm_compile *c,
1000 const struct brw_fp_instruction *inst)
1001 {
1002 struct brw_compile *p = &c->func;
1003 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
1004 struct brw_reg dst;
1005 GLuint mask = inst->DstReg.WriteMask;
1006 int i;
1007
1008 for (i = 0; i < 4; i++) {
1009 if (mask & (1<<i)) {
1010 dst = get_dst_reg(c, inst, i);
1011 brw_MOV(p, dst, brw_imm_f(0.0));
1012 }
1013 }
1014
1015 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
1016 * us front face
1017 */
1018 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
1019 for (i = 0; i < 4; i++) {
1020 if (mask & (1<<i)) {
1021 dst = get_dst_reg(c, inst, i);
1022 brw_MOV(p, dst, brw_imm_f(1.0));
1023 }
1024 }
1025 brw_set_predicate_control_flag_value(p, 0xff);
1026 }
1027
1028 static void emit_xpd(struct brw_wm_compile *c,
1029 const struct brw_fp_instruction *inst)
1030 {
1031 int i;
1032 struct brw_compile *p = &c->func;
1033 GLuint mask = inst->DstReg.WriteMask;
1034 for (i = 0; i < 4; i++) {
1035 GLuint i2 = (i+2)%3;
1036 GLuint i1 = (i+1)%3;
1037 if (mask & (1<<i)) {
1038 struct brw_reg src0, src1, dst;
1039 dst = get_dst_reg(c, inst, i);
1040 src0 = negate(get_src_reg(c, inst, 0, i2));
1041 src1 = get_src_reg_imm(c, inst, 1, i1);
1042 brw_MUL(p, brw_null_reg(), src0, src1);
1043 src0 = get_src_reg(c, inst, 0, i1);
1044 src1 = get_src_reg_imm(c, inst, 1, i2);
1045 brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
1046 brw_MAC(p, dst, src0, src1);
1047 brw_set_saturate(p, 0);
1048 }
1049 }
1050 brw_set_saturate(p, 0);
1051 }
1052
1053 static void emit_dp3(struct brw_wm_compile *c,
1054 const struct brw_fp_instruction *inst)
1055 {
1056 struct brw_reg src0[3], src1[3], dst;
1057 int i;
1058 struct brw_compile *p = &c->func;
1059 GLuint mask = inst->DstReg.WriteMask;
1060 int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
1061
1062 if (!(mask & WRITEMASK_XYZW))
1063 return;
1064
1065 assert(is_power_of_two(mask & WRITEMASK_XYZW));
1066
1067 for (i = 0; i < 3; i++) {
1068 src0[i] = get_src_reg(c, inst, 0, i);
1069 src1[i] = get_src_reg_imm(c, inst, 1, i);
1070 }
1071
1072 dst = get_dst_reg(c, inst, dst_chan);
1073 brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
1074 brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
1075 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
1076 brw_MAC(p, dst, src0[2], src1[2]);
1077 brw_set_saturate(p, 0);
1078 }
1079
1080 static void emit_dp4(struct brw_wm_compile *c,
1081 const struct brw_fp_instruction *inst)
1082 {
1083 struct brw_reg src0[4], src1[4], dst;
1084 int i;
1085 struct brw_compile *p = &c->func;
1086 GLuint mask = inst->DstReg.WriteMask;
1087 int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
1088
1089 if (!(mask & WRITEMASK_XYZW))
1090 return;
1091
1092 assert(is_power_of_two(mask & WRITEMASK_XYZW));
1093
1094 for (i = 0; i < 4; i++) {
1095 src0[i] = get_src_reg(c, inst, 0, i);
1096 src1[i] = get_src_reg_imm(c, inst, 1, i);
1097 }
1098 dst = get_dst_reg(c, inst, dst_chan);
1099 brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
1100 brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
1101 brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
1102 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
1103 brw_MAC(p, dst, src0[3], src1[3]);
1104 brw_set_saturate(p, 0);
1105 }
1106
1107 static void emit_dph(struct brw_wm_compile *c,
1108 const struct brw_fp_instruction *inst)
1109 {
1110 struct brw_reg src0[4], src1[4], dst;
1111 int i;
1112 struct brw_compile *p = &c->func;
1113 GLuint mask = inst->DstReg.WriteMask;
1114 int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
1115
1116 if (!(mask & WRITEMASK_XYZW))
1117 return;
1118
1119 assert(is_power_of_two(mask & WRITEMASK_XYZW));
1120
1121 for (i = 0; i < 4; i++) {
1122 src0[i] = get_src_reg(c, inst, 0, i);
1123 src1[i] = get_src_reg_imm(c, inst, 1, i);
1124 }
1125 dst = get_dst_reg(c, inst, dst_chan);
1126 brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
1127 brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
1128 brw_MAC(p, dst, src0[2], src1[2]);
1129 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
1130 brw_ADD(p, dst, dst, src1[3]);
1131 brw_set_saturate(p, 0);
1132 }
1133
1134 /**
1135 * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
1136 * Note that the result of the function is smeared across the dest
1137 * register's X, Y, Z and W channels (subject to writemasking of course).
1138 */
1139 static void emit_math1(struct brw_wm_compile *c,
1140 const struct brw_fp_instruction *inst, GLuint func)
1141 {
1142 struct brw_compile *p = &c->func;
1143 struct brw_reg src0, dst;
1144 GLuint mask = inst->DstReg.WriteMask;
1145 int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
1146
1147 if (!(mask & WRITEMASK_XYZW))
1148 return;
1149
1150 assert(is_power_of_two(mask & WRITEMASK_XYZW));
1151
1152 /* Get first component of source register */
1153 dst = get_dst_reg(c, inst, dst_chan);
1154 src0 = get_src_reg(c, inst, 0, 0);
1155
1156 brw_MOV(p, brw_message_reg(2), src0);
1157 brw_math(p,
1158 dst,
1159 func,
1160 (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
1161 2,
1162 brw_null_reg(),
1163 BRW_MATH_DATA_VECTOR,
1164 BRW_MATH_PRECISION_FULL);
1165 }
1166
1167 static void emit_rcp(struct brw_wm_compile *c,
1168 const struct brw_fp_instruction *inst)
1169 {
1170 emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
1171 }
1172
1173 static void emit_rsq(struct brw_wm_compile *c,
1174 const struct brw_fp_instruction *inst)
1175 {
1176 emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
1177 }
1178
1179 static void emit_sin(struct brw_wm_compile *c,
1180 const struct brw_fp_instruction *inst)
1181 {
1182 emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
1183 }
1184
1185 static void emit_cos(struct brw_wm_compile *c,
1186 const struct brw_fp_instruction *inst)
1187 {
1188 emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
1189 }
1190
1191 static void emit_ex2(struct brw_wm_compile *c,
1192 const struct brw_fp_instruction *inst)
1193 {
1194 emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
1195 }
1196
1197 static void emit_lg2(struct brw_wm_compile *c,
1198 const struct brw_fp_instruction *inst)
1199 {
1200 emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
1201 }
1202
1203 static void emit_add(struct brw_wm_compile *c,
1204 const struct brw_fp_instruction *inst)
1205 {
1206 struct brw_compile *p = &c->func;
1207 struct brw_reg src0, src1, dst;
1208 GLuint mask = inst->DstReg.WriteMask;
1209 int i;
1210 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
1211 for (i = 0 ; i < 4; i++) {
1212 if (mask & (1<<i)) {
1213 dst = get_dst_reg(c, inst, i);
1214 src0 = get_src_reg(c, inst, 0, i);
1215 src1 = get_src_reg_imm(c, inst, 1, i);
1216 brw_ADD(p, dst, src0, src1);
1217 }
1218 }
1219 brw_set_saturate(p, 0);
1220 }
1221
1222 static void emit_arl(struct brw_wm_compile *c,
1223 const struct brw_fp_instruction *inst)
1224 {
1225 struct brw_compile *p = &c->func;
1226 struct brw_reg src0, addr_reg;
1227 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
1228 addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1229 BRW_ARF_ADDRESS, 0);
1230 src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */
1231 brw_MOV(p, addr_reg, src0);
1232 brw_set_saturate(p, 0);
1233 }
1234
1235
1236 static void emit_mul(struct brw_wm_compile *c,
1237 const struct brw_fp_instruction *inst)
1238 {
1239 struct brw_compile *p = &c->func;
1240 struct brw_reg src0, src1, dst;
1241 GLuint mask = inst->DstReg.WriteMask;
1242 int i;
1243 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
1244 for (i = 0 ; i < 4; i++) {
1245 if (mask & (1<<i)) {
1246 dst = get_dst_reg(c, inst, i);
1247 src0 = get_src_reg(c, inst, 0, i);
1248 src1 = get_src_reg_imm(c, inst, 1, i);
1249 brw_MUL(p, dst, src0, src1);
1250 }
1251 }
1252 brw_set_saturate(p, 0);
1253 }
1254
1255 static void emit_frc(struct brw_wm_compile *c,
1256 const struct brw_fp_instruction *inst)
1257 {
1258 struct brw_compile *p = &c->func;
1259 struct brw_reg src0, dst;
1260 GLuint mask = inst->DstReg.WriteMask;
1261 int i;
1262 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
1263 for (i = 0 ; i < 4; i++) {
1264 if (mask & (1<<i)) {
1265 dst = get_dst_reg(c, inst, i);
1266 src0 = get_src_reg_imm(c, inst, 0, i);
1267 brw_FRC(p, dst, src0);
1268 }
1269 }
1270 if (inst->SaturateMode != SATURATE_OFF)
1271 brw_set_saturate(p, 0);
1272 }
1273
1274 static void emit_flr(struct brw_wm_compile *c,
1275 const struct brw_fp_instruction *inst)
1276 {
1277 struct brw_compile *p = &c->func;
1278 struct brw_reg src0, dst;
1279 GLuint mask = inst->DstReg.WriteMask;
1280 int i;
1281 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
1282 for (i = 0 ; i < 4; i++) {
1283 if (mask & (1<<i)) {
1284 dst = get_dst_reg(c, inst, i);
1285 src0 = get_src_reg_imm(c, inst, 0, i);
1286 brw_RNDD(p, dst, src0);
1287 }
1288 }
1289 brw_set_saturate(p, 0);
1290 }
1291
1292
1293 static void emit_min_max(struct brw_wm_compile *c,
1294 const struct brw_fp_instruction *inst)
1295 {
1296 struct brw_compile *p = &c->func;
1297 const GLuint mask = inst->DstReg.WriteMask;
1298 const int mark = mark_tmps(c);
1299 int i;
1300 brw_push_insn_state(p);
1301 for (i = 0; i < 4; i++) {
1302 if (mask & (1<<i)) {
1303 struct brw_reg real_dst = get_dst_reg(c, inst, i);
1304 struct brw_reg src0 = get_src_reg(c, inst, 0, i);
1305 struct brw_reg src1 = get_src_reg(c, inst, 1, i);
1306 struct brw_reg dst;
1307 /* if dst==src0 or dst==src1 we need to use a temp reg */
1308 GLboolean use_temp = brw_same_reg(dst, src0) ||
1309 brw_same_reg(dst, src1);
1310 if (use_temp)
1311 dst = alloc_tmp(c);
1312 else
1313 dst = real_dst;
1314
1315 /*
1316 printf(" Min/max: dst %d src0 %d src1 %d\n",
1317 dst.nr, src0.nr, src1.nr);
1318 */
1319 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
1320 brw_MOV(p, dst, src0);
1321 brw_set_saturate(p, 0);
1322
1323 if (inst->Opcode == OPCODE_MIN)
1324 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
1325 else
1326 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0);
1327
1328 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
1329 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
1330 brw_MOV(p, dst, src1);
1331 brw_set_saturate(p, 0);
1332 brw_set_predicate_control_flag_value(p, 0xff);
1333 if (use_temp)
1334 brw_MOV(p, real_dst, dst);
1335 }
1336 }
1337 brw_pop_insn_state(p);
1338 release_tmps(c, mark);
1339 }
1340
1341 static void emit_pow(struct brw_wm_compile *c,
1342 const struct brw_fp_instruction *inst)
1343 {
1344 struct brw_compile *p = &c->func;
1345 struct brw_reg dst, src0, src1;
1346 GLuint mask = inst->DstReg.WriteMask;
1347 int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
1348
1349 if (!(mask & WRITEMASK_XYZW))
1350 return;
1351
1352 assert(is_power_of_two(mask & WRITEMASK_XYZW));
1353
1354 dst = get_dst_reg(c, inst, dst_chan);
1355 src0 = get_src_reg_imm(c, inst, 0, 0);
1356 src1 = get_src_reg_imm(c, inst, 1, 0);
1357
1358 brw_MOV(p, brw_message_reg(2), src0);
1359 brw_MOV(p, brw_message_reg(3), src1);
1360
1361 brw_math(p,
1362 dst,
1363 BRW_MATH_FUNCTION_POW,
1364 (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
1365 2,
1366 brw_null_reg(),
1367 BRW_MATH_DATA_VECTOR,
1368 BRW_MATH_PRECISION_FULL);
1369 }
1370
1371 static void emit_lrp(struct brw_wm_compile *c,
1372 const struct brw_fp_instruction *inst)
1373 {
1374 struct brw_compile *p = &c->func;
1375 GLuint mask = inst->DstReg.WriteMask;
1376 struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
1377 int i;
1378 int mark = mark_tmps(c);
1379 for (i = 0; i < 4; i++) {
1380 if (mask & (1<<i)) {
1381 dst = get_dst_reg(c, inst, i);
1382 src0 = get_src_reg(c, inst, 0, i);
1383
1384 src1 = get_src_reg_imm(c, inst, 1, i);
1385
1386 if (src1.nr == dst.nr) {
1387 tmp1 = alloc_tmp(c);
1388 brw_MOV(p, tmp1, src1);
1389 } else
1390 tmp1 = src1;
1391
1392 src2 = get_src_reg(c, inst, 2, i);
1393 if (src2.nr == dst.nr) {
1394 tmp2 = alloc_tmp(c);
1395 brw_MOV(p, tmp2, src2);
1396 } else
1397 tmp2 = src2;
1398
1399 brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
1400 brw_MUL(p, brw_null_reg(), dst, tmp2);
1401 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
1402 brw_MAC(p, dst, src0, tmp1);
1403 brw_set_saturate(p, 0);
1404 }
1405 release_tmps(c, mark);
1406 }
1407 }
1408
1409 /**
1410 * For GLSL shaders, this KIL will be unconditional.
1411 * It may be contained inside an IF/ENDIF structure of course.
1412 */
1413 static void emit_kil(struct brw_wm_compile *c)
1414 {
1415 struct brw_compile *p = &c->func;
1416 struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1417 brw_push_insn_state(p);
1418 brw_set_mask_control(p, BRW_MASK_DISABLE);
1419 brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
1420 brw_AND(p, depth, c->emit_mask_reg, depth);
1421 brw_pop_insn_state(p);
1422 }
1423
1424 static void emit_mad(struct brw_wm_compile *c,
1425 const struct brw_fp_instruction *inst)
1426 {
1427 struct brw_compile *p = &c->func;
1428 GLuint mask = inst->DstReg.WriteMask;
1429 struct brw_reg dst, src0, src1, src2;
1430 int i;
1431
1432 for (i = 0; i < 4; i++) {
1433 if (mask & (1<<i)) {
1434 dst = get_dst_reg(c, inst, i);
1435 src0 = get_src_reg(c, inst, 0, i);
1436 src1 = get_src_reg_imm(c, inst, 1, i);
1437 src2 = get_src_reg_imm(c, inst, 2, i);
1438 brw_MUL(p, dst, src0, src1);
1439
1440 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
1441 brw_ADD(p, dst, dst, src2);
1442 brw_set_saturate(p, 0);
1443 }
1444 }
1445 }
1446
1447 static void emit_sop(struct brw_wm_compile *c,
1448 const struct brw_fp_instruction *inst, GLuint cond)
1449 {
1450 struct brw_compile *p = &c->func;
1451 GLuint mask = inst->DstReg.WriteMask;
1452 struct brw_reg dst, src0, src1;
1453 int i;
1454
1455 for (i = 0; i < 4; i++) {
1456 if (mask & (1<<i)) {
1457 dst = get_dst_reg(c, inst, i);
1458 src0 = get_src_reg(c, inst, 0, i);
1459 src1 = get_src_reg_imm(c, inst, 1, i);
1460 brw_push_insn_state(p);
1461 brw_CMP(p, brw_null_reg(), cond, src0, src1);
1462 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1463 brw_MOV(p, dst, brw_imm_f(0.0));
1464 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
1465 brw_MOV(p, dst, brw_imm_f(1.0));
1466 brw_pop_insn_state(p);
1467 }
1468 }
1469 }
1470
1471 static void emit_slt(struct brw_wm_compile *c,
1472 const struct brw_fp_instruction *inst)
1473 {
1474 emit_sop(c, inst, BRW_CONDITIONAL_L);
1475 }
1476
1477 static void emit_sle(struct brw_wm_compile *c,
1478 const struct brw_fp_instruction *inst)
1479 {
1480 emit_sop(c, inst, BRW_CONDITIONAL_LE);
1481 }
1482
1483 static void emit_sgt(struct brw_wm_compile *c,
1484 const struct brw_fp_instruction *inst)
1485 {
1486 emit_sop(c, inst, BRW_CONDITIONAL_G);
1487 }
1488
1489 static void emit_sge(struct brw_wm_compile *c,
1490 const struct brw_fp_instruction *inst)
1491 {
1492 emit_sop(c, inst, BRW_CONDITIONAL_GE);
1493 }
1494
1495 static void emit_seq(struct brw_wm_compile *c,
1496 const struct brw_fp_instruction *inst)
1497 {
1498 emit_sop(c, inst, BRW_CONDITIONAL_EQ);
1499 }
1500
1501 static void emit_sne(struct brw_wm_compile *c,
1502 const struct brw_fp_instruction *inst)
1503 {
1504 emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
1505 }
1506
1507 static INLINE struct brw_reg high_words( struct brw_reg reg )
1508 {
1509 return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
1510 0, 8, 2 );
1511 }
1512
1513 static INLINE struct brw_reg low_words( struct brw_reg reg )
1514 {
1515 return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 );
1516 }
1517
1518 static INLINE struct brw_reg even_bytes( struct brw_reg reg )
1519 {
1520 return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 );
1521 }
1522
1523 static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
1524 {
1525 return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ),
1526 0, 16, 2 );
1527 }
1528
1529
1530
1531 static void emit_wpos_xy(struct brw_wm_compile *c,
1532 const struct brw_fp_instruction *inst)
1533 {
1534 struct brw_compile *p = &c->func;
1535 GLuint mask = inst->DstReg.WriteMask;
1536 struct brw_reg src0[2], dst[2];
1537
1538 dst[0] = get_dst_reg(c, inst, 0);
1539 dst[1] = get_dst_reg(c, inst, 1);
1540
1541 src0[0] = get_src_reg(c, inst, 0, 0);
1542 src0[1] = get_src_reg(c, inst, 0, 1);
1543
1544 /* Calculate the pixel offset from window bottom left into destination
1545 * X and Y channels.
1546 */
1547 if (mask & WRITEMASK_X) {
1548 /* X' = X */
1549 brw_MOV(p,
1550 dst[0],
1551 retype(src0[0], BRW_REGISTER_TYPE_W));
1552 }
1553
1554 if (mask & WRITEMASK_Y) {
1555 /* Y' = height - 1 - Y */
1556 brw_ADD(p,
1557 dst[1],
1558 negate(retype(src0[1], BRW_REGISTER_TYPE_W)),
1559 brw_imm_d(c->key.drawable_height - 1));
1560 }
1561 }
1562
1563 /* TODO
1564 BIAS on SIMD8 not working yet...
1565 */
1566 static void emit_txb(struct brw_wm_compile *c,
1567 const struct brw_fp_instruction *inst)
1568 {
1569 struct brw_compile *p = &c->func;
1570 struct brw_reg dst[4], src[4], payload_reg;
1571 /* Note: tex_unit was already looked up through SamplerTextures[] */
1572 const GLuint unit = inst->tex_unit;
1573 GLuint i;
1574 GLuint msg_type;
1575
1576 assert(unit < BRW_MAX_TEX_UNIT);
1577
1578 payload_reg = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
1579
1580 for (i = 0; i < 4; i++)
1581 dst[i] = get_dst_reg(c, inst, i);
1582 for (i = 0; i < 4; i++)
1583 src[i] = get_src_reg(c, inst, 0, i);
1584
1585 switch (inst->tex_target) {
1586 case TEXTURE_1D_INDEX:
1587 brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */
1588 brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */
1589 brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */
1590 break;
1591 case TEXTURE_2D_INDEX:
1592 case TEXTURE_RECT_INDEX:
1593 brw_MOV(p, brw_message_reg(2), src[0]);
1594 brw_MOV(p, brw_message_reg(3), src[1]);
1595 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
1596 break;
1597 case TEXTURE_3D_INDEX:
1598 case TEXTURE_CUBE_INDEX:
1599 brw_MOV(p, brw_message_reg(2), src[0]);
1600 brw_MOV(p, brw_message_reg(3), src[1]);
1601 brw_MOV(p, brw_message_reg(4), src[2]);
1602 break;
1603 default:
1604 /* invalid target */
1605 abort();
1606 }
1607 brw_MOV(p, brw_message_reg(5), src[3]); /* bias */
1608 brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
1609
1610 if (BRW_IS_IGDNG(p->brw)) {
1611 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
1612 } else {
1613 /* Does it work well on SIMD8? */
1614 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1615 }
1616
1617 brw_SAMPLE(p,
1618 retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
1619 1, /* msg_reg_nr */
1620 retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
1621 SURF_INDEX_TEXTURE(unit),
1622 unit, /* sampler */
1623 inst->DstReg.WriteMask, /* writemask */
1624 msg_type, /* msg_type */
1625 4, /* response_length */
1626 4, /* msg_length */
1627 0, /* eot */
1628 1,
1629 BRW_SAMPLER_SIMD_MODE_SIMD8);
1630 }
1631
1632
1633 static void emit_tex(struct brw_wm_compile *c,
1634 const struct brw_fp_instruction *inst)
1635 {
1636 struct brw_compile *p = &c->func;
1637 struct brw_reg dst[4], src[4], payload_reg;
1638 /* Note: tex_unit was already looked up through SamplerTextures[] */
1639 const GLuint unit = inst->tex_unit;
1640 GLuint msg_len;
1641 GLuint i, nr;
1642 GLuint emit;
1643 GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;
1644 GLuint msg_type;
1645
1646 assert(unit < BRW_MAX_TEX_UNIT);
1647
1648 payload_reg = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
1649
1650 for (i = 0; i < 4; i++)
1651 dst[i] = get_dst_reg(c, inst, i);
1652 for (i = 0; i < 4; i++)
1653 src[i] = get_src_reg(c, inst, 0, i);
1654
1655 switch (inst->tex_target) {
1656 case TEXTURE_1D_INDEX:
1657 emit = WRITEMASK_X;
1658 nr = 1;
1659 break;
1660 case TEXTURE_2D_INDEX:
1661 case TEXTURE_RECT_INDEX:
1662 emit = WRITEMASK_XY;
1663 nr = 2;
1664 break;
1665 case TEXTURE_3D_INDEX:
1666 case TEXTURE_CUBE_INDEX:
1667 emit = WRITEMASK_XYZ;
1668 nr = 3;
1669 break;
1670 default:
1671 /* invalid target */
1672 abort();
1673 }
1674 msg_len = 1;
1675
1676 /* move/load S, T, R coords */
1677 for (i = 0; i < nr; i++) {
1678 static const GLuint swz[4] = {0,1,2,2};
1679 if (emit & (1<<i))
1680 brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
1681 else
1682 brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
1683 msg_len += 1;
1684 }
1685
1686 if (shadow) {
1687 brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
1688 brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
1689 }
1690
1691 if (BRW_IS_IGDNG(p->brw)) {
1692 if (shadow)
1693 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
1694 else
1695 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG;
1696 } else {
1697 /* Does it work for shadow on SIMD8 ? */
1698 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
1699 }
1700
1701 brw_SAMPLE(p,
1702 retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
1703 1, /* msg_reg_nr */
1704 retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
1705 SURF_INDEX_TEXTURE(unit),
1706 unit, /* sampler */
1707 inst->DstReg.WriteMask, /* writemask */
1708 msg_type, /* msg_type */
1709 4, /* response_length */
1710 shadow ? 6 : 4, /* msg_length */
1711 0, /* eot */
1712 1,
1713 BRW_SAMPLER_SIMD_MODE_SIMD8);
1714
1715 if (shadow)
1716 brw_MOV(p, dst[3], brw_imm_f(1.0));
1717 }
1718
1719
1720 /**
1721 * Resolve subroutine calls after code emit is done.
1722 */
1723 static void post_wm_emit( struct brw_wm_compile *c )
1724 {
1725 brw_resolve_cals(&c->func);
1726 }
1727
1728 static void
1729 get_argument_regs(struct brw_wm_compile *c,
1730 const struct brw_fp_instruction *inst,
1731 int index,
1732 struct brw_reg *regs,
1733 int mask)
1734 {
1735 int i;
1736
1737 for (i = 0; i < 4; i++) {
1738 if (mask & (1 << i))
1739 regs[i] = get_src_reg(c, inst, index, i);
1740 }
1741 }
1742
1743 static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_compile *c)
1744 {
1745 #define MAX_IF_DEPTH 32
1746 #define MAX_LOOP_DEPTH 32
1747 struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
1748 GLuint i, if_depth = 0, loop_depth = 0;
1749 struct brw_compile *p = &c->func;
1750 struct brw_indirect stack_index = brw_indirect(0, 0);
1751
1752 c->out_of_regs = GL_FALSE;
1753
1754 prealloc_reg(c);
1755 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1756 brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
1757
1758 for (i = 0; i < c->nr_fp_insns; i++) {
1759 const struct brw_fp_instruction *inst = &c->fp_instructions[i];
1760 int dst_flags;
1761 struct brw_reg args[3][4], dst[4];
1762 int j;
1763
1764 c->cur_inst = i;
1765
1766 #if 0
1767 debug_printf("Inst %d: ", i);
1768 _mesa_print_instruction(inst);
1769 #endif
1770
1771 /* fetch any constants that this instruction needs */
1772 if (c->fp->use_const_buffer)
1773 fetch_constants(c, inst);
1774
1775 if (inst->CondUpdate)
1776 brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
1777 else
1778 brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
1779
1780 dst_flags = inst->DstReg.WriteMask;
1781 if (inst->SaturateMode == SATURATE_ZERO_ONE)
1782 dst_flags |= SATURATE;
1783
1784 switch (inst->Opcode) {
1785 case WM_PIXELXY:
1786 emit_pixel_xy(c, inst);
1787 break;
1788 case WM_DELTAXY:
1789 emit_delta_xy(c, inst);
1790 break;
1791 case WM_PIXELW:
1792 emit_pixel_w(c, inst);
1793 break;
1794 case WM_LINTERP:
1795 emit_linterp(c, inst);
1796 break;
1797 case WM_PINTERP:
1798 emit_pinterp(c, inst);
1799 break;
1800 case WM_CINTERP:
1801 emit_cinterp(c, inst);
1802 break;
1803 case WM_WPOSXY:
1804 emit_wpos_xy(c, inst);
1805 break;
1806 case WM_FB_WRITE:
1807 emit_fb_write(c, inst);
1808 break;
1809 case WM_FRONTFACING:
1810 emit_frontfacing(c, inst);
1811 break;
1812 case OPCODE_ADD:
1813 emit_add(c, inst);
1814 break;
1815 case OPCODE_ARL:
1816 emit_arl(c, inst);
1817 break;
1818 case OPCODE_FRC:
1819 emit_frc(c, inst);
1820 break;
1821 case OPCODE_FLR:
1822 emit_flr(c, inst);
1823 break;
1824 case OPCODE_LRP:
1825 emit_lrp(c, inst);
1826 break;
1827 case OPCODE_TRUNC:
1828 emit_trunc(c, inst);
1829 break;
1830 case OPCODE_MOV:
1831 emit_mov(c, inst);
1832 break;
1833 case OPCODE_DP3:
1834 emit_dp3(c, inst);
1835 break;
1836 case OPCODE_DP4:
1837 emit_dp4(c, inst);
1838 break;
1839 case OPCODE_XPD:
1840 emit_xpd(c, inst);
1841 break;
1842 case OPCODE_DPH:
1843 emit_dph(c, inst);
1844 break;
1845 case OPCODE_RCP:
1846 emit_rcp(c, inst);
1847 break;
1848 case OPCODE_RSQ:
1849 emit_rsq(c, inst);
1850 break;
1851 case OPCODE_SIN:
1852 emit_sin(c, inst);
1853 break;
1854 case OPCODE_COS:
1855 emit_cos(c, inst);
1856 break;
1857 case OPCODE_EX2:
1858 emit_ex2(c, inst);
1859 break;
1860 case OPCODE_LG2:
1861 emit_lg2(c, inst);
1862 break;
1863 case OPCODE_MIN:
1864 case OPCODE_MAX:
1865 emit_min_max(c, inst);
1866 break;
1867 case OPCODE_DDX:
1868 case OPCODE_DDY:
1869 for (j = 0; j < 4; j++) {
1870 if (inst->DstReg.WriteMask & (1 << j))
1871 dst[j] = get_dst_reg(c, inst, j);
1872 else
1873 dst[j] = brw_null_reg();
1874 }
1875 get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW);
1876 emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
1877 args[0]);
1878 break;
1879 case OPCODE_SLT:
1880 emit_slt(c, inst);
1881 break;
1882 case OPCODE_SLE:
1883 emit_sle(c, inst);
1884 break;
1885 case OPCODE_SGT:
1886 emit_sgt(c, inst);
1887 break;
1888 case OPCODE_SGE:
1889 emit_sge(c, inst);
1890 break;
1891 case OPCODE_SEQ:
1892 emit_seq(c, inst);
1893 break;
1894 case OPCODE_SNE:
1895 emit_sne(c, inst);
1896 break;
1897 case OPCODE_MUL:
1898 emit_mul(c, inst);
1899 break;
1900 case OPCODE_POW:
1901 emit_pow(c, inst);
1902 break;
1903 case OPCODE_MAD:
1904 emit_mad(c, inst);
1905 break;
1906 case OPCODE_TEX:
1907 emit_tex(c, inst);
1908 break;
1909 case OPCODE_TXB:
1910 emit_txb(c, inst);
1911 break;
1912 case OPCODE_KIL_NV:
1913 emit_kil(c);
1914 break;
1915 case OPCODE_IF:
1916 assert(if_depth < MAX_IF_DEPTH);
1917 if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
1918 break;
1919 case OPCODE_ELSE:
1920 if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
1921 break;
1922 case OPCODE_ENDIF:
1923 assert(if_depth > 0);
1924 brw_ENDIF(p, if_inst[--if_depth]);
1925 break;
1926 case OPCODE_BGNSUB:
1927 brw_save_label(p, inst->Comment, p->nr_insn);
1928 break;
1929 case OPCODE_ENDSUB:
1930 /* no-op */
1931 break;
1932 case OPCODE_CAL:
1933 brw_push_insn_state(p);
1934 brw_set_mask_control(p, BRW_MASK_DISABLE);
1935 brw_set_access_mode(p, BRW_ALIGN_1);
1936 brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
1937 brw_set_access_mode(p, BRW_ALIGN_16);
1938 brw_ADD(p, get_addr_reg(stack_index),
1939 get_addr_reg(stack_index), brw_imm_d(4));
1940 brw_save_call(&c->func, inst->label, p->nr_insn);
1941 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1942 brw_pop_insn_state(p);
1943 break;
1944
1945 case OPCODE_RET:
1946 brw_push_insn_state(p);
1947 brw_set_mask_control(p, BRW_MASK_DISABLE);
1948 brw_ADD(p, get_addr_reg(stack_index),
1949 get_addr_reg(stack_index), brw_imm_d(-4));
1950 brw_set_access_mode(p, BRW_ALIGN_1);
1951 brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
1952 brw_set_access_mode(p, BRW_ALIGN_16);
1953 brw_pop_insn_state(p);
1954
1955 break;
1956 case OPCODE_BGNLOOP:
1957 /* XXX may need to invalidate the current_constant regs */
1958 loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
1959 break;
1960 case OPCODE_BRK:
1961 brw_BREAK(p);
1962 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1963 break;
1964 case OPCODE_CONT:
1965 brw_CONT(p);
1966 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1967 break;
1968 case OPCODE_ENDLOOP:
1969 {
1970 struct brw_instruction *inst0, *inst1;
1971 GLuint br = 1;
1972
1973 if (BRW_IS_IGDNG(brw))
1974 br = 2;
1975
1976 loop_depth--;
1977 inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
1978 /* patch all the BREAK/CONT instructions from last BGNLOOP */
1979 while (inst0 > loop_inst[loop_depth]) {
1980 inst0--;
1981 if (inst0->header.opcode == BRW_OPCODE_BREAK) {
1982 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
1983 inst0->bits3.if_else.pop_count = 0;
1984 }
1985 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
1986 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
1987 inst0->bits3.if_else.pop_count = 0;
1988 }
1989 }
1990 }
1991 break;
1992 default:
1993 debug_printf("unsupported IR in fragment shader %d\n",
1994 inst->Opcode);
1995 }
1996
1997 if (inst->CondUpdate)
1998 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
1999 else
2000 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2001 }
2002 post_wm_emit(c);
2003
2004 if (BRW_DEBUG & DEBUG_WM) {
2005 debug_printf("wm-native:\n");
2006 brw_disasm(stderr, p->store, p->nr_insn);
2007 }
2008 }
2009
2010 /**
2011 * Do GPU code generation for shaders that use GLSL features such as
2012 * flow control. Other shaders will be compiled with the
2013 */
2014 void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c)
2015 {
2016 if (BRW_DEBUG & DEBUG_WM) {
2017 debug_printf("%s:\n", __FUNCTION__);
2018 }
2019
2020 /* initial instruction translation/simplification */
2021 brw_wm_pass_fp(c);
2022
2023 /* actual code generation */
2024 brw_wm_emit_branching_shader(brw, c);
2025
2026 if (BRW_DEBUG & DEBUG_WM) {
2027 brw_wm_print_program(c, "brw_wm_branching_shader_emit done");
2028 }
2029
2030 c->prog_data.total_grf = num_grf_used(c);
2031 c->prog_data.total_scratch = 0;
2032 }