Merge branch 'lp-offset-twoside'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_glsl.c
1 #include "main/macros.h"
2 #include "program/prog_parameter.h"
3 #include "program/prog_print.h"
4 #include "program/prog_optimize.h"
5 #include "brw_context.h"
6 #include "brw_eu.h"
7 #include "brw_wm.h"
8
9 static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
10 const struct prog_instruction *inst,
11 GLuint component);
12
13 /**
14 * Determine if the given fragment program uses GLSL features such
15 * as flow conditionals, loops, subroutines.
16 * Some GLSL shaders may use these features, others might not.
17 */
18 GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
19 {
20 int i;
21
22 if (unlikely(INTEL_DEBUG & DEBUG_GLSL_FORCE))
23 return GL_TRUE;
24
25 for (i = 0; i < fp->Base.NumInstructions; i++) {
26 const struct prog_instruction *inst = &fp->Base.Instructions[i];
27 switch (inst->Opcode) {
28 case OPCODE_ARL:
29 case OPCODE_IF:
30 case OPCODE_ENDIF:
31 case OPCODE_CAL:
32 case OPCODE_BRK:
33 case OPCODE_RET:
34 case OPCODE_BGNLOOP:
35 return GL_TRUE;
36 default:
37 break;
38 }
39 }
40 return GL_FALSE;
41 }
42
43
44
45 static void
46 reclaim_temps(struct brw_wm_compile *c);
47
48
49 /** Mark GRF register as used. */
50 static void
51 prealloc_grf(struct brw_wm_compile *c, int r)
52 {
53 c->used_grf[r] = GL_TRUE;
54 }
55
56
57 /** Mark given GRF register as not in use. */
58 static void
59 release_grf(struct brw_wm_compile *c, int r)
60 {
61 /*assert(c->used_grf[r]);*/
62 c->used_grf[r] = GL_FALSE;
63 c->first_free_grf = MIN2(c->first_free_grf, r);
64 }
65
66
67 /** Return index of a free GRF, mark it as used. */
68 static int
69 alloc_grf(struct brw_wm_compile *c)
70 {
71 GLuint r;
72 for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
73 if (!c->used_grf[r]) {
74 c->used_grf[r] = GL_TRUE;
75 c->first_free_grf = r + 1; /* a guess */
76 return r;
77 }
78 }
79
80 /* no free temps, try to reclaim some */
81 reclaim_temps(c);
82 c->first_free_grf = 0;
83
84 /* try alloc again */
85 for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
86 if (!c->used_grf[r]) {
87 c->used_grf[r] = GL_TRUE;
88 c->first_free_grf = r + 1; /* a guess */
89 return r;
90 }
91 }
92
93 for (r = 0; r < BRW_WM_MAX_GRF; r++) {
94 assert(c->used_grf[r]);
95 }
96
97 /* really, no free GRF regs found */
98 if (!c->out_of_regs) {
99 /* print warning once per compilation */
100 _mesa_warning(NULL, "i965: ran out of registers for fragment program");
101 c->out_of_regs = GL_TRUE;
102 }
103
104 return -1;
105 }
106
107
108 /** Return number of GRF registers used */
109 static int
110 num_grf_used(const struct brw_wm_compile *c)
111 {
112 int r;
113 for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--)
114 if (c->used_grf[r])
115 return r + 1;
116 return 0;
117 }
118
119
120
121 /**
122 * Record the mapping of a Mesa register to a hardware register.
123 */
124 static void set_reg(struct brw_wm_compile *c, int file, int index,
125 int component, struct brw_reg reg)
126 {
127 c->wm_regs[file][index][component].reg = reg;
128 c->wm_regs[file][index][component].inited = GL_TRUE;
129 }
130
131 static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
132 {
133 struct brw_reg reg;
134
135 /* if we need to allocate another temp, grow the tmp_regs[] array */
136 if (c->tmp_index == c->tmp_max) {
137 int r = alloc_grf(c);
138 if (r < 0) {
139 /*printf("Out of temps in %s\n", __FUNCTION__);*/
140 r = 50; /* XXX random register! */
141 }
142 c->tmp_regs[ c->tmp_max++ ] = r;
143 }
144
145 /* form the GRF register */
146 reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
147 /*printf("alloc_temp %d\n", reg.nr);*/
148 assert(reg.nr < BRW_WM_MAX_GRF);
149 return reg;
150
151 }
152
153 /**
154 * Save current temp register info.
155 * There must be a matching call to release_tmps().
156 */
157 static int mark_tmps(struct brw_wm_compile *c)
158 {
159 return c->tmp_index;
160 }
161
162 static void release_tmps(struct brw_wm_compile *c, int mark)
163 {
164 c->tmp_index = mark;
165 }
166
167 /**
168 * Convert Mesa src register to brw register.
169 *
170 * Since we're running in SOA mode each Mesa register corresponds to four
171 * hardware registers. We allocate the hardware registers as needed here.
172 *
173 * \param file register file, one of PROGRAM_x
174 * \param index register number
175 * \param component src component (X=0, Y=1, Z=2, W=3)
176 * \param nr not used?!?
177 * \param neg negate value?
178 * \param abs take absolute value?
179 */
180 static struct brw_reg
181 get_reg(struct brw_wm_compile *c, int file, int index, int component,
182 int nr, GLuint neg, GLuint abs)
183 {
184 struct brw_reg reg;
185 switch (file) {
186 case PROGRAM_STATE_VAR:
187 case PROGRAM_CONSTANT:
188 case PROGRAM_UNIFORM:
189 file = PROGRAM_STATE_VAR;
190 break;
191 case PROGRAM_UNDEFINED:
192 return brw_null_reg();
193 case PROGRAM_TEMPORARY:
194 case PROGRAM_INPUT:
195 case PROGRAM_OUTPUT:
196 case PROGRAM_PAYLOAD:
197 break;
198 default:
199 _mesa_problem(NULL, "Unexpected file in get_reg()");
200 return brw_null_reg();
201 }
202
203 assert(index < 256);
204 assert(component < 4);
205
206 /* see if we've already allocated a HW register for this Mesa register */
207 if (c->wm_regs[file][index][component].inited) {
208 /* yes, re-use */
209 reg = c->wm_regs[file][index][component].reg;
210 }
211 else {
212 /* no, allocate new register */
213 int grf = alloc_grf(c);
214 /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
215 if (grf < 0) {
216 /* totally out of temps */
217 grf = 51; /* XXX random register! */
218 }
219
220 reg = brw_vec8_grf(grf, 0);
221 /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
222
223 set_reg(c, file, index, component, reg);
224 }
225
226 if (neg & (1 << component)) {
227 reg = negate(reg);
228 }
229 if (abs)
230 reg = brw_abs(reg);
231 return reg;
232 }
233
234
235
236 /**
237 * This is called if we run out of GRF registers. Examine the live intervals
238 * of temp regs in the program and free those which won't be used again.
239 */
240 static void
241 reclaim_temps(struct brw_wm_compile *c)
242 {
243 GLint intBegin[MAX_PROGRAM_TEMPS];
244 GLint intEnd[MAX_PROGRAM_TEMPS];
245 int index;
246
247 /*printf("Reclaim temps:\n");*/
248
249 _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
250 intBegin, intEnd);
251
252 for (index = 0; index < MAX_PROGRAM_TEMPS; index++) {
253 if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) {
254 /* program temp[i] can be freed */
255 int component;
256 /*printf(" temp[%d] is dead\n", index);*/
257 for (component = 0; component < 4; component++) {
258 if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) {
259 int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr;
260 release_grf(c, r);
261 /*
262 printf(" Reclaim temp %d, reg %d at inst %d\n",
263 index, r, c->cur_inst);
264 */
265 c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE;
266 }
267 }
268 }
269 }
270 }
271
272
273
274
275 /**
276 * Preallocate registers. This sets up the Mesa to hardware register
277 * mapping for certain registers, such as constants (uniforms/state vars)
278 * and shader inputs.
279 */
280 static void prealloc_reg(struct brw_wm_compile *c)
281 {
282 struct intel_context *intel = &c->func.brw->intel;
283 int i, j;
284 struct brw_reg reg;
285 int urb_read_length = 0;
286 GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
287 GLuint reg_index = 0;
288
289 memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
290 c->first_free_grf = 0;
291
292 for (i = 0; i < 4; i++) {
293 if (i < (c->key.nr_payload_regs + 1) / 2)
294 reg = brw_vec8_grf(i * 2, 0);
295 else
296 reg = brw_vec8_grf(0, 0);
297 set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
298 }
299 set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_W, 0,
300 brw_vec8_grf(c->key.source_w_reg, 0));
301 reg_index += c->key.nr_payload_regs;
302
303 /* constants */
304 {
305 const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
306 const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
307
308 /* use a real constant buffer, or just use a section of the GRF? */
309 /* XXX this heuristic may need adjustment... */
310 if ((nr_params + nr_temps) * 4 + reg_index > 80) {
311 for (i = 0; i < nr_params; i++) {
312 float *pv = c->fp->program.Base.Parameters->ParameterValues[i];
313 for (j = 0; j < 4; j++) {
314 c->prog_data.pull_param[c->prog_data.nr_pull_params] = &pv[j];
315 c->prog_data.nr_pull_params++;
316 }
317 }
318
319 c->prog_data.nr_params = 0;
320 }
321 /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
322
323 if (!c->prog_data.nr_pull_params) {
324 const struct gl_program_parameter_list *plist =
325 c->fp->program.Base.Parameters;
326 int index = 0;
327
328 /* number of float constants in CURBE */
329 c->prog_data.nr_params = 4 * nr_params;
330
331 /* loop over program constants (float[4]) */
332 for (i = 0; i < nr_params; i++) {
333 /* loop over XYZW channels */
334 for (j = 0; j < 4; j++, index++) {
335 reg = brw_vec1_grf(reg_index + index / 8, index % 8);
336 /* Save pointer to parameter/constant value.
337 * Constants will be copied in prepare_constant_buffer()
338 */
339 c->prog_data.param[index] = &plist->ParameterValues[i][j];
340 set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
341 }
342 }
343 /* number of constant regs used (each reg is float[8]) */
344 c->nr_creg = ALIGN(nr_params, 2) / 2;
345 reg_index += c->nr_creg;
346 }
347 }
348
349 /* fragment shader inputs: One 2-reg pair of interpolation
350 * coefficients for each vec4 to be set up.
351 */
352 if (intel->gen >= 6) {
353 for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
354 if (!(c->fp->program.Base.InputsRead & BITFIELD64_BIT(i)))
355 continue;
356
357 reg = brw_vec8_grf(reg_index, 0);
358 for (j = 0; j < 4; j++) {
359 set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
360 }
361 reg_index += 2;
362 }
363 urb_read_length = reg_index;
364 } else {
365 for (i = 0; i < VERT_RESULT_MAX; i++) {
366 int fp_input;
367
368 if (i >= VERT_RESULT_VAR0)
369 fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0;
370 else if (i <= VERT_RESULT_TEX7)
371 fp_input = i;
372 else
373 fp_input = -1;
374
375 if (fp_input >= 0 && inputs & (1 << fp_input)) {
376 urb_read_length = reg_index;
377 reg = brw_vec8_grf(reg_index, 0);
378 for (j = 0; j < 4; j++)
379 set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
380 }
381 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
382 reg_index += 2;
383 }
384 }
385 }
386
387 c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
388 c->prog_data.urb_read_length = urb_read_length;
389 c->prog_data.curb_read_length = c->nr_creg;
390 c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
391 reg_index++;
392 c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
393 reg_index += 2;
394
395 /* mark GRF regs [0..reg_index-1] as in-use */
396 for (i = 0; i < reg_index; i++)
397 prealloc_grf(c, i);
398
399 /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
400 prealloc_grf(c, 126);
401 prealloc_grf(c, 127);
402
403 for (i = 0; i < c->nr_fp_insns; i++) {
404 const struct prog_instruction *inst = &c->prog_instructions[i];
405 struct brw_reg dst[4];
406
407 switch (inst->Opcode) {
408 case OPCODE_TEX:
409 case OPCODE_TXB:
410 /* Allocate the channels of texture results contiguously,
411 * since they are written out that way by the sampler unit.
412 */
413 for (j = 0; j < 4; j++) {
414 dst[j] = get_dst_reg(c, inst, j);
415 if (j != 0)
416 assert(dst[j].nr == dst[j - 1].nr + 1);
417 }
418 break;
419 default:
420 break;
421 }
422 }
423
424 for (i = 0; i < c->nr_fp_insns; i++) {
425 const struct prog_instruction *inst = &c->prog_instructions[i];
426
427 switch (inst->Opcode) {
428 case WM_DELTAXY:
429 /* Allocate WM_DELTAXY destination on G45/GM45 to an
430 * even-numbered GRF if possible so that we can use the PLN
431 * instruction.
432 */
433 if (inst->DstReg.WriteMask == WRITEMASK_XY &&
434 !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited &&
435 !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited &&
436 (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) {
437 int grf;
438
439 for (grf = c->first_free_grf & ~1;
440 grf < BRW_WM_MAX_GRF;
441 grf += 2)
442 {
443 if (!c->used_grf[grf] && !c->used_grf[grf + 1]) {
444 c->used_grf[grf] = GL_TRUE;
445 c->used_grf[grf + 1] = GL_TRUE;
446 c->first_free_grf = grf + 2; /* a guess */
447
448 set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0,
449 brw_vec8_grf(grf, 0));
450 set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1,
451 brw_vec8_grf(grf + 1, 0));
452 break;
453 }
454 }
455 }
456 default:
457 break;
458 }
459 }
460
461 /* An instruction may reference up to three constants.
462 * They'll be found in these registers.
463 * XXX alloc these on demand!
464 */
465 if (c->prog_data.nr_pull_params) {
466 for (i = 0; i < 3; i++) {
467 c->current_const[i].index = -1;
468 c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
469 }
470 }
471 #if 0
472 printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
473 printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
474 #endif
475 }
476
477
478 /**
479 * Check if any of the instruction's src registers are constants, uniforms,
480 * or statevars. If so, fetch any constants that we don't already have in
481 * the three GRF slots.
482 */
483 static void fetch_constants(struct brw_wm_compile *c,
484 const struct prog_instruction *inst)
485 {
486 struct brw_compile *p = &c->func;
487 GLuint i;
488
489 /* loop over instruction src regs */
490 for (i = 0; i < 3; i++) {
491 const struct prog_src_register *src = &inst->SrcReg[i];
492 if (src->File == PROGRAM_STATE_VAR ||
493 src->File == PROGRAM_CONSTANT ||
494 src->File == PROGRAM_UNIFORM) {
495 c->current_const[i].index = src->Index;
496
497 #if 0
498 printf(" fetch const[%d] for arg %d into reg %d\n",
499 src->Index, i, c->current_const[i].reg.nr);
500 #endif
501
502 /* need to fetch the constant now */
503 brw_oword_block_read(p,
504 c->current_const[i].reg,
505 brw_message_reg(1),
506 16 * src->Index,
507 SURF_INDEX_FRAG_CONST_BUFFER);
508 }
509 }
510 }
511
512
513 /**
514 * Convert Mesa dst register to brw register.
515 */
516 static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
517 const struct prog_instruction *inst,
518 GLuint component)
519 {
520 const int nr = 1;
521 return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr,
522 0, 0);
523 }
524
525
526 static struct brw_reg
527 get_src_reg_const(struct brw_wm_compile *c,
528 const struct prog_instruction *inst,
529 GLuint srcRegIndex, GLuint component)
530 {
531 /* We should have already fetched the constant from the constant
532 * buffer in fetch_constants(). Now we just have to return a
533 * register description that extracts the needed component and
534 * smears it across all eight vector components.
535 */
536 const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
537 struct brw_reg const_reg;
538
539 assert(component < 4);
540 assert(srcRegIndex < 3);
541 assert(c->current_const[srcRegIndex].index != -1);
542 const_reg = c->current_const[srcRegIndex].reg;
543
544 /* extract desired float from the const_reg, and smear */
545 const_reg = stride(const_reg, 0, 1, 0);
546 const_reg.subnr = component * 4;
547
548 if (src->Negate & (1 << component))
549 const_reg = negate(const_reg);
550 if (src->Abs)
551 const_reg = brw_abs(const_reg);
552
553 #if 0
554 printf(" form const[%d].%d for arg %d, reg %d\n",
555 c->current_const[srcRegIndex].index,
556 component,
557 srcRegIndex,
558 const_reg.nr);
559 #endif
560
561 return const_reg;
562 }
563
564
565 /**
566 * Convert Mesa src register to brw register.
567 */
568 static struct brw_reg get_src_reg(struct brw_wm_compile *c,
569 const struct prog_instruction *inst,
570 GLuint srcRegIndex, GLuint channel)
571 {
572 const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
573 const GLuint nr = 1;
574 const GLuint component = GET_SWZ(src->Swizzle, channel);
575
576 /* Only one immediate value can be used per native opcode, and it
577 * has be in the src1 slot, so not all Mesa instructions will get
578 * to take advantage of immediate constants.
579 */
580 if (brw_wm_arg_can_be_immediate(inst->Opcode, srcRegIndex)) {
581 const struct gl_program_parameter_list *params;
582
583 params = c->fp->program.Base.Parameters;
584
585 /* Extended swizzle terms */
586 if (component == SWIZZLE_ZERO) {
587 return brw_imm_f(0.0F);
588 } else if (component == SWIZZLE_ONE) {
589 if (src->Negate)
590 return brw_imm_f(-1.0F);
591 else
592 return brw_imm_f(1.0F);
593 }
594
595 if (src->File == PROGRAM_CONSTANT) {
596 float f = params->ParameterValues[src->Index][component];
597
598 if (src->Abs)
599 f = fabs(f);
600 if (src->Negate)
601 f = -f;
602
603 return brw_imm_f(f);
604 }
605 }
606
607 if (c->prog_data.nr_pull_params &&
608 (src->File == PROGRAM_STATE_VAR ||
609 src->File == PROGRAM_CONSTANT ||
610 src->File == PROGRAM_UNIFORM)) {
611 return get_src_reg_const(c, inst, srcRegIndex, component);
612 }
613 else {
614 /* other type of source register */
615 return get_reg(c, src->File, src->Index, component, nr,
616 src->Negate, src->Abs);
617 }
618 }
619
620 static void emit_arl(struct brw_wm_compile *c,
621 const struct prog_instruction *inst)
622 {
623 struct brw_compile *p = &c->func;
624 struct brw_reg src0, addr_reg;
625 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
626 addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
627 BRW_ARF_ADDRESS, 0);
628 src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */
629 brw_MOV(p, addr_reg, src0);
630 brw_set_saturate(p, 0);
631 }
632
633 static INLINE struct brw_reg high_words( struct brw_reg reg )
634 {
635 return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
636 0, 8, 2 );
637 }
638
639 static INLINE struct brw_reg low_words( struct brw_reg reg )
640 {
641 return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 );
642 }
643
644 static INLINE struct brw_reg even_bytes( struct brw_reg reg )
645 {
646 return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 );
647 }
648
649 static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
650 {
651 return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ),
652 0, 16, 2 );
653 }
654
655 /**
656 * Resolve subroutine calls after code emit is done.
657 */
658 static void post_wm_emit( struct brw_wm_compile *c )
659 {
660 brw_resolve_cals(&c->func);
661 }
662
663 static void
664 get_argument_regs(struct brw_wm_compile *c,
665 const struct prog_instruction *inst,
666 int index,
667 struct brw_reg *dst,
668 struct brw_reg *regs,
669 int mask)
670 {
671 struct brw_compile *p = &c->func;
672 int i, j;
673
674 for (i = 0; i < 4; i++) {
675 if (mask & (1 << i)) {
676 regs[i] = get_src_reg(c, inst, index, i);
677
678 /* Unalias destination registers from our sources. */
679 if (regs[i].file == BRW_GENERAL_REGISTER_FILE) {
680 for (j = 0; j < 4; j++) {
681 if (memcmp(&regs[i], &dst[j], sizeof(regs[0])) == 0) {
682 struct brw_reg tmp = alloc_tmp(c);
683 brw_MOV(p, tmp, regs[i]);
684 regs[i] = tmp;
685 break;
686 }
687 }
688 }
689 }
690 }
691 }
692
693 static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
694 {
695 struct intel_context *intel = &brw->intel;
696 #define MAX_IF_DEPTH 32
697 #define MAX_LOOP_DEPTH 32
698 struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
699 int if_depth_in_loop[MAX_LOOP_DEPTH];
700 GLuint i, if_depth = 0, loop_depth = 0;
701 struct brw_compile *p = &c->func;
702 struct brw_indirect stack_index = brw_indirect(0, 0);
703
704 c->out_of_regs = GL_FALSE;
705
706 if_depth_in_loop[loop_depth] = 0;
707
708 prealloc_reg(c);
709 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
710 brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
711
712 if (intel->gen >= 6)
713 brw_set_acc_write_control(p, 1);
714
715 for (i = 0; i < c->nr_fp_insns; i++) {
716 const struct prog_instruction *inst = &c->prog_instructions[i];
717 int dst_flags;
718 struct brw_reg args[3][4], dst[4];
719 int j;
720 int mark = mark_tmps( c );
721
722 c->cur_inst = i;
723
724 #if 0
725 printf("Inst %d: ", i);
726 _mesa_print_instruction(inst);
727 #endif
728
729 /* fetch any constants that this instruction needs */
730 if (c->prog_data.nr_pull_params)
731 fetch_constants(c, inst);
732
733 if (inst->Opcode != OPCODE_ARL) {
734 for (j = 0; j < 4; j++) {
735 if (inst->DstReg.WriteMask & (1 << j))
736 dst[j] = get_dst_reg(c, inst, j);
737 else
738 dst[j] = brw_null_reg();
739 }
740 }
741 for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++)
742 get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW);
743
744 dst_flags = inst->DstReg.WriteMask;
745 if (inst->SaturateMode == SATURATE_ZERO_ONE)
746 dst_flags |= SATURATE;
747
748 if (inst->CondUpdate)
749 brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
750 else
751 brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
752
753 switch (inst->Opcode) {
754 case WM_PIXELXY:
755 emit_pixel_xy(c, dst, dst_flags);
756 break;
757 case WM_DELTAXY:
758 emit_delta_xy(p, dst, dst_flags, args[0]);
759 break;
760 case WM_PIXELW:
761 emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
762 break;
763 case WM_LINTERP:
764 emit_linterp(p, dst, dst_flags, args[0], args[1]);
765 break;
766 case WM_PINTERP:
767 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
768 break;
769 case WM_CINTERP:
770 emit_cinterp(p, dst, dst_flags, args[0]);
771 break;
772 case WM_WPOSXY:
773 emit_wpos_xy(c, dst, dst_flags, args[0]);
774 break;
775 case WM_FB_WRITE:
776 emit_fb_write(c, args[0], args[1], args[2],
777 INST_AUX_GET_TARGET(inst->Aux),
778 inst->Aux & INST_AUX_EOT);
779 break;
780 case WM_FRONTFACING:
781 emit_frontfacing(p, dst, dst_flags);
782 break;
783 case OPCODE_ADD:
784 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
785 break;
786 case OPCODE_ARL:
787 emit_arl(c, inst);
788 break;
789 case OPCODE_FRC:
790 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
791 break;
792 case OPCODE_FLR:
793 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
794 break;
795 case OPCODE_LRP:
796 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
797 break;
798 case OPCODE_TRUNC:
799 emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
800 break;
801 case OPCODE_MOV:
802 case OPCODE_SWZ:
803 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
804 break;
805 case OPCODE_DP2:
806 emit_dp2(p, dst, dst_flags, args[0], args[1]);
807 break;
808 case OPCODE_DP3:
809 emit_dp3(p, dst, dst_flags, args[0], args[1]);
810 break;
811 case OPCODE_DP4:
812 emit_dp4(p, dst, dst_flags, args[0], args[1]);
813 break;
814 case OPCODE_XPD:
815 emit_xpd(p, dst, dst_flags, args[0], args[1]);
816 break;
817 case OPCODE_DPH:
818 emit_dph(p, dst, dst_flags, args[0], args[1]);
819 break;
820 case OPCODE_RCP:
821 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
822 break;
823 case OPCODE_RSQ:
824 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
825 break;
826 case OPCODE_SIN:
827 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
828 break;
829 case OPCODE_COS:
830 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
831 break;
832 case OPCODE_EX2:
833 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
834 break;
835 case OPCODE_LG2:
836 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
837 break;
838 case OPCODE_CMP:
839 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
840 break;
841 case OPCODE_MIN:
842 emit_min(p, dst, dst_flags, args[0], args[1]);
843 break;
844 case OPCODE_MAX:
845 emit_max(p, dst, dst_flags, args[0], args[1]);
846 break;
847 case OPCODE_DDX:
848 case OPCODE_DDY:
849 emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
850 args[0]);
851 break;
852 case OPCODE_SLT:
853 emit_sop(p, dst, dst_flags,
854 BRW_CONDITIONAL_L, args[0], args[1]);
855 break;
856 case OPCODE_SLE:
857 emit_sop(p, dst, dst_flags,
858 BRW_CONDITIONAL_LE, args[0], args[1]);
859 break;
860 case OPCODE_SGT:
861 emit_sop(p, dst, dst_flags,
862 BRW_CONDITIONAL_G, args[0], args[1]);
863 break;
864 case OPCODE_SGE:
865 emit_sop(p, dst, dst_flags,
866 BRW_CONDITIONAL_GE, args[0], args[1]);
867 break;
868 case OPCODE_SEQ:
869 emit_sop(p, dst, dst_flags,
870 BRW_CONDITIONAL_EQ, args[0], args[1]);
871 break;
872 case OPCODE_SNE:
873 emit_sop(p, dst, dst_flags,
874 BRW_CONDITIONAL_NEQ, args[0], args[1]);
875 break;
876 case OPCODE_SSG:
877 emit_sign(p, dst, dst_flags, args[0]);
878 break;
879 case OPCODE_MUL:
880 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
881 break;
882 case OPCODE_POW:
883 emit_math2(c, BRW_MATH_FUNCTION_POW,
884 dst, dst_flags, args[0], args[1]);
885 break;
886 case OPCODE_MAD:
887 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
888 break;
889 case OPCODE_TEX:
890 emit_tex(c, dst, dst_flags, args[0],
891 get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
892 0, 1, 0, 0),
893 inst->TexSrcTarget,
894 inst->TexSrcUnit,
895 (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0);
896 break;
897 case OPCODE_TXB:
898 emit_txb(c, dst, dst_flags, args[0],
899 get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
900 0, 1, 0, 0),
901 inst->TexSrcTarget,
902 c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]);
903 break;
904 case OPCODE_KIL_NV:
905 emit_kil_nv(c);
906 break;
907 case OPCODE_IF:
908 assert(if_depth < MAX_IF_DEPTH);
909 if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
910 if_depth_in_loop[loop_depth]++;
911 break;
912 case OPCODE_ELSE:
913 assert(if_depth > 0);
914 if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
915 break;
916 case OPCODE_ENDIF:
917 assert(if_depth > 0);
918 brw_ENDIF(p, if_inst[--if_depth]);
919 if_depth_in_loop[loop_depth]--;
920 break;
921 case OPCODE_BGNSUB:
922 brw_save_label(p, inst->Comment, p->nr_insn);
923 break;
924 case OPCODE_ENDSUB:
925 /* no-op */
926 break;
927 case OPCODE_CAL:
928 brw_push_insn_state(p);
929 brw_set_mask_control(p, BRW_MASK_DISABLE);
930 brw_set_access_mode(p, BRW_ALIGN_1);
931 brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
932 brw_set_access_mode(p, BRW_ALIGN_16);
933 brw_ADD(p, get_addr_reg(stack_index),
934 get_addr_reg(stack_index), brw_imm_d(4));
935 brw_save_call(&c->func, inst->Comment, p->nr_insn);
936 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
937 brw_pop_insn_state(p);
938 break;
939
940 case OPCODE_RET:
941 brw_push_insn_state(p);
942 brw_set_mask_control(p, BRW_MASK_DISABLE);
943 brw_ADD(p, get_addr_reg(stack_index),
944 get_addr_reg(stack_index), brw_imm_d(-4));
945 brw_set_access_mode(p, BRW_ALIGN_1);
946 brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
947 brw_set_access_mode(p, BRW_ALIGN_16);
948 brw_pop_insn_state(p);
949
950 break;
951 case OPCODE_BGNLOOP:
952 /* XXX may need to invalidate the current_constant regs */
953 loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
954 if_depth_in_loop[loop_depth] = 0;
955 break;
956 case OPCODE_BRK:
957 brw_BREAK(p, if_depth_in_loop[loop_depth]);
958 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
959 break;
960 case OPCODE_CONT:
961 brw_CONT(p, if_depth_in_loop[loop_depth]);
962 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
963 break;
964 case OPCODE_ENDLOOP:
965 {
966 struct brw_instruction *inst0, *inst1;
967 GLuint br = 1;
968
969 if (intel->gen == 5)
970 br = 2;
971
972 assert(loop_depth > 0);
973 loop_depth--;
974 inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
975 /* patch all the BREAK/CONT instructions from last BGNLOOP */
976 while (inst0 > loop_inst[loop_depth]) {
977 inst0--;
978 if (inst0->header.opcode == BRW_OPCODE_BREAK &&
979 inst0->bits3.if_else.jump_count == 0) {
980 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
981 }
982 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
983 inst0->bits3.if_else.jump_count == 0) {
984 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
985 }
986 }
987 }
988 break;
989 default:
990 printf("unsupported opcode %d (%s) in fragment shader\n",
991 inst->Opcode, inst->Opcode < MAX_OPCODE ?
992 _mesa_opcode_string(inst->Opcode) : "unknown");
993 }
994
995 /* Release temporaries containing any unaliased source regs. */
996 release_tmps( c, mark );
997
998 if (inst->CondUpdate)
999 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
1000 else
1001 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1002 }
1003 post_wm_emit(c);
1004
1005 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1006 printf("wm-native:\n");
1007 for (i = 0; i < p->nr_insn; i++)
1008 brw_disasm(stdout, &p->store[i], intel->gen);
1009 printf("\n");
1010 }
1011 }
1012
1013 /**
1014 * Do GPU code generation for shaders that use GLSL features such as
1015 * flow control. Other shaders will be compiled with the
1016 */
1017 void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
1018 {
1019 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1020 printf("brw_wm_glsl_emit:\n");
1021 }
1022
1023 /* initial instruction translation/simplification */
1024 brw_wm_pass_fp(c);
1025
1026 /* actual code generation */
1027 brw_wm_emit_glsl(brw, c);
1028
1029 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1030 brw_wm_print_program(c, "brw_wm_glsl_emit done");
1031 }
1032
1033 c->prog_data.total_grf = num_grf_used(c);
1034 c->prog_data.total_scratch = 0;
1035 }