/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file brw_fs_emit.cpp
 *
 * This file supports emitting code from the FS LIR to the actual
 * native instructions.
 */

extern "C" {
#include "main/macros.h"
#include "brw_context.h"
#include "brw_eu.h"
} /* extern "C" */

#include "brw_fs.h"
#include "glsl/ir_print_visitor.h"

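/* Emit the framebuffer write send for FS_OPCODE_FB_WRITE, setting up the
 * message header (when present) before issuing the render target write.
 */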
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;
   struct brw_reg implied_header;

   /* The header is 2 regs; g0 and g1 are its contents. g0 is handled by
    * the implied move, and g1 is copied here.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (inst->header_present) {
      if (intel->gen >= 6) {
         brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(p,
                 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);

         if (inst->target > 0) {
            /* Set the render target index for choosing BLEND_STATE. */
            brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                           inst->base_mrf, 2),
                              BRW_REGISTER_TYPE_UD),
                    brw_imm_ud(inst->target));
         }

         implied_header = brw_null_reg();
      } else {
         implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);

         brw_MOV(p,
                 brw_message_reg(inst->base_mrf + 1),
                 brw_vec8_grf(1, 0));
      }
   } else {
      implied_header = brw_null_reg();
   }

   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
                c->dispatch_width,
                inst->base_mrf,
                implied_header,
                inst->target,
                inst->mlen,
                0,
                eot,
                inst->header_present);
}

/* Computes the integer pixel x,y values from the origin.
 *
 * This is the basis of gl_FragCoord computation, but is also used
 * pre-gen6 for computing the deltas from v0 used in interpolation.
 */
void
fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
   struct brw_reg src;
   struct brw_reg deltas;

   if (is_x) {
      src = stride(suboffset(g1_uw, 4), 2, 4, 0);
      deltas = brw_imm_v(0x10101010);
   } else {
      src = stride(suboffset(g1_uw, 5), 2, 4, 0);
      deltas = brw_imm_v(0x11001100);
   }

   if (c->dispatch_width == 16) {
      dst = vec16(dst);
   }

   /* We do this 8 or 16-wide, but since the destination is UW we
    * don't do compression in the 16-wide case.
    */
   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_ADD(p, dst, src, deltas);
   brw_pop_insn_state(p);
}

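/* Emit the interpolation for FS_OPCODE_LINTERP: use PLN when the hardware
 * has it and the delta registers are laid out suitably, otherwise fall
 * back to a LINE+MAC pair.
 */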
void
fs_visitor::generate_linterp(fs_inst *inst,
                             struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}

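/* Single-operand math on gen6+ is a regular instruction rather than a send;
 * it is emitted uncompressed, so 16-wide dispatch is split into two 8-wide
 * halves.
 */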
void
fs_visitor::generate_math1_gen6(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg src0)
{
   int op = brw_math_function(inst->opcode);

   assert(inst->mlen == 0);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p, dst,
            op,
            inst->saturate ? BRW_MATH_SATURATE_SATURATE :
                             BRW_MATH_SATURATE_NONE,
            0, src0,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);

   if (c->dispatch_width == 16) {
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math(p, sechalf(dst),
               op,
               inst->saturate ? BRW_MATH_SATURATE_SATURATE :
                                BRW_MATH_SATURATE_NONE,
               0, sechalf(src0),
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);
      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}

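/* Two-operand math on gen6+ (POW and integer quotient/remainder), emitted
 * uncompressed and split into two 8-wide halves for 16-wide dispatch.
 */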
void
fs_visitor::generate_math2_gen6(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   int op = brw_math_function(inst->opcode);

   assert(inst->mlen == 0);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math2(p, dst, op, src0, src1);

   if (c->dispatch_width == 16) {
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}

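/* Pre-gen6 math is a message to the shared math unit, with the operand
 * staged in MRFs starting at base_mrf; 16-wide dispatch is again split
 * into two 8-wide halves.
 */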
void
fs_visitor::generate_math_gen4(fs_inst *inst,
                               struct brw_reg dst,
                               struct brw_reg src)
{
   int op = brw_math_function(inst->opcode);

   assert(inst->mlen >= 1);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p, dst,
            op,
            inst->saturate ? BRW_MATH_SATURATE_SATURATE :
                             BRW_MATH_SATURATE_NONE,
            inst->base_mrf, src,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);

   if (c->dispatch_width == 16) {
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math(p, sechalf(dst),
               op,
               inst->saturate ? BRW_MATH_SATURATE_SATURATE :
                                BRW_MATH_SATURATE_NONE,
               inst->base_mrf + 1, sechalf(src),
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);

      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}

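/* Emit a sampler message for the texturing opcodes: choose the message
 * type and SIMD mode for this generation, then issue the SAMPLE send.
 */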
void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   int msg_type = -1;
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;

   if (c->dispatch_width == 16)
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;

   if (intel->gen >= 5) {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
         }
         break;
      case FS_OPCODE_TXL:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
         }
         break;
      case FS_OPCODE_TXS:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
         break;
      case FS_OPCODE_TXD:
         /* There is no sample_d_c message; comparisons are done manually */
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
         break;
      case FS_OPCODE_TXF:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      default:
         assert(!"not reached");
         break;
      }
   } else {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
         /* Note that G45 and older determine shadow compare and dispatch
          * width from message length for most messages.
          */
         assert(c->dispatch_width == 8);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
         } else {
            assert(inst->mlen <= 4);
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
         } else {
            assert(inst->mlen == 9);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      case FS_OPCODE_TXL:
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
         } else {
            assert(inst->mlen == 9);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      case FS_OPCODE_TXD:
         /* There is no sample_d_c message; comparisons are done manually */
         assert(inst->mlen == 7 || inst->mlen == 10);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
         break;
      case FS_OPCODE_TXF:
         assert(inst->mlen == 9);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
         simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         break;
      case FS_OPCODE_TXS:
         assert(inst->mlen == 3);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
         simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         break;
      default:
         assert(!"not reached");
         break;
      }
   }
   assert(msg_type != -1);

   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      rlen = 8;
      dst = vec16(dst);
   }

   brw_SAMPLE(p,
              retype(dst, BRW_REGISTER_TYPE_UW),
              inst->base_mrf,
              src,
              SURF_INDEX_TEXTURE(inst->sampler),
              inst->sampler,
              WRITEMASK_XYZW,
              msg_type,
              rlen,
              inst->mlen,
              0,
              inst->header_present,
              simd_mode);
}


/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 * looking like:
 *
 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * and we're trying to produce:
 *
 *           DDX                    DDY
 * dst: (ss0.tr - ss0.tl)    (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)    (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)    (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)    (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)    (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)    (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)    (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)    (ss1.tr - ss1.br)
 *
 * and add another set of two more subspans if in 16-pixel dispatch mode.
 *
 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
 * between each other. We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

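/* Implement FS_OPCODE_DISCARD by clearing the discarded channels' bits in
 * the payload pixel mask (g0/g1), so the later framebuffer write leaves
 * those pixels untouched.
 */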
void
fs_visitor::generate_discard(fs_inst *inst)
{
   struct brw_reg f0 = brw_flag_reg();

   if (intel->gen >= 6) {
      struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
      struct brw_reg some_register;

      /* As of gen6, we no longer have the mask register to look at,
       * so life gets a bit more complicated.
       */

      /* Load the flag register with all ones. */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p, f0, brw_imm_uw(0xffff));
      brw_pop_insn_state(p);

      /* Do a comparison that should always fail, to produce 0s in the flag
       * reg where we have active channels.
       */
      some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
              BRW_CONDITIONAL_NZ, some_register, some_register);

      /* Undo CMP's whacking of predication. */
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_AND(p, g1, f0, g1);
      brw_pop_insn_state(p);
   } else {
      struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      /* Unlike the 965, we have the mask reg, so we just need
       * somewhere to invert that (containing channels to be disabled)
       * so it can be ANDed with the mask of pixels still to be
       * written. Use the flag reg for consistency with gen6+.
       */
      brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
      brw_AND(p, g0, f0, g0);

      brw_pop_insn_state(p);
   }
}

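/* Spill a virtual GRF to scratch space: stage the value in an MRF and
 * write it out with an oword block write.
 */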
void
fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
{
   assert(inst->mlen != 0);

   brw_MOV(p,
           retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
           retype(src, BRW_REGISTER_TYPE_UD));
   brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
                                 inst->offset);
}

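/* Reload a spilled value from scratch space with an oword block read,
 * working around the gen4 send-destination dependency issues noted below.
 */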
void
fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->mlen != 0);

   /* Clear any post destination dependencies that would be ignored by
    * the block read. See the B-Spec for pre-gen5 send instruction.
    *
    * This could use a better solution, since texture sampling and
    * math reads could potentially run into it as well -- anywhere
    * that we have a SEND with a destination that is a register that
    * was written but not read within the last N instructions (what's
    * N? unsure). This is rare because of dead code elimination, but
    * not impossible.
    */
   if (intel->gen == 4 && !intel->is_g4x)
      brw_MOV(p, brw_null_reg(), dst);

   brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
                                inst->offset);

   if (intel->gen == 4 && !intel->is_g4x) {
      /* gen4 errata: destination from a send can't be used as a
       * destination until it's been read. Just read it so we don't
       * have to worry.
       */
      brw_MOV(p, brw_null_reg(), dst);
   }
}

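/* Load a pull constant from the constant buffer surface with an oword
 * block read, with the same gen4 workarounds as the unspill path.
 */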
void
fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->mlen != 0);

   /* Clear any post destination dependencies that would be ignored by
    * the block read. See the B-Spec for pre-gen5 send instruction.
    *
    * This could use a better solution, since texture sampling and
    * math reads could potentially run into it as well -- anywhere
    * that we have a SEND with a destination that is a register that
    * was written but not read within the last N instructions (what's
    * N? unsure). This is rare because of dead code elimination, but
    * not impossible.
    */
   if (intel->gen == 4 && !intel->is_g4x)
      brw_MOV(p, brw_null_reg(), dst);

   brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
                        inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);

   if (intel->gen == 4 && !intel->is_g4x) {
      /* gen4 errata: destination from a send can't be used as a
       * destination until it's been read. Just read it so we don't
       * have to worry.
       */
      brw_MOV(p, brw_null_reg(), dst);
   }
}

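/* Convert an fs_reg from the FS LIR into the brw_reg form expected by the
 * brw_eu emit helpers.
 */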
static struct brw_reg
brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      if (reg->smear == -1) {
         brw_reg = brw_vec8_reg(reg->file, reg->reg, 0);
      } else {
         brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear);
      }
      brw_reg = retype(brw_reg, reg->type);
      if (reg->sechalf)
         brw_reg = sechalf(brw_reg);
      break;
   case IMM:
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
         brw_reg = brw_imm_f(reg->imm.f);
         break;
      case BRW_REGISTER_TYPE_D:
         brw_reg = brw_imm_d(reg->imm.i);
         break;
      case BRW_REGISTER_TYPE_UD:
         brw_reg = brw_imm_ud(reg->imm.u);
         break;
      default:
         assert(!"not reached");
         brw_reg = brw_null_reg();
         break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case UNIFORM:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   default:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   if (reg->abs)
      brw_reg = brw_abs(brw_reg);
   if (reg->negate)
      brw_reg = negate(brw_reg);

   return brw_reg;
}

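/* Walk the LIR instruction list and emit native code for each instruction,
 * optionally dumping annotations and disassembly under INTEL_DEBUG=wm.
 */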
void
fs_visitor::generate_code()
{
   int last_native_inst = p->nr_insn;
   const char *last_annotation_string = NULL;
   ir_instruction *last_annotation_ir = NULL;

   int loop_stack_array_size = 16;
   int loop_stack_depth = 0;
   brw_instruction **loop_stack =
      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
   int *if_depth_in_loop =
      rzalloc_array(this->mem_ctx, int, loop_stack_array_size);


   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
      printf("Native code for fragment shader %d (%d-wide dispatch):\n",
             prog->Name, c->dispatch_width);
   }

   foreach_list(node, &this->instructions) {
      fs_inst *inst = (fs_inst *)node;
      struct brw_reg src[3], dst;

      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
         if (last_annotation_ir != inst->ir) {
            last_annotation_ir = inst->ir;
            if (last_annotation_ir) {
               printf(" ");
               last_annotation_ir->print();
               printf("\n");
            }
         }
         if (last_annotation_string != inst->annotation) {
            last_annotation_string = inst->annotation;
            if (last_annotation_string)
               printf(" %s\n", last_annotation_string);
         }
      }

      for (unsigned int i = 0; i < 3; i++) {
         src[i] = brw_reg_from_fs_reg(&inst->src[i]);
      }
      dst = brw_reg_from_fs_reg(&inst->dst);

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicated);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

      if (inst->force_uncompressed || c->dispatch_width == 8) {
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      } else if (inst->force_sechalf) {
         brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      } else {
         brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
      }

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
         brw_MOV(p, dst, src[0]);
         break;
      case BRW_OPCODE_ADD:
         brw_ADD(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MUL:
         brw_MUL(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MACH:
         brw_set_acc_write_control(p, 1);
         brw_MACH(p, dst, src[0], src[1]);
         brw_set_acc_write_control(p, 0);
         break;

      case BRW_OPCODE_FRC:
         brw_FRC(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDD:
         brw_RNDD(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDE:
         brw_RNDE(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDZ:
         brw_RNDZ(p, dst, src[0]);
         break;

      case BRW_OPCODE_AND:
         brw_AND(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_OR:
         brw_OR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_XOR:
         brw_XOR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_NOT:
         brw_NOT(p, dst, src[0]);
         break;
      case BRW_OPCODE_ASR:
         brw_ASR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHR:
         brw_SHR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHL:
         brw_SHL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_CMP:
         brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
         break;
      case BRW_OPCODE_SEL:
         brw_SEL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_IF:
         if (inst->src[0].file != BAD_FILE) {
            /* The instruction has an embedded compare (only allowed on gen6) */
            assert(intel->gen == 6);
            gen6_IF(p, inst->conditional_mod, src[0], src[1]);
         } else {
            brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
         }
         if_depth_in_loop[loop_stack_depth]++;
         break;

      case BRW_OPCODE_ELSE:
         brw_ELSE(p);
         break;
      case BRW_OPCODE_ENDIF:
         brw_ENDIF(p);
         if_depth_in_loop[loop_stack_depth]--;
         break;

      case BRW_OPCODE_DO:
         loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
         if (loop_stack_array_size <= loop_stack_depth) {
            loop_stack_array_size *= 2;
            loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
                                  loop_stack_array_size);
            if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
                                        loop_stack_array_size);
         }
         if_depth_in_loop[loop_stack_depth] = 0;
         break;

      case BRW_OPCODE_BREAK:
         brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;
      case BRW_OPCODE_CONTINUE:
         /* FINISHME: We need to write the loop instruction support still. */
         if (intel->gen >= 6)
            gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
         else
            brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;

      case BRW_OPCODE_WHILE: {
         struct brw_instruction *inst0, *inst1;
         GLuint br = 1;

         if (intel->gen >= 5)
            br = 2;

         assert(loop_stack_depth > 0);
         loop_stack_depth--;
         inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
         if (intel->gen < 6) {
            /* patch all the BREAK/CONT instructions from last BGNLOOP */
            while (inst0 > loop_stack[loop_stack_depth]) {
               inst0--;
               if (inst0->header.opcode == BRW_OPCODE_BREAK &&
                   inst0->bits3.if_else.jump_count == 0) {
                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
               }
               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
                        inst0->bits3.if_else.jump_count == 0) {
                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
               }
            }
         }
      }
         break;

      case SHADER_OPCODE_RCP:
      case SHADER_OPCODE_RSQ:
      case SHADER_OPCODE_SQRT:
      case SHADER_OPCODE_EXP2:
      case SHADER_OPCODE_LOG2:
      case SHADER_OPCODE_SIN:
      case SHADER_OPCODE_COS:
         if (intel->gen >= 6) {
            generate_math1_gen6(inst, dst, src[0]);
         } else {
            generate_math_gen4(inst, dst, src[0]);
         }
         break;
      case SHADER_OPCODE_INT_QUOTIENT:
      case SHADER_OPCODE_INT_REMAINDER:
      case SHADER_OPCODE_POW:
         if (intel->gen >= 6) {
            generate_math2_gen6(inst, dst, src[0], src[1]);
         } else {
            generate_math_gen4(inst, dst, src[0]);
         }
         break;
      case FS_OPCODE_PIXEL_X:
         generate_pixel_xy(dst, true);
         break;
      case FS_OPCODE_PIXEL_Y:
         generate_pixel_xy(dst, false);
         break;
      case FS_OPCODE_CINTERP:
         brw_MOV(p, dst, src[0]);
         break;
      case FS_OPCODE_LINTERP:
         generate_linterp(inst, dst, src);
         break;
      case FS_OPCODE_TEX:
      case FS_OPCODE_TXB:
      case FS_OPCODE_TXD:
      case FS_OPCODE_TXF:
      case FS_OPCODE_TXL:
      case FS_OPCODE_TXS:
         generate_tex(inst, dst, src[0]);
         break;
      case FS_OPCODE_DISCARD:
         generate_discard(inst);
         break;
      case FS_OPCODE_DDX:
         generate_ddx(inst, dst, src[0]);
         break;
      case FS_OPCODE_DDY:
         generate_ddy(inst, dst, src[0]);
         break;

      case FS_OPCODE_SPILL:
         generate_spill(inst, src[0]);
         break;

      case FS_OPCODE_UNSPILL:
         generate_unspill(inst, dst);
         break;

      case FS_OPCODE_PULL_CONSTANT_LOAD:
         generate_pull_constant_load(inst, dst);
         break;

      case FS_OPCODE_FB_WRITE:
         generate_fb_write(inst);
         break;
      default:
         if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
            _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
                          brw_opcodes[inst->opcode].name);
         } else {
            _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
         }
         fail("unsupported opcode in FS\n");
      }

      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
         for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
            if (0) {
               printf("0x%08x 0x%08x 0x%08x 0x%08x ",
                      ((uint32_t *)&p->store[i])[3],
                      ((uint32_t *)&p->store[i])[2],
                      ((uint32_t *)&p->store[i])[1],
                      ((uint32_t *)&p->store[i])[0]);
            }
            brw_disasm(stdout, &p->store[i], intel->gen);
         }
      }

      last_native_inst = p->nr_insn;
   }

   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
      printf("\n");
   }

   ralloc_free(loop_stack);
   ralloc_free(if_depth_in_loop);

   brw_set_uip_jip(p);

   /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
    * emit issues, it doesn't get the jump distances into the output,
    * which is often something we want to debug. So this is here in
    * case you're doing that.
    */
   if (0) {
      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
         for (unsigned int i = 0; i < p->nr_insn; i++) {
            printf("0x%08x 0x%08x 0x%08x 0x%08x ",
                   ((uint32_t *)&p->store[i])[3],
                   ((uint32_t *)&p->store[i])[2],
                   ((uint32_t *)&p->store[i])[1],
                   ((uint32_t *)&p->store[i])[0]);
            brw_disasm(stdout, &p->store[i], intel->gen);
         }
      }
   }
}