i965: Remove unused structures for command packets.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_emit.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_fs_emit.cpp
25 *
26 * This file supports emitting code from the FS LIR to the actual
27 * native instructions.
28 */
29
30 extern "C" {
31 #include "main/macros.h"
32 #include "brw_context.h"
33 #include "brw_eu.h"
34 } /* extern "C" */
35
36 #include "brw_fs.h"
37 #include "../glsl/ir_print_visitor.h"
38
39 void
40 fs_visitor::generate_fb_write(fs_inst *inst)
41 {
42 GLboolean eot = inst->eot;
43 struct brw_reg implied_header;
44
45 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
46 * move, here's g1.
47 */
48 brw_push_insn_state(p);
49 brw_set_mask_control(p, BRW_MASK_DISABLE);
50 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
51
52 if (inst->header_present) {
53 if (intel->gen >= 6) {
54 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
55 brw_MOV(p,
56 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
57 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
58 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
59
60 if (inst->target > 0) {
61 /* Set the render target index for choosing BLEND_STATE. */
62 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
63 BRW_REGISTER_TYPE_UD),
64 brw_imm_ud(inst->target));
65 }
66
67 implied_header = brw_null_reg();
68 } else {
69 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
70
71 brw_MOV(p,
72 brw_message_reg(inst->base_mrf + 1),
73 brw_vec8_grf(1, 0));
74 }
75 } else {
76 implied_header = brw_null_reg();
77 }
78
79 brw_pop_insn_state(p);
80
81 brw_fb_WRITE(p,
82 c->dispatch_width,
83 inst->base_mrf,
84 implied_header,
85 inst->target,
86 inst->mlen,
87 0,
88 eot,
89 inst->header_present);
90 }
91
92 /* Computes the integer pixel x,y values from the origin.
93 *
94 * This is the basis of gl_FragCoord computation, but is also used
95 * pre-gen6 for computing the deltas from v0 for computing
96 * interpolation.
97 */
98 void
99 fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
100 {
101 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
102 struct brw_reg src;
103 struct brw_reg deltas;
104
105 if (is_x) {
106 src = stride(suboffset(g1_uw, 4), 2, 4, 0);
107 deltas = brw_imm_v(0x10101010);
108 } else {
109 src = stride(suboffset(g1_uw, 5), 2, 4, 0);
110 deltas = brw_imm_v(0x11001100);
111 }
112
113 if (c->dispatch_width == 16) {
114 dst = vec16(dst);
115 }
116
117 /* We do this 8 or 16-wide, but since the destination is UW we
118 * don't do compression in the 16-wide case.
119 */
120 brw_push_insn_state(p);
121 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
122 brw_ADD(p, dst, src, deltas);
123 brw_pop_insn_state(p);
124 }
125
126 void
127 fs_visitor::generate_linterp(fs_inst *inst,
128 struct brw_reg dst, struct brw_reg *src)
129 {
130 struct brw_reg delta_x = src[0];
131 struct brw_reg delta_y = src[1];
132 struct brw_reg interp = src[2];
133
134 if (brw->has_pln &&
135 delta_y.nr == delta_x.nr + 1 &&
136 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
137 brw_PLN(p, dst, interp, delta_x);
138 } else {
139 brw_LINE(p, brw_null_reg(), interp, delta_x);
140 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
141 }
142 }
143
144 void
145 fs_visitor::generate_math(fs_inst *inst,
146 struct brw_reg dst, struct brw_reg *src)
147 {
148 int op;
149
150 switch (inst->opcode) {
151 case FS_OPCODE_RCP:
152 op = BRW_MATH_FUNCTION_INV;
153 break;
154 case FS_OPCODE_RSQ:
155 op = BRW_MATH_FUNCTION_RSQ;
156 break;
157 case FS_OPCODE_SQRT:
158 op = BRW_MATH_FUNCTION_SQRT;
159 break;
160 case FS_OPCODE_EXP2:
161 op = BRW_MATH_FUNCTION_EXP;
162 break;
163 case FS_OPCODE_LOG2:
164 op = BRW_MATH_FUNCTION_LOG;
165 break;
166 case FS_OPCODE_POW:
167 op = BRW_MATH_FUNCTION_POW;
168 break;
169 case FS_OPCODE_SIN:
170 op = BRW_MATH_FUNCTION_SIN;
171 break;
172 case FS_OPCODE_COS:
173 op = BRW_MATH_FUNCTION_COS;
174 break;
175 default:
176 assert(!"not reached: unknown math function");
177 op = 0;
178 break;
179 }
180
181 if (intel->gen >= 6) {
182 assert(inst->mlen == 0);
183
184 if (inst->opcode == FS_OPCODE_POW) {
185 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
186 brw_math2(p, dst, op, src[0], src[1]);
187
188 if (c->dispatch_width == 16) {
189 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
190 brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
191 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
192 }
193 } else {
194 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
195 brw_math(p, dst,
196 op,
197 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
198 BRW_MATH_SATURATE_NONE,
199 0, src[0],
200 BRW_MATH_DATA_VECTOR,
201 BRW_MATH_PRECISION_FULL);
202
203 if (c->dispatch_width == 16) {
204 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
205 brw_math(p, sechalf(dst),
206 op,
207 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
208 BRW_MATH_SATURATE_NONE,
209 0, sechalf(src[0]),
210 BRW_MATH_DATA_VECTOR,
211 BRW_MATH_PRECISION_FULL);
212 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
213 }
214 }
215 } else /* gen <= 5 */{
216 assert(inst->mlen >= 1);
217
218 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
219 brw_math(p, dst,
220 op,
221 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
222 BRW_MATH_SATURATE_NONE,
223 inst->base_mrf, src[0],
224 BRW_MATH_DATA_VECTOR,
225 BRW_MATH_PRECISION_FULL);
226
227 if (c->dispatch_width == 16) {
228 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
229 brw_math(p, sechalf(dst),
230 op,
231 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
232 BRW_MATH_SATURATE_NONE,
233 inst->base_mrf + 1, sechalf(src[0]),
234 BRW_MATH_DATA_VECTOR,
235 BRW_MATH_PRECISION_FULL);
236
237 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
238 }
239 }
240 }
241
242 void
243 fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
244 {
245 int msg_type = -1;
246 int rlen = 4;
247 uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
248
249 if (c->dispatch_width == 16)
250 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
251
252 if (intel->gen >= 5) {
253 switch (inst->opcode) {
254 case FS_OPCODE_TEX:
255 if (inst->shadow_compare) {
256 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
257 } else {
258 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
259 }
260 break;
261 case FS_OPCODE_TXB:
262 if (inst->shadow_compare) {
263 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
264 } else {
265 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
266 }
267 break;
268 case FS_OPCODE_TXL:
269 if (inst->shadow_compare) {
270 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
271 } else {
272 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
273 }
274 break;
275 case FS_OPCODE_TXD:
276 /* There is no sample_d_c message; comparisons are done manually */
277 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
278 break;
279 }
280 } else {
281 switch (inst->opcode) {
282 case FS_OPCODE_TEX:
283 /* Note that G45 and older determines shadow compare and dispatch width
284 * from message length for most messages.
285 */
286 assert(c->dispatch_width == 8);
287 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
288 if (inst->shadow_compare) {
289 assert(inst->mlen == 6);
290 } else {
291 assert(inst->mlen <= 4);
292 }
293 break;
294 case FS_OPCODE_TXB:
295 if (inst->shadow_compare) {
296 assert(inst->mlen == 6);
297 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
298 } else {
299 assert(inst->mlen == 9);
300 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
301 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
302 }
303 break;
304 case FS_OPCODE_TXL:
305 if (inst->shadow_compare) {
306 assert(inst->mlen == 6);
307 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
308 } else {
309 assert(inst->mlen == 9);
310 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
311 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
312 }
313 break;
314 case FS_OPCODE_TXD:
315 /* There is no sample_d_c message; comparisons are done manually */
316 assert(inst->mlen == 7 || inst->mlen == 10);
317 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
318 break;
319 }
320 }
321 assert(msg_type != -1);
322
323 if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
324 rlen = 8;
325 dst = vec16(dst);
326 }
327
328 brw_SAMPLE(p,
329 retype(dst, BRW_REGISTER_TYPE_UW),
330 inst->base_mrf,
331 src,
332 SURF_INDEX_TEXTURE(inst->sampler),
333 inst->sampler,
334 WRITEMASK_XYZW,
335 msg_type,
336 rlen,
337 inst->mlen,
338 0,
339 inst->header_present,
340 simd_mode);
341 }
342
343
344 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
345 * looking like:
346 *
347 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
348 *
349 * and we're trying to produce:
350 *
351 * DDX DDY
352 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
353 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
354 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
355 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
356 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
357 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
358 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
359 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
360 *
361 * and add another set of two more subspans if in 16-pixel dispatch mode.
362 *
363 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
364 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
365 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
366 * between each other. We could probably do it like ddx and swizzle the right
367 * order later, but bail for now and just produce
368 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
369 */
370 void
371 fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
372 {
373 struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
374 BRW_REGISTER_TYPE_F,
375 BRW_VERTICAL_STRIDE_2,
376 BRW_WIDTH_2,
377 BRW_HORIZONTAL_STRIDE_0,
378 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
379 struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
380 BRW_REGISTER_TYPE_F,
381 BRW_VERTICAL_STRIDE_2,
382 BRW_WIDTH_2,
383 BRW_HORIZONTAL_STRIDE_0,
384 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
385 brw_ADD(p, dst, src0, negate(src1));
386 }
387
388 void
389 fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
390 {
391 struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
392 BRW_REGISTER_TYPE_F,
393 BRW_VERTICAL_STRIDE_4,
394 BRW_WIDTH_4,
395 BRW_HORIZONTAL_STRIDE_0,
396 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
397 struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
398 BRW_REGISTER_TYPE_F,
399 BRW_VERTICAL_STRIDE_4,
400 BRW_WIDTH_4,
401 BRW_HORIZONTAL_STRIDE_0,
402 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
403 brw_ADD(p, dst, src0, negate(src1));
404 }
405
406 void
407 fs_visitor::generate_discard(fs_inst *inst)
408 {
409 struct brw_reg f0 = brw_flag_reg();
410
411 if (intel->gen >= 6) {
412 struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
413 struct brw_reg some_register;
414
415 /* As of gen6, we no longer have the mask register to look at,
416 * so life gets a bit more complicated.
417 */
418
419 /* Load the flag register with all ones. */
420 brw_push_insn_state(p);
421 brw_set_mask_control(p, BRW_MASK_DISABLE);
422 brw_MOV(p, f0, brw_imm_uw(0xffff));
423 brw_pop_insn_state(p);
424
425 /* Do a comparison that should always fail, to produce 0s in the flag
426 * reg where we have active channels.
427 */
428 some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
429 brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
430 BRW_CONDITIONAL_NZ, some_register, some_register);
431
432 /* Undo CMP's whacking of predication*/
433 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
434
435 brw_push_insn_state(p);
436 brw_set_mask_control(p, BRW_MASK_DISABLE);
437 brw_AND(p, g1, f0, g1);
438 brw_pop_insn_state(p);
439 } else {
440 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
441
442 brw_push_insn_state(p);
443 brw_set_mask_control(p, BRW_MASK_DISABLE);
444 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
445
446 /* Unlike the 965, we have the mask reg, so we just need
447 * somewhere to invert that (containing channels to be disabled)
448 * so it can be ANDed with the mask of pixels still to be
449 * written. Use the flag reg for consistency with gen6+.
450 */
451 brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
452 brw_AND(p, g0, f0, g0);
453
454 brw_pop_insn_state(p);
455 }
456 }
457
458 void
459 fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
460 {
461 assert(inst->mlen != 0);
462
463 brw_MOV(p,
464 retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
465 retype(src, BRW_REGISTER_TYPE_UD));
466 brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
467 inst->offset);
468 }
469
470 void
471 fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
472 {
473 assert(inst->mlen != 0);
474
475 /* Clear any post destination dependencies that would be ignored by
476 * the block read. See the B-Spec for pre-gen5 send instruction.
477 *
478 * This could use a better solution, since texture sampling and
479 * math reads could potentially run into it as well -- anywhere
480 * that we have a SEND with a destination that is a register that
481 * was written but not read within the last N instructions (what's
482 * N? unsure). This is rare because of dead code elimination, but
483 * not impossible.
484 */
485 if (intel->gen == 4 && !intel->is_g4x)
486 brw_MOV(p, brw_null_reg(), dst);
487
488 brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
489 inst->offset);
490
491 if (intel->gen == 4 && !intel->is_g4x) {
492 /* gen4 errata: destination from a send can't be used as a
493 * destination until it's been read. Just read it so we don't
494 * have to worry.
495 */
496 brw_MOV(p, brw_null_reg(), dst);
497 }
498 }
499
500 void
501 fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
502 {
503 assert(inst->mlen != 0);
504
505 /* Clear any post destination dependencies that would be ignored by
506 * the block read. See the B-Spec for pre-gen5 send instruction.
507 *
508 * This could use a better solution, since texture sampling and
509 * math reads could potentially run into it as well -- anywhere
510 * that we have a SEND with a destination that is a register that
511 * was written but not read within the last N instructions (what's
512 * N? unsure). This is rare because of dead code elimination, but
513 * not impossible.
514 */
515 if (intel->gen == 4 && !intel->is_g4x)
516 brw_MOV(p, brw_null_reg(), dst);
517
518 brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
519 inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);
520
521 if (intel->gen == 4 && !intel->is_g4x) {
522 /* gen4 errata: destination from a send can't be used as a
523 * destination until it's been read. Just read it so we don't
524 * have to worry.
525 */
526 brw_MOV(p, brw_null_reg(), dst);
527 }
528 }
529
530 static struct brw_reg
531 brw_reg_from_fs_reg(fs_reg *reg)
532 {
533 struct brw_reg brw_reg;
534
535 switch (reg->file) {
536 case GRF:
537 case ARF:
538 case MRF:
539 if (reg->smear == -1) {
540 brw_reg = brw_vec8_reg(reg->file,
541 reg->hw_reg, 0);
542 } else {
543 brw_reg = brw_vec1_reg(reg->file,
544 reg->hw_reg, reg->smear);
545 }
546 brw_reg = retype(brw_reg, reg->type);
547 if (reg->sechalf)
548 brw_reg = sechalf(brw_reg);
549 break;
550 case IMM:
551 switch (reg->type) {
552 case BRW_REGISTER_TYPE_F:
553 brw_reg = brw_imm_f(reg->imm.f);
554 break;
555 case BRW_REGISTER_TYPE_D:
556 brw_reg = brw_imm_d(reg->imm.i);
557 break;
558 case BRW_REGISTER_TYPE_UD:
559 brw_reg = brw_imm_ud(reg->imm.u);
560 break;
561 default:
562 assert(!"not reached");
563 brw_reg = brw_null_reg();
564 break;
565 }
566 break;
567 case FIXED_HW_REG:
568 brw_reg = reg->fixed_hw_reg;
569 break;
570 case BAD_FILE:
571 /* Probably unused. */
572 brw_reg = brw_null_reg();
573 break;
574 case UNIFORM:
575 assert(!"not reached");
576 brw_reg = brw_null_reg();
577 break;
578 default:
579 assert(!"not reached");
580 brw_reg = brw_null_reg();
581 break;
582 }
583 if (reg->abs)
584 brw_reg = brw_abs(brw_reg);
585 if (reg->negate)
586 brw_reg = negate(brw_reg);
587
588 return brw_reg;
589 }
590
591 void
592 fs_visitor::generate_code()
593 {
594 int last_native_inst = p->nr_insn;
595 const char *last_annotation_string = NULL;
596 ir_instruction *last_annotation_ir = NULL;
597
598 int loop_stack_array_size = 16;
599 int loop_stack_depth = 0;
600 brw_instruction **loop_stack =
601 rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
602 int *if_depth_in_loop =
603 rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
604
605
606 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
607 printf("Native code for fragment shader %d (%d-wide dispatch):\n",
608 prog->Name, c->dispatch_width);
609 }
610
611 foreach_iter(exec_list_iterator, iter, this->instructions) {
612 fs_inst *inst = (fs_inst *)iter.get();
613 struct brw_reg src[3], dst;
614
615 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
616 if (last_annotation_ir != inst->ir) {
617 last_annotation_ir = inst->ir;
618 if (last_annotation_ir) {
619 printf(" ");
620 last_annotation_ir->print();
621 printf("\n");
622 }
623 }
624 if (last_annotation_string != inst->annotation) {
625 last_annotation_string = inst->annotation;
626 if (last_annotation_string)
627 printf(" %s\n", last_annotation_string);
628 }
629 }
630
631 for (unsigned int i = 0; i < 3; i++) {
632 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
633 }
634 dst = brw_reg_from_fs_reg(&inst->dst);
635
636 brw_set_conditionalmod(p, inst->conditional_mod);
637 brw_set_predicate_control(p, inst->predicated);
638 brw_set_predicate_inverse(p, inst->predicate_inverse);
639 brw_set_saturate(p, inst->saturate);
640
641 if (inst->force_uncompressed || c->dispatch_width == 8) {
642 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
643 } else if (inst->force_sechalf) {
644 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
645 } else {
646 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
647 }
648
649 switch (inst->opcode) {
650 case BRW_OPCODE_MOV:
651 brw_MOV(p, dst, src[0]);
652 break;
653 case BRW_OPCODE_ADD:
654 brw_ADD(p, dst, src[0], src[1]);
655 break;
656 case BRW_OPCODE_MUL:
657 brw_MUL(p, dst, src[0], src[1]);
658 break;
659
660 case BRW_OPCODE_FRC:
661 brw_FRC(p, dst, src[0]);
662 break;
663 case BRW_OPCODE_RNDD:
664 brw_RNDD(p, dst, src[0]);
665 break;
666 case BRW_OPCODE_RNDE:
667 brw_RNDE(p, dst, src[0]);
668 break;
669 case BRW_OPCODE_RNDZ:
670 brw_RNDZ(p, dst, src[0]);
671 break;
672
673 case BRW_OPCODE_AND:
674 brw_AND(p, dst, src[0], src[1]);
675 break;
676 case BRW_OPCODE_OR:
677 brw_OR(p, dst, src[0], src[1]);
678 break;
679 case BRW_OPCODE_XOR:
680 brw_XOR(p, dst, src[0], src[1]);
681 break;
682 case BRW_OPCODE_NOT:
683 brw_NOT(p, dst, src[0]);
684 break;
685 case BRW_OPCODE_ASR:
686 brw_ASR(p, dst, src[0], src[1]);
687 break;
688 case BRW_OPCODE_SHR:
689 brw_SHR(p, dst, src[0], src[1]);
690 break;
691 case BRW_OPCODE_SHL:
692 brw_SHL(p, dst, src[0], src[1]);
693 break;
694
695 case BRW_OPCODE_CMP:
696 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
697 break;
698 case BRW_OPCODE_SEL:
699 brw_SEL(p, dst, src[0], src[1]);
700 break;
701
702 case BRW_OPCODE_IF:
703 if (inst->src[0].file != BAD_FILE) {
704 /* The instruction has an embedded compare (only allowed on gen6) */
705 assert(intel->gen == 6);
706 gen6_IF(p, inst->conditional_mod, src[0], src[1]);
707 } else {
708 brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
709 }
710 if_depth_in_loop[loop_stack_depth]++;
711 break;
712
713 case BRW_OPCODE_ELSE:
714 brw_ELSE(p);
715 break;
716 case BRW_OPCODE_ENDIF:
717 brw_ENDIF(p);
718 if_depth_in_loop[loop_stack_depth]--;
719 break;
720
721 case BRW_OPCODE_DO:
722 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
723 if (loop_stack_array_size <= loop_stack_depth) {
724 loop_stack_array_size *= 2;
725 loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
726 loop_stack_array_size);
727 if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
728 loop_stack_array_size);
729 }
730 if_depth_in_loop[loop_stack_depth] = 0;
731 break;
732
733 case BRW_OPCODE_BREAK:
734 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
735 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
736 break;
737 case BRW_OPCODE_CONTINUE:
738 /* FINISHME: We need to write the loop instruction support still. */
739 if (intel->gen >= 6)
740 gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
741 else
742 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
743 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
744 break;
745
746 case BRW_OPCODE_WHILE: {
747 struct brw_instruction *inst0, *inst1;
748 GLuint br = 1;
749
750 if (intel->gen >= 5)
751 br = 2;
752
753 assert(loop_stack_depth > 0);
754 loop_stack_depth--;
755 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
756 if (intel->gen < 6) {
757 /* patch all the BREAK/CONT instructions from last BGNLOOP */
758 while (inst0 > loop_stack[loop_stack_depth]) {
759 inst0--;
760 if (inst0->header.opcode == BRW_OPCODE_BREAK &&
761 inst0->bits3.if_else.jump_count == 0) {
762 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
763 }
764 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
765 inst0->bits3.if_else.jump_count == 0) {
766 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
767 }
768 }
769 }
770 }
771 break;
772
773 case FS_OPCODE_RCP:
774 case FS_OPCODE_RSQ:
775 case FS_OPCODE_SQRT:
776 case FS_OPCODE_EXP2:
777 case FS_OPCODE_LOG2:
778 case FS_OPCODE_POW:
779 case FS_OPCODE_SIN:
780 case FS_OPCODE_COS:
781 generate_math(inst, dst, src);
782 break;
783 case FS_OPCODE_PIXEL_X:
784 generate_pixel_xy(dst, true);
785 break;
786 case FS_OPCODE_PIXEL_Y:
787 generate_pixel_xy(dst, false);
788 break;
789 case FS_OPCODE_CINTERP:
790 brw_MOV(p, dst, src[0]);
791 break;
792 case FS_OPCODE_LINTERP:
793 generate_linterp(inst, dst, src);
794 break;
795 case FS_OPCODE_TEX:
796 case FS_OPCODE_TXB:
797 case FS_OPCODE_TXD:
798 case FS_OPCODE_TXL:
799 generate_tex(inst, dst, src[0]);
800 break;
801 case FS_OPCODE_DISCARD:
802 generate_discard(inst);
803 break;
804 case FS_OPCODE_DDX:
805 generate_ddx(inst, dst, src[0]);
806 break;
807 case FS_OPCODE_DDY:
808 generate_ddy(inst, dst, src[0]);
809 break;
810
811 case FS_OPCODE_SPILL:
812 generate_spill(inst, src[0]);
813 break;
814
815 case FS_OPCODE_UNSPILL:
816 generate_unspill(inst, dst);
817 break;
818
819 case FS_OPCODE_PULL_CONSTANT_LOAD:
820 generate_pull_constant_load(inst, dst);
821 break;
822
823 case FS_OPCODE_FB_WRITE:
824 generate_fb_write(inst);
825 break;
826 default:
827 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
828 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
829 brw_opcodes[inst->opcode].name);
830 } else {
831 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
832 }
833 fail("unsupported opcode in FS\n");
834 }
835
836 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
837 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
838 if (0) {
839 printf("0x%08x 0x%08x 0x%08x 0x%08x ",
840 ((uint32_t *)&p->store[i])[3],
841 ((uint32_t *)&p->store[i])[2],
842 ((uint32_t *)&p->store[i])[1],
843 ((uint32_t *)&p->store[i])[0]);
844 }
845 brw_disasm(stdout, &p->store[i], intel->gen);
846 }
847 }
848
849 last_native_inst = p->nr_insn;
850 }
851
852 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
853 printf("\n");
854 }
855
856 ralloc_free(loop_stack);
857 ralloc_free(if_depth_in_loop);
858
859 brw_set_uip_jip(p);
860
861 /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
862 * emit issues, it doesn't get the jump distances into the output,
863 * which is often something we want to debug. So this is here in
864 * case you're doing that.
865 */
866 if (0) {
867 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
868 for (unsigned int i = 0; i < p->nr_insn; i++) {
869 printf("0x%08x 0x%08x 0x%08x 0x%08x ",
870 ((uint32_t *)&p->store[i])[3],
871 ((uint32_t *)&p->store[i])[2],
872 ((uint32_t *)&p->store[i])[1],
873 ((uint32_t *)&p->store[i])[0]);
874 brw_disasm(stdout, &p->store[i], intel->gen);
875 }
876 }
877 }
878 }