i965/fs: Implement texelFetch() on Gen4.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_emit.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_fs_emit.cpp
25 *
26 * This file supports emitting code from the FS LIR to the actual
27 * native instructions.
28 */
29
30 extern "C" {
31 #include "main/macros.h"
32 #include "brw_context.h"
33 #include "brw_eu.h"
34 } /* extern "C" */
35
36 #include "brw_fs.h"
37 #include "glsl/ir_print_visitor.h"
38
39 void
40 fs_visitor::generate_fb_write(fs_inst *inst)
41 {
42 GLboolean eot = inst->eot;
43 struct brw_reg implied_header;
44
45 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
46 * move, here's g1.
47 */
48 brw_push_insn_state(p);
49 brw_set_mask_control(p, BRW_MASK_DISABLE);
50 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
51
52 if (inst->header_present) {
53 if (intel->gen >= 6) {
54 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
55 brw_MOV(p,
56 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
57 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
58 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
59
60 if (inst->target > 0) {
61 /* Set the render target index for choosing BLEND_STATE. */
62 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
63 inst->base_mrf, 2),
64 BRW_REGISTER_TYPE_UD),
65 brw_imm_ud(inst->target));
66 }
67
68 implied_header = brw_null_reg();
69 } else {
70 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
71
72 brw_MOV(p,
73 brw_message_reg(inst->base_mrf + 1),
74 brw_vec8_grf(1, 0));
75 }
76 } else {
77 implied_header = brw_null_reg();
78 }
79
80 brw_pop_insn_state(p);
81
82 brw_fb_WRITE(p,
83 c->dispatch_width,
84 inst->base_mrf,
85 implied_header,
86 inst->target,
87 inst->mlen,
88 0,
89 eot,
90 inst->header_present);
91 }
92
93 /* Computes the integer pixel x,y values from the origin.
94 *
95 * This is the basis of gl_FragCoord computation, but is also used
96 * pre-gen6 for computing the deltas from v0 for computing
97 * interpolation.
98 */
99 void
100 fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
101 {
102 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
103 struct brw_reg src;
104 struct brw_reg deltas;
105
106 if (is_x) {
107 src = stride(suboffset(g1_uw, 4), 2, 4, 0);
108 deltas = brw_imm_v(0x10101010);
109 } else {
110 src = stride(suboffset(g1_uw, 5), 2, 4, 0);
111 deltas = brw_imm_v(0x11001100);
112 }
113
114 if (c->dispatch_width == 16) {
115 dst = vec16(dst);
116 }
117
118 /* We do this 8 or 16-wide, but since the destination is UW we
119 * don't do compression in the 16-wide case.
120 */
121 brw_push_insn_state(p);
122 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
123 brw_ADD(p, dst, src, deltas);
124 brw_pop_insn_state(p);
125 }
126
127 void
128 fs_visitor::generate_linterp(fs_inst *inst,
129 struct brw_reg dst, struct brw_reg *src)
130 {
131 struct brw_reg delta_x = src[0];
132 struct brw_reg delta_y = src[1];
133 struct brw_reg interp = src[2];
134
135 if (brw->has_pln &&
136 delta_y.nr == delta_x.nr + 1 &&
137 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
138 brw_PLN(p, dst, interp, delta_x);
139 } else {
140 brw_LINE(p, brw_null_reg(), interp, delta_x);
141 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
142 }
143 }
144
145 void
146 fs_visitor::generate_math(fs_inst *inst,
147 struct brw_reg dst, struct brw_reg *src)
148 {
149 int op = brw_math_function(inst->opcode);
150
151 if (intel->gen >= 6) {
152 assert(inst->mlen == 0);
153
154 if (inst->opcode == SHADER_OPCODE_POW) {
155 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
156 brw_math2(p, dst, op, src[0], src[1]);
157
158 if (c->dispatch_width == 16) {
159 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
160 brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
161 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
162 }
163 } else {
164 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
165 brw_math(p, dst,
166 op,
167 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
168 BRW_MATH_SATURATE_NONE,
169 0, src[0],
170 BRW_MATH_DATA_VECTOR,
171 BRW_MATH_PRECISION_FULL);
172
173 if (c->dispatch_width == 16) {
174 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
175 brw_math(p, sechalf(dst),
176 op,
177 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
178 BRW_MATH_SATURATE_NONE,
179 0, sechalf(src[0]),
180 BRW_MATH_DATA_VECTOR,
181 BRW_MATH_PRECISION_FULL);
182 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
183 }
184 }
185 } else /* gen <= 5 */{
186 assert(inst->mlen >= 1);
187
188 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
189 brw_math(p, dst,
190 op,
191 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
192 BRW_MATH_SATURATE_NONE,
193 inst->base_mrf, src[0],
194 BRW_MATH_DATA_VECTOR,
195 BRW_MATH_PRECISION_FULL);
196
197 if (c->dispatch_width == 16) {
198 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
199 brw_math(p, sechalf(dst),
200 op,
201 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
202 BRW_MATH_SATURATE_NONE,
203 inst->base_mrf + 1, sechalf(src[0]),
204 BRW_MATH_DATA_VECTOR,
205 BRW_MATH_PRECISION_FULL);
206
207 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
208 }
209 }
210 }
211
212 void
213 fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
214 {
215 int msg_type = -1;
216 int rlen = 4;
217 uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
218
219 if (c->dispatch_width == 16)
220 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
221
222 if (intel->gen >= 5) {
223 switch (inst->opcode) {
224 case FS_OPCODE_TEX:
225 if (inst->shadow_compare) {
226 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
227 } else {
228 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
229 }
230 break;
231 case FS_OPCODE_TXB:
232 if (inst->shadow_compare) {
233 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
234 } else {
235 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
236 }
237 break;
238 case FS_OPCODE_TXL:
239 if (inst->shadow_compare) {
240 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
241 } else {
242 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
243 }
244 break;
245 case FS_OPCODE_TXS:
246 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
247 break;
248 case FS_OPCODE_TXD:
249 /* There is no sample_d_c message; comparisons are done manually */
250 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
251 break;
252 case FS_OPCODE_TXF:
253 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
254 break;
255 default:
256 assert(!"not reached");
257 break;
258 }
259 } else {
260 switch (inst->opcode) {
261 case FS_OPCODE_TEX:
262 /* Note that G45 and older determines shadow compare and dispatch width
263 * from message length for most messages.
264 */
265 assert(c->dispatch_width == 8);
266 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
267 if (inst->shadow_compare) {
268 assert(inst->mlen == 6);
269 } else {
270 assert(inst->mlen <= 4);
271 }
272 break;
273 case FS_OPCODE_TXB:
274 if (inst->shadow_compare) {
275 assert(inst->mlen == 6);
276 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
277 } else {
278 assert(inst->mlen == 9);
279 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
280 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
281 }
282 break;
283 case FS_OPCODE_TXL:
284 if (inst->shadow_compare) {
285 assert(inst->mlen == 6);
286 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
287 } else {
288 assert(inst->mlen == 9);
289 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
290 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
291 }
292 break;
293 case FS_OPCODE_TXD:
294 /* There is no sample_d_c message; comparisons are done manually */
295 assert(inst->mlen == 7 || inst->mlen == 10);
296 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
297 break;
298 case FS_OPCODE_TXF:
299 assert(inst->mlen == 9);
300 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
301 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
302 break;
303 case FS_OPCODE_TXS:
304 assert(inst->mlen == 3);
305 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
306 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
307 break;
308 default:
309 assert(!"not reached");
310 break;
311 }
312 }
313 assert(msg_type != -1);
314
315 if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
316 rlen = 8;
317 dst = vec16(dst);
318 }
319
320 brw_SAMPLE(p,
321 retype(dst, BRW_REGISTER_TYPE_UW),
322 inst->base_mrf,
323 src,
324 SURF_INDEX_TEXTURE(inst->sampler),
325 inst->sampler,
326 WRITEMASK_XYZW,
327 msg_type,
328 rlen,
329 inst->mlen,
330 0,
331 inst->header_present,
332 simd_mode);
333 }
334
335
336 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
337 * looking like:
338 *
339 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
340 *
341 * and we're trying to produce:
342 *
343 * DDX DDY
344 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
345 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
346 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
347 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
348 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
349 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
350 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
351 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
352 *
353 * and add another set of two more subspans if in 16-pixel dispatch mode.
354 *
355 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
356 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
357 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
358 * between each other. We could probably do it like ddx and swizzle the right
359 * order later, but bail for now and just produce
360 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
361 */
362 void
363 fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
364 {
365 struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
366 BRW_REGISTER_TYPE_F,
367 BRW_VERTICAL_STRIDE_2,
368 BRW_WIDTH_2,
369 BRW_HORIZONTAL_STRIDE_0,
370 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
371 struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
372 BRW_REGISTER_TYPE_F,
373 BRW_VERTICAL_STRIDE_2,
374 BRW_WIDTH_2,
375 BRW_HORIZONTAL_STRIDE_0,
376 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
377 brw_ADD(p, dst, src0, negate(src1));
378 }
379
380 void
381 fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
382 {
383 struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
384 BRW_REGISTER_TYPE_F,
385 BRW_VERTICAL_STRIDE_4,
386 BRW_WIDTH_4,
387 BRW_HORIZONTAL_STRIDE_0,
388 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
389 struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
390 BRW_REGISTER_TYPE_F,
391 BRW_VERTICAL_STRIDE_4,
392 BRW_WIDTH_4,
393 BRW_HORIZONTAL_STRIDE_0,
394 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
395 brw_ADD(p, dst, src0, negate(src1));
396 }
397
398 void
399 fs_visitor::generate_discard(fs_inst *inst)
400 {
401 struct brw_reg f0 = brw_flag_reg();
402
403 if (intel->gen >= 6) {
404 struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
405 struct brw_reg some_register;
406
407 /* As of gen6, we no longer have the mask register to look at,
408 * so life gets a bit more complicated.
409 */
410
411 /* Load the flag register with all ones. */
412 brw_push_insn_state(p);
413 brw_set_mask_control(p, BRW_MASK_DISABLE);
414 brw_MOV(p, f0, brw_imm_uw(0xffff));
415 brw_pop_insn_state(p);
416
417 /* Do a comparison that should always fail, to produce 0s in the flag
418 * reg where we have active channels.
419 */
420 some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
421 brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
422 BRW_CONDITIONAL_NZ, some_register, some_register);
423
424 /* Undo CMP's whacking of predication*/
425 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
426
427 brw_push_insn_state(p);
428 brw_set_mask_control(p, BRW_MASK_DISABLE);
429 brw_AND(p, g1, f0, g1);
430 brw_pop_insn_state(p);
431 } else {
432 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
433
434 brw_push_insn_state(p);
435 brw_set_mask_control(p, BRW_MASK_DISABLE);
436 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
437
438 /* Unlike the 965, we have the mask reg, so we just need
439 * somewhere to invert that (containing channels to be disabled)
440 * so it can be ANDed with the mask of pixels still to be
441 * written. Use the flag reg for consistency with gen6+.
442 */
443 brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
444 brw_AND(p, g0, f0, g0);
445
446 brw_pop_insn_state(p);
447 }
448 }
449
450 void
451 fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
452 {
453 assert(inst->mlen != 0);
454
455 brw_MOV(p,
456 retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
457 retype(src, BRW_REGISTER_TYPE_UD));
458 brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
459 inst->offset);
460 }
461
462 void
463 fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
464 {
465 assert(inst->mlen != 0);
466
467 /* Clear any post destination dependencies that would be ignored by
468 * the block read. See the B-Spec for pre-gen5 send instruction.
469 *
470 * This could use a better solution, since texture sampling and
471 * math reads could potentially run into it as well -- anywhere
472 * that we have a SEND with a destination that is a register that
473 * was written but not read within the last N instructions (what's
474 * N? unsure). This is rare because of dead code elimination, but
475 * not impossible.
476 */
477 if (intel->gen == 4 && !intel->is_g4x)
478 brw_MOV(p, brw_null_reg(), dst);
479
480 brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
481 inst->offset);
482
483 if (intel->gen == 4 && !intel->is_g4x) {
484 /* gen4 errata: destination from a send can't be used as a
485 * destination until it's been read. Just read it so we don't
486 * have to worry.
487 */
488 brw_MOV(p, brw_null_reg(), dst);
489 }
490 }
491
492 void
493 fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
494 {
495 assert(inst->mlen != 0);
496
497 /* Clear any post destination dependencies that would be ignored by
498 * the block read. See the B-Spec for pre-gen5 send instruction.
499 *
500 * This could use a better solution, since texture sampling and
501 * math reads could potentially run into it as well -- anywhere
502 * that we have a SEND with a destination that is a register that
503 * was written but not read within the last N instructions (what's
504 * N? unsure). This is rare because of dead code elimination, but
505 * not impossible.
506 */
507 if (intel->gen == 4 && !intel->is_g4x)
508 brw_MOV(p, brw_null_reg(), dst);
509
510 brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
511 inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);
512
513 if (intel->gen == 4 && !intel->is_g4x) {
514 /* gen4 errata: destination from a send can't be used as a
515 * destination until it's been read. Just read it so we don't
516 * have to worry.
517 */
518 brw_MOV(p, brw_null_reg(), dst);
519 }
520 }
521
522 static struct brw_reg
523 brw_reg_from_fs_reg(fs_reg *reg)
524 {
525 struct brw_reg brw_reg;
526
527 switch (reg->file) {
528 case GRF:
529 case ARF:
530 case MRF:
531 if (reg->smear == -1) {
532 brw_reg = brw_vec8_reg(reg->file, reg->reg, 0);
533 } else {
534 brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear);
535 }
536 brw_reg = retype(brw_reg, reg->type);
537 if (reg->sechalf)
538 brw_reg = sechalf(brw_reg);
539 break;
540 case IMM:
541 switch (reg->type) {
542 case BRW_REGISTER_TYPE_F:
543 brw_reg = brw_imm_f(reg->imm.f);
544 break;
545 case BRW_REGISTER_TYPE_D:
546 brw_reg = brw_imm_d(reg->imm.i);
547 break;
548 case BRW_REGISTER_TYPE_UD:
549 brw_reg = brw_imm_ud(reg->imm.u);
550 break;
551 default:
552 assert(!"not reached");
553 brw_reg = brw_null_reg();
554 break;
555 }
556 break;
557 case FIXED_HW_REG:
558 brw_reg = reg->fixed_hw_reg;
559 break;
560 case BAD_FILE:
561 /* Probably unused. */
562 brw_reg = brw_null_reg();
563 break;
564 case UNIFORM:
565 assert(!"not reached");
566 brw_reg = brw_null_reg();
567 break;
568 default:
569 assert(!"not reached");
570 brw_reg = brw_null_reg();
571 break;
572 }
573 if (reg->abs)
574 brw_reg = brw_abs(brw_reg);
575 if (reg->negate)
576 brw_reg = negate(brw_reg);
577
578 return brw_reg;
579 }
580
581 void
582 fs_visitor::generate_code()
583 {
584 int last_native_inst = p->nr_insn;
585 const char *last_annotation_string = NULL;
586 ir_instruction *last_annotation_ir = NULL;
587
588 int loop_stack_array_size = 16;
589 int loop_stack_depth = 0;
590 brw_instruction **loop_stack =
591 rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
592 int *if_depth_in_loop =
593 rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
594
595
596 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
597 printf("Native code for fragment shader %d (%d-wide dispatch):\n",
598 prog->Name, c->dispatch_width);
599 }
600
601 foreach_list(node, &this->instructions) {
602 fs_inst *inst = (fs_inst *)node;
603 struct brw_reg src[3], dst;
604
605 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
606 if (last_annotation_ir != inst->ir) {
607 last_annotation_ir = inst->ir;
608 if (last_annotation_ir) {
609 printf(" ");
610 last_annotation_ir->print();
611 printf("\n");
612 }
613 }
614 if (last_annotation_string != inst->annotation) {
615 last_annotation_string = inst->annotation;
616 if (last_annotation_string)
617 printf(" %s\n", last_annotation_string);
618 }
619 }
620
621 for (unsigned int i = 0; i < 3; i++) {
622 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
623 }
624 dst = brw_reg_from_fs_reg(&inst->dst);
625
626 brw_set_conditionalmod(p, inst->conditional_mod);
627 brw_set_predicate_control(p, inst->predicated);
628 brw_set_predicate_inverse(p, inst->predicate_inverse);
629 brw_set_saturate(p, inst->saturate);
630
631 if (inst->force_uncompressed || c->dispatch_width == 8) {
632 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
633 } else if (inst->force_sechalf) {
634 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
635 } else {
636 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
637 }
638
639 switch (inst->opcode) {
640 case BRW_OPCODE_MOV:
641 brw_MOV(p, dst, src[0]);
642 break;
643 case BRW_OPCODE_ADD:
644 brw_ADD(p, dst, src[0], src[1]);
645 break;
646 case BRW_OPCODE_MUL:
647 brw_MUL(p, dst, src[0], src[1]);
648 break;
649 case BRW_OPCODE_MACH:
650 brw_set_acc_write_control(p, 1);
651 brw_MACH(p, dst, src[0], src[1]);
652 brw_set_acc_write_control(p, 0);
653 break;
654
655 case BRW_OPCODE_FRC:
656 brw_FRC(p, dst, src[0]);
657 break;
658 case BRW_OPCODE_RNDD:
659 brw_RNDD(p, dst, src[0]);
660 break;
661 case BRW_OPCODE_RNDE:
662 brw_RNDE(p, dst, src[0]);
663 break;
664 case BRW_OPCODE_RNDZ:
665 brw_RNDZ(p, dst, src[0]);
666 break;
667
668 case BRW_OPCODE_AND:
669 brw_AND(p, dst, src[0], src[1]);
670 break;
671 case BRW_OPCODE_OR:
672 brw_OR(p, dst, src[0], src[1]);
673 break;
674 case BRW_OPCODE_XOR:
675 brw_XOR(p, dst, src[0], src[1]);
676 break;
677 case BRW_OPCODE_NOT:
678 brw_NOT(p, dst, src[0]);
679 break;
680 case BRW_OPCODE_ASR:
681 brw_ASR(p, dst, src[0], src[1]);
682 break;
683 case BRW_OPCODE_SHR:
684 brw_SHR(p, dst, src[0], src[1]);
685 break;
686 case BRW_OPCODE_SHL:
687 brw_SHL(p, dst, src[0], src[1]);
688 break;
689
690 case BRW_OPCODE_CMP:
691 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
692 break;
693 case BRW_OPCODE_SEL:
694 brw_SEL(p, dst, src[0], src[1]);
695 break;
696
697 case BRW_OPCODE_IF:
698 if (inst->src[0].file != BAD_FILE) {
699 /* The instruction has an embedded compare (only allowed on gen6) */
700 assert(intel->gen == 6);
701 gen6_IF(p, inst->conditional_mod, src[0], src[1]);
702 } else {
703 brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
704 }
705 if_depth_in_loop[loop_stack_depth]++;
706 break;
707
708 case BRW_OPCODE_ELSE:
709 brw_ELSE(p);
710 break;
711 case BRW_OPCODE_ENDIF:
712 brw_ENDIF(p);
713 if_depth_in_loop[loop_stack_depth]--;
714 break;
715
716 case BRW_OPCODE_DO:
717 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
718 if (loop_stack_array_size <= loop_stack_depth) {
719 loop_stack_array_size *= 2;
720 loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
721 loop_stack_array_size);
722 if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
723 loop_stack_array_size);
724 }
725 if_depth_in_loop[loop_stack_depth] = 0;
726 break;
727
728 case BRW_OPCODE_BREAK:
729 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
730 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
731 break;
732 case BRW_OPCODE_CONTINUE:
733 /* FINISHME: We need to write the loop instruction support still. */
734 if (intel->gen >= 6)
735 gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
736 else
737 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
738 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
739 break;
740
741 case BRW_OPCODE_WHILE: {
742 struct brw_instruction *inst0, *inst1;
743 GLuint br = 1;
744
745 if (intel->gen >= 5)
746 br = 2;
747
748 assert(loop_stack_depth > 0);
749 loop_stack_depth--;
750 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
751 if (intel->gen < 6) {
752 /* patch all the BREAK/CONT instructions from last BGNLOOP */
753 while (inst0 > loop_stack[loop_stack_depth]) {
754 inst0--;
755 if (inst0->header.opcode == BRW_OPCODE_BREAK &&
756 inst0->bits3.if_else.jump_count == 0) {
757 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
758 }
759 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
760 inst0->bits3.if_else.jump_count == 0) {
761 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
762 }
763 }
764 }
765 }
766 break;
767
768 case SHADER_OPCODE_RCP:
769 case SHADER_OPCODE_RSQ:
770 case SHADER_OPCODE_SQRT:
771 case SHADER_OPCODE_EXP2:
772 case SHADER_OPCODE_LOG2:
773 case SHADER_OPCODE_POW:
774 case SHADER_OPCODE_SIN:
775 case SHADER_OPCODE_COS:
776 generate_math(inst, dst, src);
777 break;
778 case FS_OPCODE_PIXEL_X:
779 generate_pixel_xy(dst, true);
780 break;
781 case FS_OPCODE_PIXEL_Y:
782 generate_pixel_xy(dst, false);
783 break;
784 case FS_OPCODE_CINTERP:
785 brw_MOV(p, dst, src[0]);
786 break;
787 case FS_OPCODE_LINTERP:
788 generate_linterp(inst, dst, src);
789 break;
790 case FS_OPCODE_TEX:
791 case FS_OPCODE_TXB:
792 case FS_OPCODE_TXD:
793 case FS_OPCODE_TXF:
794 case FS_OPCODE_TXL:
795 case FS_OPCODE_TXS:
796 generate_tex(inst, dst, src[0]);
797 break;
798 case FS_OPCODE_DISCARD:
799 generate_discard(inst);
800 break;
801 case FS_OPCODE_DDX:
802 generate_ddx(inst, dst, src[0]);
803 break;
804 case FS_OPCODE_DDY:
805 generate_ddy(inst, dst, src[0]);
806 break;
807
808 case FS_OPCODE_SPILL:
809 generate_spill(inst, src[0]);
810 break;
811
812 case FS_OPCODE_UNSPILL:
813 generate_unspill(inst, dst);
814 break;
815
816 case FS_OPCODE_PULL_CONSTANT_LOAD:
817 generate_pull_constant_load(inst, dst);
818 break;
819
820 case FS_OPCODE_FB_WRITE:
821 generate_fb_write(inst);
822 break;
823 default:
824 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
825 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
826 brw_opcodes[inst->opcode].name);
827 } else {
828 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
829 }
830 fail("unsupported opcode in FS\n");
831 }
832
833 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
834 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
835 if (0) {
836 printf("0x%08x 0x%08x 0x%08x 0x%08x ",
837 ((uint32_t *)&p->store[i])[3],
838 ((uint32_t *)&p->store[i])[2],
839 ((uint32_t *)&p->store[i])[1],
840 ((uint32_t *)&p->store[i])[0]);
841 }
842 brw_disasm(stdout, &p->store[i], intel->gen);
843 }
844 }
845
846 last_native_inst = p->nr_insn;
847 }
848
849 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
850 printf("\n");
851 }
852
853 ralloc_free(loop_stack);
854 ralloc_free(if_depth_in_loop);
855
856 brw_set_uip_jip(p);
857
858 /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
859 * emit issues, it doesn't get the jump distances into the output,
860 * which is often something we want to debug. So this is here in
861 * case you're doing that.
862 */
863 if (0) {
864 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
865 for (unsigned int i = 0; i < p->nr_insn; i++) {
866 printf("0x%08x 0x%08x 0x%08x 0x%08x ",
867 ((uint32_t *)&p->store[i])[3],
868 ((uint32_t *)&p->store[i])[2],
869 ((uint32_t *)&p->store[i])[1],
870 ((uint32_t *)&p->store[i])[0]);
871 brw_disasm(stdout, &p->store[i], intel->gen);
872 }
873 }
874 }
875 }