i965/vs: Move the flag for whether to use the new backend to the context.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_emit.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_fs_emit.cpp
25 *
26 * This file supports emitting code from the FS LIR to the actual
27 * native instructions.
28 */
29
30 extern "C" {
31 #include "main/macros.h"
32 #include "brw_context.h"
33 #include "brw_eu.h"
34 } /* extern "C" */
35
36 #include "brw_fs.h"
37 #include "glsl/ir_print_visitor.h"
38
39 void
40 fs_visitor::generate_fb_write(fs_inst *inst)
41 {
42 GLboolean eot = inst->eot;
43 struct brw_reg implied_header;
44
45 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
46 * move, here's g1.
47 */
48 brw_push_insn_state(p);
49 brw_set_mask_control(p, BRW_MASK_DISABLE);
50 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
51
52 if (inst->header_present) {
53 if (intel->gen >= 6) {
54 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
55 brw_MOV(p,
56 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
57 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
58 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
59
60 if (inst->target > 0) {
61 /* Set the render target index for choosing BLEND_STATE. */
62 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
63 inst->base_mrf, 2),
64 BRW_REGISTER_TYPE_UD),
65 brw_imm_ud(inst->target));
66 }
67
68 implied_header = brw_null_reg();
69 } else {
70 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
71
72 brw_MOV(p,
73 brw_message_reg(inst->base_mrf + 1),
74 brw_vec8_grf(1, 0));
75 }
76 } else {
77 implied_header = brw_null_reg();
78 }
79
80 brw_pop_insn_state(p);
81
82 brw_fb_WRITE(p,
83 c->dispatch_width,
84 inst->base_mrf,
85 implied_header,
86 inst->target,
87 inst->mlen,
88 0,
89 eot,
90 inst->header_present);
91 }
92
93 /* Computes the integer pixel x,y values from the origin.
94 *
95 * This is the basis of gl_FragCoord computation, but is also used
96 * pre-gen6 for computing the deltas from v0 for computing
97 * interpolation.
98 */
99 void
100 fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
101 {
102 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
103 struct brw_reg src;
104 struct brw_reg deltas;
105
106 if (is_x) {
107 src = stride(suboffset(g1_uw, 4), 2, 4, 0);
108 deltas = brw_imm_v(0x10101010);
109 } else {
110 src = stride(suboffset(g1_uw, 5), 2, 4, 0);
111 deltas = brw_imm_v(0x11001100);
112 }
113
114 if (c->dispatch_width == 16) {
115 dst = vec16(dst);
116 }
117
118 /* We do this 8 or 16-wide, but since the destination is UW we
119 * don't do compression in the 16-wide case.
120 */
121 brw_push_insn_state(p);
122 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
123 brw_ADD(p, dst, src, deltas);
124 brw_pop_insn_state(p);
125 }
126
127 void
128 fs_visitor::generate_linterp(fs_inst *inst,
129 struct brw_reg dst, struct brw_reg *src)
130 {
131 struct brw_reg delta_x = src[0];
132 struct brw_reg delta_y = src[1];
133 struct brw_reg interp = src[2];
134
135 if (brw->has_pln &&
136 delta_y.nr == delta_x.nr + 1 &&
137 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
138 brw_PLN(p, dst, interp, delta_x);
139 } else {
140 brw_LINE(p, brw_null_reg(), interp, delta_x);
141 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
142 }
143 }
144
145 void
146 fs_visitor::generate_math(fs_inst *inst,
147 struct brw_reg dst, struct brw_reg *src)
148 {
149 int op = brw_math_function(inst->opcode);
150
151 if (intel->gen >= 6) {
152 assert(inst->mlen == 0);
153
154 if (inst->opcode == SHADER_OPCODE_POW) {
155 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
156 brw_math2(p, dst, op, src[0], src[1]);
157
158 if (c->dispatch_width == 16) {
159 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
160 brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
161 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
162 }
163 } else {
164 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
165 brw_math(p, dst,
166 op,
167 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
168 BRW_MATH_SATURATE_NONE,
169 0, src[0],
170 BRW_MATH_DATA_VECTOR,
171 BRW_MATH_PRECISION_FULL);
172
173 if (c->dispatch_width == 16) {
174 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
175 brw_math(p, sechalf(dst),
176 op,
177 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
178 BRW_MATH_SATURATE_NONE,
179 0, sechalf(src[0]),
180 BRW_MATH_DATA_VECTOR,
181 BRW_MATH_PRECISION_FULL);
182 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
183 }
184 }
185 } else /* gen <= 5 */{
186 assert(inst->mlen >= 1);
187
188 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
189 brw_math(p, dst,
190 op,
191 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
192 BRW_MATH_SATURATE_NONE,
193 inst->base_mrf, src[0],
194 BRW_MATH_DATA_VECTOR,
195 BRW_MATH_PRECISION_FULL);
196
197 if (c->dispatch_width == 16) {
198 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
199 brw_math(p, sechalf(dst),
200 op,
201 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
202 BRW_MATH_SATURATE_NONE,
203 inst->base_mrf + 1, sechalf(src[0]),
204 BRW_MATH_DATA_VECTOR,
205 BRW_MATH_PRECISION_FULL);
206
207 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
208 }
209 }
210 }
211
212 void
213 fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
214 {
215 int msg_type = -1;
216 int rlen = 4;
217 uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
218
219 if (c->dispatch_width == 16)
220 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
221
222 if (intel->gen >= 5) {
223 switch (inst->opcode) {
224 case FS_OPCODE_TEX:
225 if (inst->shadow_compare) {
226 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
227 } else {
228 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
229 }
230 break;
231 case FS_OPCODE_TXB:
232 if (inst->shadow_compare) {
233 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
234 } else {
235 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
236 }
237 break;
238 case FS_OPCODE_TXL:
239 if (inst->shadow_compare) {
240 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
241 } else {
242 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
243 }
244 break;
245 case FS_OPCODE_TXS:
246 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
247 break;
248 case FS_OPCODE_TXD:
249 /* There is no sample_d_c message; comparisons are done manually */
250 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
251 break;
252 default:
253 assert(!"not reached");
254 break;
255 }
256 } else {
257 switch (inst->opcode) {
258 case FS_OPCODE_TEX:
259 /* Note that G45 and older determines shadow compare and dispatch width
260 * from message length for most messages.
261 */
262 assert(c->dispatch_width == 8);
263 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
264 if (inst->shadow_compare) {
265 assert(inst->mlen == 6);
266 } else {
267 assert(inst->mlen <= 4);
268 }
269 break;
270 case FS_OPCODE_TXB:
271 if (inst->shadow_compare) {
272 assert(inst->mlen == 6);
273 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
274 } else {
275 assert(inst->mlen == 9);
276 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
277 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
278 }
279 break;
280 case FS_OPCODE_TXL:
281 if (inst->shadow_compare) {
282 assert(inst->mlen == 6);
283 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
284 } else {
285 assert(inst->mlen == 9);
286 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
287 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
288 }
289 break;
290 case FS_OPCODE_TXD:
291 /* There is no sample_d_c message; comparisons are done manually */
292 assert(inst->mlen == 7 || inst->mlen == 10);
293 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
294 break;
295 case FS_OPCODE_TXS:
296 assert(inst->mlen == 3);
297 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
298 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
299 break;
300 default:
301 assert(!"not reached");
302 break;
303 }
304 }
305 assert(msg_type != -1);
306
307 if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
308 rlen = 8;
309 dst = vec16(dst);
310 }
311
312 brw_SAMPLE(p,
313 retype(dst, BRW_REGISTER_TYPE_UW),
314 inst->base_mrf,
315 src,
316 SURF_INDEX_TEXTURE(inst->sampler),
317 inst->sampler,
318 WRITEMASK_XYZW,
319 msg_type,
320 rlen,
321 inst->mlen,
322 0,
323 inst->header_present,
324 simd_mode);
325 }
326
327
328 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
329 * looking like:
330 *
331 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
332 *
333 * and we're trying to produce:
334 *
335 * DDX DDY
336 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
337 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
338 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
339 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
340 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
341 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
342 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
343 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
344 *
345 * and add another set of two more subspans if in 16-pixel dispatch mode.
346 *
347 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
348 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
349 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
350 * between each other. We could probably do it like ddx and swizzle the right
351 * order later, but bail for now and just produce
352 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
353 */
354 void
355 fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
356 {
357 struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
358 BRW_REGISTER_TYPE_F,
359 BRW_VERTICAL_STRIDE_2,
360 BRW_WIDTH_2,
361 BRW_HORIZONTAL_STRIDE_0,
362 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
363 struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
364 BRW_REGISTER_TYPE_F,
365 BRW_VERTICAL_STRIDE_2,
366 BRW_WIDTH_2,
367 BRW_HORIZONTAL_STRIDE_0,
368 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
369 brw_ADD(p, dst, src0, negate(src1));
370 }
371
372 void
373 fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
374 {
375 struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
376 BRW_REGISTER_TYPE_F,
377 BRW_VERTICAL_STRIDE_4,
378 BRW_WIDTH_4,
379 BRW_HORIZONTAL_STRIDE_0,
380 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
381 struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
382 BRW_REGISTER_TYPE_F,
383 BRW_VERTICAL_STRIDE_4,
384 BRW_WIDTH_4,
385 BRW_HORIZONTAL_STRIDE_0,
386 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
387 brw_ADD(p, dst, src0, negate(src1));
388 }
389
390 void
391 fs_visitor::generate_discard(fs_inst *inst)
392 {
393 struct brw_reg f0 = brw_flag_reg();
394
395 if (intel->gen >= 6) {
396 struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
397 struct brw_reg some_register;
398
399 /* As of gen6, we no longer have the mask register to look at,
400 * so life gets a bit more complicated.
401 */
402
403 /* Load the flag register with all ones. */
404 brw_push_insn_state(p);
405 brw_set_mask_control(p, BRW_MASK_DISABLE);
406 brw_MOV(p, f0, brw_imm_uw(0xffff));
407 brw_pop_insn_state(p);
408
409 /* Do a comparison that should always fail, to produce 0s in the flag
410 * reg where we have active channels.
411 */
412 some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
413 brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
414 BRW_CONDITIONAL_NZ, some_register, some_register);
415
416 /* Undo CMP's whacking of predication*/
417 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
418
419 brw_push_insn_state(p);
420 brw_set_mask_control(p, BRW_MASK_DISABLE);
421 brw_AND(p, g1, f0, g1);
422 brw_pop_insn_state(p);
423 } else {
424 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
425
426 brw_push_insn_state(p);
427 brw_set_mask_control(p, BRW_MASK_DISABLE);
428 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
429
430 /* Unlike the 965, we have the mask reg, so we just need
431 * somewhere to invert that (containing channels to be disabled)
432 * so it can be ANDed with the mask of pixels still to be
433 * written. Use the flag reg for consistency with gen6+.
434 */
435 brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
436 brw_AND(p, g0, f0, g0);
437
438 brw_pop_insn_state(p);
439 }
440 }
441
442 void
443 fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
444 {
445 assert(inst->mlen != 0);
446
447 brw_MOV(p,
448 retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
449 retype(src, BRW_REGISTER_TYPE_UD));
450 brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
451 inst->offset);
452 }
453
454 void
455 fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
456 {
457 assert(inst->mlen != 0);
458
459 /* Clear any post destination dependencies that would be ignored by
460 * the block read. See the B-Spec for pre-gen5 send instruction.
461 *
462 * This could use a better solution, since texture sampling and
463 * math reads could potentially run into it as well -- anywhere
464 * that we have a SEND with a destination that is a register that
465 * was written but not read within the last N instructions (what's
466 * N? unsure). This is rare because of dead code elimination, but
467 * not impossible.
468 */
469 if (intel->gen == 4 && !intel->is_g4x)
470 brw_MOV(p, brw_null_reg(), dst);
471
472 brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
473 inst->offset);
474
475 if (intel->gen == 4 && !intel->is_g4x) {
476 /* gen4 errata: destination from a send can't be used as a
477 * destination until it's been read. Just read it so we don't
478 * have to worry.
479 */
480 brw_MOV(p, brw_null_reg(), dst);
481 }
482 }
483
484 void
485 fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
486 {
487 assert(inst->mlen != 0);
488
489 /* Clear any post destination dependencies that would be ignored by
490 * the block read. See the B-Spec for pre-gen5 send instruction.
491 *
492 * This could use a better solution, since texture sampling and
493 * math reads could potentially run into it as well -- anywhere
494 * that we have a SEND with a destination that is a register that
495 * was written but not read within the last N instructions (what's
496 * N? unsure). This is rare because of dead code elimination, but
497 * not impossible.
498 */
499 if (intel->gen == 4 && !intel->is_g4x)
500 brw_MOV(p, brw_null_reg(), dst);
501
502 brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
503 inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);
504
505 if (intel->gen == 4 && !intel->is_g4x) {
506 /* gen4 errata: destination from a send can't be used as a
507 * destination until it's been read. Just read it so we don't
508 * have to worry.
509 */
510 brw_MOV(p, brw_null_reg(), dst);
511 }
512 }
513
514 static struct brw_reg
515 brw_reg_from_fs_reg(fs_reg *reg)
516 {
517 struct brw_reg brw_reg;
518
519 switch (reg->file) {
520 case GRF:
521 case ARF:
522 case MRF:
523 if (reg->smear == -1) {
524 brw_reg = brw_vec8_reg(reg->file, reg->reg, 0);
525 } else {
526 brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear);
527 }
528 brw_reg = retype(brw_reg, reg->type);
529 if (reg->sechalf)
530 brw_reg = sechalf(brw_reg);
531 break;
532 case IMM:
533 switch (reg->type) {
534 case BRW_REGISTER_TYPE_F:
535 brw_reg = brw_imm_f(reg->imm.f);
536 break;
537 case BRW_REGISTER_TYPE_D:
538 brw_reg = brw_imm_d(reg->imm.i);
539 break;
540 case BRW_REGISTER_TYPE_UD:
541 brw_reg = brw_imm_ud(reg->imm.u);
542 break;
543 default:
544 assert(!"not reached");
545 brw_reg = brw_null_reg();
546 break;
547 }
548 break;
549 case FIXED_HW_REG:
550 brw_reg = reg->fixed_hw_reg;
551 break;
552 case BAD_FILE:
553 /* Probably unused. */
554 brw_reg = brw_null_reg();
555 break;
556 case UNIFORM:
557 assert(!"not reached");
558 brw_reg = brw_null_reg();
559 break;
560 default:
561 assert(!"not reached");
562 brw_reg = brw_null_reg();
563 break;
564 }
565 if (reg->abs)
566 brw_reg = brw_abs(brw_reg);
567 if (reg->negate)
568 brw_reg = negate(brw_reg);
569
570 return brw_reg;
571 }
572
573 void
574 fs_visitor::generate_code()
575 {
576 int last_native_inst = p->nr_insn;
577 const char *last_annotation_string = NULL;
578 ir_instruction *last_annotation_ir = NULL;
579
580 int loop_stack_array_size = 16;
581 int loop_stack_depth = 0;
582 brw_instruction **loop_stack =
583 rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
584 int *if_depth_in_loop =
585 rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
586
587
588 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
589 printf("Native code for fragment shader %d (%d-wide dispatch):\n",
590 prog->Name, c->dispatch_width);
591 }
592
593 foreach_list(node, &this->instructions) {
594 fs_inst *inst = (fs_inst *)node;
595 struct brw_reg src[3], dst;
596
597 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
598 if (last_annotation_ir != inst->ir) {
599 last_annotation_ir = inst->ir;
600 if (last_annotation_ir) {
601 printf(" ");
602 last_annotation_ir->print();
603 printf("\n");
604 }
605 }
606 if (last_annotation_string != inst->annotation) {
607 last_annotation_string = inst->annotation;
608 if (last_annotation_string)
609 printf(" %s\n", last_annotation_string);
610 }
611 }
612
613 for (unsigned int i = 0; i < 3; i++) {
614 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
615 }
616 dst = brw_reg_from_fs_reg(&inst->dst);
617
618 brw_set_conditionalmod(p, inst->conditional_mod);
619 brw_set_predicate_control(p, inst->predicated);
620 brw_set_predicate_inverse(p, inst->predicate_inverse);
621 brw_set_saturate(p, inst->saturate);
622
623 if (inst->force_uncompressed || c->dispatch_width == 8) {
624 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
625 } else if (inst->force_sechalf) {
626 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
627 } else {
628 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
629 }
630
631 switch (inst->opcode) {
632 case BRW_OPCODE_MOV:
633 brw_MOV(p, dst, src[0]);
634 break;
635 case BRW_OPCODE_ADD:
636 brw_ADD(p, dst, src[0], src[1]);
637 break;
638 case BRW_OPCODE_MUL:
639 brw_MUL(p, dst, src[0], src[1]);
640 break;
641 case BRW_OPCODE_MACH:
642 brw_set_acc_write_control(p, 1);
643 brw_MACH(p, dst, src[0], src[1]);
644 brw_set_acc_write_control(p, 0);
645 break;
646
647 case BRW_OPCODE_FRC:
648 brw_FRC(p, dst, src[0]);
649 break;
650 case BRW_OPCODE_RNDD:
651 brw_RNDD(p, dst, src[0]);
652 break;
653 case BRW_OPCODE_RNDE:
654 brw_RNDE(p, dst, src[0]);
655 break;
656 case BRW_OPCODE_RNDZ:
657 brw_RNDZ(p, dst, src[0]);
658 break;
659
660 case BRW_OPCODE_AND:
661 brw_AND(p, dst, src[0], src[1]);
662 break;
663 case BRW_OPCODE_OR:
664 brw_OR(p, dst, src[0], src[1]);
665 break;
666 case BRW_OPCODE_XOR:
667 brw_XOR(p, dst, src[0], src[1]);
668 break;
669 case BRW_OPCODE_NOT:
670 brw_NOT(p, dst, src[0]);
671 break;
672 case BRW_OPCODE_ASR:
673 brw_ASR(p, dst, src[0], src[1]);
674 break;
675 case BRW_OPCODE_SHR:
676 brw_SHR(p, dst, src[0], src[1]);
677 break;
678 case BRW_OPCODE_SHL:
679 brw_SHL(p, dst, src[0], src[1]);
680 break;
681
682 case BRW_OPCODE_CMP:
683 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
684 break;
685 case BRW_OPCODE_SEL:
686 brw_SEL(p, dst, src[0], src[1]);
687 break;
688
689 case BRW_OPCODE_IF:
690 if (inst->src[0].file != BAD_FILE) {
691 /* The instruction has an embedded compare (only allowed on gen6) */
692 assert(intel->gen == 6);
693 gen6_IF(p, inst->conditional_mod, src[0], src[1]);
694 } else {
695 brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
696 }
697 if_depth_in_loop[loop_stack_depth]++;
698 break;
699
700 case BRW_OPCODE_ELSE:
701 brw_ELSE(p);
702 break;
703 case BRW_OPCODE_ENDIF:
704 brw_ENDIF(p);
705 if_depth_in_loop[loop_stack_depth]--;
706 break;
707
708 case BRW_OPCODE_DO:
709 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
710 if (loop_stack_array_size <= loop_stack_depth) {
711 loop_stack_array_size *= 2;
712 loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
713 loop_stack_array_size);
714 if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
715 loop_stack_array_size);
716 }
717 if_depth_in_loop[loop_stack_depth] = 0;
718 break;
719
720 case BRW_OPCODE_BREAK:
721 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
722 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
723 break;
724 case BRW_OPCODE_CONTINUE:
725 /* FINISHME: We need to write the loop instruction support still. */
726 if (intel->gen >= 6)
727 gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
728 else
729 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
730 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
731 break;
732
733 case BRW_OPCODE_WHILE: {
734 struct brw_instruction *inst0, *inst1;
735 GLuint br = 1;
736
737 if (intel->gen >= 5)
738 br = 2;
739
740 assert(loop_stack_depth > 0);
741 loop_stack_depth--;
742 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
743 if (intel->gen < 6) {
744 /* patch all the BREAK/CONT instructions from last BGNLOOP */
745 while (inst0 > loop_stack[loop_stack_depth]) {
746 inst0--;
747 if (inst0->header.opcode == BRW_OPCODE_BREAK &&
748 inst0->bits3.if_else.jump_count == 0) {
749 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
750 }
751 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
752 inst0->bits3.if_else.jump_count == 0) {
753 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
754 }
755 }
756 }
757 }
758 break;
759
760 case SHADER_OPCODE_RCP:
761 case SHADER_OPCODE_RSQ:
762 case SHADER_OPCODE_SQRT:
763 case SHADER_OPCODE_EXP2:
764 case SHADER_OPCODE_LOG2:
765 case SHADER_OPCODE_POW:
766 case SHADER_OPCODE_SIN:
767 case SHADER_OPCODE_COS:
768 generate_math(inst, dst, src);
769 break;
770 case FS_OPCODE_PIXEL_X:
771 generate_pixel_xy(dst, true);
772 break;
773 case FS_OPCODE_PIXEL_Y:
774 generate_pixel_xy(dst, false);
775 break;
776 case FS_OPCODE_CINTERP:
777 brw_MOV(p, dst, src[0]);
778 break;
779 case FS_OPCODE_LINTERP:
780 generate_linterp(inst, dst, src);
781 break;
782 case FS_OPCODE_TEX:
783 case FS_OPCODE_TXB:
784 case FS_OPCODE_TXD:
785 case FS_OPCODE_TXL:
786 case FS_OPCODE_TXS:
787 generate_tex(inst, dst, src[0]);
788 break;
789 case FS_OPCODE_DISCARD:
790 generate_discard(inst);
791 break;
792 case FS_OPCODE_DDX:
793 generate_ddx(inst, dst, src[0]);
794 break;
795 case FS_OPCODE_DDY:
796 generate_ddy(inst, dst, src[0]);
797 break;
798
799 case FS_OPCODE_SPILL:
800 generate_spill(inst, src[0]);
801 break;
802
803 case FS_OPCODE_UNSPILL:
804 generate_unspill(inst, dst);
805 break;
806
807 case FS_OPCODE_PULL_CONSTANT_LOAD:
808 generate_pull_constant_load(inst, dst);
809 break;
810
811 case FS_OPCODE_FB_WRITE:
812 generate_fb_write(inst);
813 break;
814 default:
815 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
816 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
817 brw_opcodes[inst->opcode].name);
818 } else {
819 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
820 }
821 fail("unsupported opcode in FS\n");
822 }
823
824 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
825 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
826 if (0) {
827 printf("0x%08x 0x%08x 0x%08x 0x%08x ",
828 ((uint32_t *)&p->store[i])[3],
829 ((uint32_t *)&p->store[i])[2],
830 ((uint32_t *)&p->store[i])[1],
831 ((uint32_t *)&p->store[i])[0]);
832 }
833 brw_disasm(stdout, &p->store[i], intel->gen);
834 }
835 }
836
837 last_native_inst = p->nr_insn;
838 }
839
840 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
841 printf("\n");
842 }
843
844 ralloc_free(loop_stack);
845 ralloc_free(if_depth_in_loop);
846
847 brw_set_uip_jip(p);
848
849 /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
850 * emit issues, it doesn't get the jump distances into the output,
851 * which is often something we want to debug. So this is here in
852 * case you're doing that.
853 */
854 if (0) {
855 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
856 for (unsigned int i = 0; i < p->nr_insn; i++) {
857 printf("0x%08x 0x%08x 0x%08x 0x%08x ",
858 ((uint32_t *)&p->store[i])[3],
859 ((uint32_t *)&p->store[i])[2],
860 ((uint32_t *)&p->store[i])[1],
861 ((uint32_t *)&p->store[i])[0]);
862 brw_disasm(stdout, &p->store[i], intel->gen);
863 }
864 }
865 }
866 }