i965: Start adding the VS visitor and codegen.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_emit.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_fs_emit.cpp
25 *
26 * This file supports emitting code from the FS LIR to the actual
27 * native instructions.
28 */
29
30 extern "C" {
31 #include "main/macros.h"
32 #include "brw_context.h"
33 #include "brw_eu.h"
34 } /* extern "C" */
35
36 #include "brw_fs.h"
37 #include "../glsl/ir_print_visitor.h"
38
39 void
40 fs_visitor::generate_fb_write(fs_inst *inst)
41 {
42 GLboolean eot = inst->eot;
43 struct brw_reg implied_header;
44
45 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
46 * move, here's g1.
47 */
48 brw_push_insn_state(p);
49 brw_set_mask_control(p, BRW_MASK_DISABLE);
50 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
51
52 if (inst->header_present) {
53 if (intel->gen >= 6) {
54 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
55 brw_MOV(p,
56 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
57 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
58 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
59
60 if (inst->target > 0) {
61 /* Set the render target index for choosing BLEND_STATE. */
62 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
63 inst->base_mrf, 2),
64 BRW_REGISTER_TYPE_UD),
65 brw_imm_ud(inst->target));
66 }
67
68 implied_header = brw_null_reg();
69 } else {
70 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
71
72 brw_MOV(p,
73 brw_message_reg(inst->base_mrf + 1),
74 brw_vec8_grf(1, 0));
75 }
76 } else {
77 implied_header = brw_null_reg();
78 }
79
80 brw_pop_insn_state(p);
81
82 brw_fb_WRITE(p,
83 c->dispatch_width,
84 inst->base_mrf,
85 implied_header,
86 inst->target,
87 inst->mlen,
88 0,
89 eot,
90 inst->header_present);
91 }
92
93 /* Computes the integer pixel x,y values from the origin.
94 *
95 * This is the basis of gl_FragCoord computation, but is also used
96 * pre-gen6 for computing the deltas from v0 for computing
97 * interpolation.
98 */
99 void
100 fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
101 {
102 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
103 struct brw_reg src;
104 struct brw_reg deltas;
105
106 if (is_x) {
107 src = stride(suboffset(g1_uw, 4), 2, 4, 0);
108 deltas = brw_imm_v(0x10101010);
109 } else {
110 src = stride(suboffset(g1_uw, 5), 2, 4, 0);
111 deltas = brw_imm_v(0x11001100);
112 }
113
114 if (c->dispatch_width == 16) {
115 dst = vec16(dst);
116 }
117
118 /* We do this 8 or 16-wide, but since the destination is UW we
119 * don't do compression in the 16-wide case.
120 */
121 brw_push_insn_state(p);
122 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
123 brw_ADD(p, dst, src, deltas);
124 brw_pop_insn_state(p);
125 }
126
127 void
128 fs_visitor::generate_linterp(fs_inst *inst,
129 struct brw_reg dst, struct brw_reg *src)
130 {
131 struct brw_reg delta_x = src[0];
132 struct brw_reg delta_y = src[1];
133 struct brw_reg interp = src[2];
134
135 if (brw->has_pln &&
136 delta_y.nr == delta_x.nr + 1 &&
137 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
138 brw_PLN(p, dst, interp, delta_x);
139 } else {
140 brw_LINE(p, brw_null_reg(), interp, delta_x);
141 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
142 }
143 }
144
145 void
146 fs_visitor::generate_math(fs_inst *inst,
147 struct brw_reg dst, struct brw_reg *src)
148 {
149 int op = brw_math_function(inst->opcode);
150
151 if (intel->gen >= 6) {
152 assert(inst->mlen == 0);
153
154 if (inst->opcode == SHADER_OPCODE_POW) {
155 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
156 brw_math2(p, dst, op, src[0], src[1]);
157
158 if (c->dispatch_width == 16) {
159 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
160 brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
161 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
162 }
163 } else {
164 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
165 brw_math(p, dst,
166 op,
167 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
168 BRW_MATH_SATURATE_NONE,
169 0, src[0],
170 BRW_MATH_DATA_VECTOR,
171 BRW_MATH_PRECISION_FULL);
172
173 if (c->dispatch_width == 16) {
174 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
175 brw_math(p, sechalf(dst),
176 op,
177 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
178 BRW_MATH_SATURATE_NONE,
179 0, sechalf(src[0]),
180 BRW_MATH_DATA_VECTOR,
181 BRW_MATH_PRECISION_FULL);
182 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
183 }
184 }
185 } else /* gen <= 5 */{
186 assert(inst->mlen >= 1);
187
188 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
189 brw_math(p, dst,
190 op,
191 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
192 BRW_MATH_SATURATE_NONE,
193 inst->base_mrf, src[0],
194 BRW_MATH_DATA_VECTOR,
195 BRW_MATH_PRECISION_FULL);
196
197 if (c->dispatch_width == 16) {
198 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
199 brw_math(p, sechalf(dst),
200 op,
201 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
202 BRW_MATH_SATURATE_NONE,
203 inst->base_mrf + 1, sechalf(src[0]),
204 BRW_MATH_DATA_VECTOR,
205 BRW_MATH_PRECISION_FULL);
206
207 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
208 }
209 }
210 }
211
212 void
213 fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
214 {
215 int msg_type = -1;
216 int rlen = 4;
217 uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
218
219 if (c->dispatch_width == 16)
220 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
221
222 if (intel->gen >= 5) {
223 switch (inst->opcode) {
224 case FS_OPCODE_TEX:
225 if (inst->shadow_compare) {
226 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
227 } else {
228 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
229 }
230 break;
231 case FS_OPCODE_TXB:
232 if (inst->shadow_compare) {
233 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
234 } else {
235 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
236 }
237 break;
238 case FS_OPCODE_TXL:
239 if (inst->shadow_compare) {
240 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
241 } else {
242 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
243 }
244 break;
245 case FS_OPCODE_TXD:
246 /* There is no sample_d_c message; comparisons are done manually */
247 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
248 break;
249 default:
250 assert(!"not reached");
251 break;
252 }
253 } else {
254 switch (inst->opcode) {
255 case FS_OPCODE_TEX:
256 /* Note that G45 and older determines shadow compare and dispatch width
257 * from message length for most messages.
258 */
259 assert(c->dispatch_width == 8);
260 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
261 if (inst->shadow_compare) {
262 assert(inst->mlen == 6);
263 } else {
264 assert(inst->mlen <= 4);
265 }
266 break;
267 case FS_OPCODE_TXB:
268 if (inst->shadow_compare) {
269 assert(inst->mlen == 6);
270 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
271 } else {
272 assert(inst->mlen == 9);
273 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
274 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
275 }
276 break;
277 case FS_OPCODE_TXL:
278 if (inst->shadow_compare) {
279 assert(inst->mlen == 6);
280 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
281 } else {
282 assert(inst->mlen == 9);
283 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
284 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
285 }
286 break;
287 case FS_OPCODE_TXD:
288 /* There is no sample_d_c message; comparisons are done manually */
289 assert(inst->mlen == 7 || inst->mlen == 10);
290 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
291 break;
292 default:
293 assert(!"not reached");
294 break;
295 }
296 }
297 assert(msg_type != -1);
298
299 if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
300 rlen = 8;
301 dst = vec16(dst);
302 }
303
304 brw_SAMPLE(p,
305 retype(dst, BRW_REGISTER_TYPE_UW),
306 inst->base_mrf,
307 src,
308 SURF_INDEX_TEXTURE(inst->sampler),
309 inst->sampler,
310 WRITEMASK_XYZW,
311 msg_type,
312 rlen,
313 inst->mlen,
314 0,
315 inst->header_present,
316 simd_mode);
317 }
318
319
320 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
321 * looking like:
322 *
323 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
324 *
325 * and we're trying to produce:
326 *
327 * DDX DDY
328 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
329 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
330 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
331 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
332 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
333 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
334 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
335 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
336 *
337 * and add another set of two more subspans if in 16-pixel dispatch mode.
338 *
339 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
340 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
341 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
342 * between each other. We could probably do it like ddx and swizzle the right
343 * order later, but bail for now and just produce
344 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
345 */
346 void
347 fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
348 {
349 struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
350 BRW_REGISTER_TYPE_F,
351 BRW_VERTICAL_STRIDE_2,
352 BRW_WIDTH_2,
353 BRW_HORIZONTAL_STRIDE_0,
354 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
355 struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
356 BRW_REGISTER_TYPE_F,
357 BRW_VERTICAL_STRIDE_2,
358 BRW_WIDTH_2,
359 BRW_HORIZONTAL_STRIDE_0,
360 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
361 brw_ADD(p, dst, src0, negate(src1));
362 }
363
364 void
365 fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
366 {
367 struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
368 BRW_REGISTER_TYPE_F,
369 BRW_VERTICAL_STRIDE_4,
370 BRW_WIDTH_4,
371 BRW_HORIZONTAL_STRIDE_0,
372 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
373 struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
374 BRW_REGISTER_TYPE_F,
375 BRW_VERTICAL_STRIDE_4,
376 BRW_WIDTH_4,
377 BRW_HORIZONTAL_STRIDE_0,
378 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
379 brw_ADD(p, dst, src0, negate(src1));
380 }
381
382 void
383 fs_visitor::generate_discard(fs_inst *inst)
384 {
385 struct brw_reg f0 = brw_flag_reg();
386
387 if (intel->gen >= 6) {
388 struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
389 struct brw_reg some_register;
390
391 /* As of gen6, we no longer have the mask register to look at,
392 * so life gets a bit more complicated.
393 */
394
395 /* Load the flag register with all ones. */
396 brw_push_insn_state(p);
397 brw_set_mask_control(p, BRW_MASK_DISABLE);
398 brw_MOV(p, f0, brw_imm_uw(0xffff));
399 brw_pop_insn_state(p);
400
401 /* Do a comparison that should always fail, to produce 0s in the flag
402 * reg where we have active channels.
403 */
404 some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
405 brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
406 BRW_CONDITIONAL_NZ, some_register, some_register);
407
408 /* Undo CMP's whacking of predication*/
409 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
410
411 brw_push_insn_state(p);
412 brw_set_mask_control(p, BRW_MASK_DISABLE);
413 brw_AND(p, g1, f0, g1);
414 brw_pop_insn_state(p);
415 } else {
416 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
417
418 brw_push_insn_state(p);
419 brw_set_mask_control(p, BRW_MASK_DISABLE);
420 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
421
422 /* Unlike the 965, we have the mask reg, so we just need
423 * somewhere to invert that (containing channels to be disabled)
424 * so it can be ANDed with the mask of pixels still to be
425 * written. Use the flag reg for consistency with gen6+.
426 */
427 brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
428 brw_AND(p, g0, f0, g0);
429
430 brw_pop_insn_state(p);
431 }
432 }
433
434 void
435 fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
436 {
437 assert(inst->mlen != 0);
438
439 brw_MOV(p,
440 retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
441 retype(src, BRW_REGISTER_TYPE_UD));
442 brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
443 inst->offset);
444 }
445
446 void
447 fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
448 {
449 assert(inst->mlen != 0);
450
451 /* Clear any post destination dependencies that would be ignored by
452 * the block read. See the B-Spec for pre-gen5 send instruction.
453 *
454 * This could use a better solution, since texture sampling and
455 * math reads could potentially run into it as well -- anywhere
456 * that we have a SEND with a destination that is a register that
457 * was written but not read within the last N instructions (what's
458 * N? unsure). This is rare because of dead code elimination, but
459 * not impossible.
460 */
461 if (intel->gen == 4 && !intel->is_g4x)
462 brw_MOV(p, brw_null_reg(), dst);
463
464 brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
465 inst->offset);
466
467 if (intel->gen == 4 && !intel->is_g4x) {
468 /* gen4 errata: destination from a send can't be used as a
469 * destination until it's been read. Just read it so we don't
470 * have to worry.
471 */
472 brw_MOV(p, brw_null_reg(), dst);
473 }
474 }
475
476 void
477 fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
478 {
479 assert(inst->mlen != 0);
480
481 /* Clear any post destination dependencies that would be ignored by
482 * the block read. See the B-Spec for pre-gen5 send instruction.
483 *
484 * This could use a better solution, since texture sampling and
485 * math reads could potentially run into it as well -- anywhere
486 * that we have a SEND with a destination that is a register that
487 * was written but not read within the last N instructions (what's
488 * N? unsure). This is rare because of dead code elimination, but
489 * not impossible.
490 */
491 if (intel->gen == 4 && !intel->is_g4x)
492 brw_MOV(p, brw_null_reg(), dst);
493
494 brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
495 inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);
496
497 if (intel->gen == 4 && !intel->is_g4x) {
498 /* gen4 errata: destination from a send can't be used as a
499 * destination until it's been read. Just read it so we don't
500 * have to worry.
501 */
502 brw_MOV(p, brw_null_reg(), dst);
503 }
504 }
505
506 static struct brw_reg
507 brw_reg_from_fs_reg(fs_reg *reg)
508 {
509 struct brw_reg brw_reg;
510
511 switch (reg->file) {
512 case GRF:
513 case ARF:
514 case MRF:
515 if (reg->smear == -1) {
516 brw_reg = brw_vec8_reg(reg->file, reg->reg, 0);
517 } else {
518 brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear);
519 }
520 brw_reg = retype(brw_reg, reg->type);
521 if (reg->sechalf)
522 brw_reg = sechalf(brw_reg);
523 break;
524 case IMM:
525 switch (reg->type) {
526 case BRW_REGISTER_TYPE_F:
527 brw_reg = brw_imm_f(reg->imm.f);
528 break;
529 case BRW_REGISTER_TYPE_D:
530 brw_reg = brw_imm_d(reg->imm.i);
531 break;
532 case BRW_REGISTER_TYPE_UD:
533 brw_reg = brw_imm_ud(reg->imm.u);
534 break;
535 default:
536 assert(!"not reached");
537 brw_reg = brw_null_reg();
538 break;
539 }
540 break;
541 case FIXED_HW_REG:
542 brw_reg = reg->fixed_hw_reg;
543 break;
544 case BAD_FILE:
545 /* Probably unused. */
546 brw_reg = brw_null_reg();
547 break;
548 case UNIFORM:
549 assert(!"not reached");
550 brw_reg = brw_null_reg();
551 break;
552 default:
553 assert(!"not reached");
554 brw_reg = brw_null_reg();
555 break;
556 }
557 if (reg->abs)
558 brw_reg = brw_abs(brw_reg);
559 if (reg->negate)
560 brw_reg = negate(brw_reg);
561
562 return brw_reg;
563 }
564
565 void
566 fs_visitor::generate_code()
567 {
568 int last_native_inst = p->nr_insn;
569 const char *last_annotation_string = NULL;
570 ir_instruction *last_annotation_ir = NULL;
571
572 int loop_stack_array_size = 16;
573 int loop_stack_depth = 0;
574 brw_instruction **loop_stack =
575 rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
576 int *if_depth_in_loop =
577 rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
578
579
580 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
581 printf("Native code for fragment shader %d (%d-wide dispatch):\n",
582 prog->Name, c->dispatch_width);
583 }
584
585 foreach_list(node, &this->instructions) {
586 fs_inst *inst = (fs_inst *)node;
587 struct brw_reg src[3], dst;
588
589 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
590 if (last_annotation_ir != inst->ir) {
591 last_annotation_ir = inst->ir;
592 if (last_annotation_ir) {
593 printf(" ");
594 last_annotation_ir->print();
595 printf("\n");
596 }
597 }
598 if (last_annotation_string != inst->annotation) {
599 last_annotation_string = inst->annotation;
600 if (last_annotation_string)
601 printf(" %s\n", last_annotation_string);
602 }
603 }
604
605 for (unsigned int i = 0; i < 3; i++) {
606 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
607 }
608 dst = brw_reg_from_fs_reg(&inst->dst);
609
610 brw_set_conditionalmod(p, inst->conditional_mod);
611 brw_set_predicate_control(p, inst->predicated);
612 brw_set_predicate_inverse(p, inst->predicate_inverse);
613 brw_set_saturate(p, inst->saturate);
614
615 if (inst->force_uncompressed || c->dispatch_width == 8) {
616 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
617 } else if (inst->force_sechalf) {
618 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
619 } else {
620 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
621 }
622
623 switch (inst->opcode) {
624 case BRW_OPCODE_MOV:
625 brw_MOV(p, dst, src[0]);
626 break;
627 case BRW_OPCODE_ADD:
628 brw_ADD(p, dst, src[0], src[1]);
629 break;
630 case BRW_OPCODE_MUL:
631 brw_MUL(p, dst, src[0], src[1]);
632 break;
633
634 case BRW_OPCODE_FRC:
635 brw_FRC(p, dst, src[0]);
636 break;
637 case BRW_OPCODE_RNDD:
638 brw_RNDD(p, dst, src[0]);
639 break;
640 case BRW_OPCODE_RNDE:
641 brw_RNDE(p, dst, src[0]);
642 break;
643 case BRW_OPCODE_RNDZ:
644 brw_RNDZ(p, dst, src[0]);
645 break;
646
647 case BRW_OPCODE_AND:
648 brw_AND(p, dst, src[0], src[1]);
649 break;
650 case BRW_OPCODE_OR:
651 brw_OR(p, dst, src[0], src[1]);
652 break;
653 case BRW_OPCODE_XOR:
654 brw_XOR(p, dst, src[0], src[1]);
655 break;
656 case BRW_OPCODE_NOT:
657 brw_NOT(p, dst, src[0]);
658 break;
659 case BRW_OPCODE_ASR:
660 brw_ASR(p, dst, src[0], src[1]);
661 break;
662 case BRW_OPCODE_SHR:
663 brw_SHR(p, dst, src[0], src[1]);
664 break;
665 case BRW_OPCODE_SHL:
666 brw_SHL(p, dst, src[0], src[1]);
667 break;
668
669 case BRW_OPCODE_CMP:
670 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
671 break;
672 case BRW_OPCODE_SEL:
673 brw_SEL(p, dst, src[0], src[1]);
674 break;
675
676 case BRW_OPCODE_IF:
677 if (inst->src[0].file != BAD_FILE) {
678 /* The instruction has an embedded compare (only allowed on gen6) */
679 assert(intel->gen == 6);
680 gen6_IF(p, inst->conditional_mod, src[0], src[1]);
681 } else {
682 brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
683 }
684 if_depth_in_loop[loop_stack_depth]++;
685 break;
686
687 case BRW_OPCODE_ELSE:
688 brw_ELSE(p);
689 break;
690 case BRW_OPCODE_ENDIF:
691 brw_ENDIF(p);
692 if_depth_in_loop[loop_stack_depth]--;
693 break;
694
695 case BRW_OPCODE_DO:
696 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
697 if (loop_stack_array_size <= loop_stack_depth) {
698 loop_stack_array_size *= 2;
699 loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
700 loop_stack_array_size);
701 if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
702 loop_stack_array_size);
703 }
704 if_depth_in_loop[loop_stack_depth] = 0;
705 break;
706
707 case BRW_OPCODE_BREAK:
708 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
709 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
710 break;
711 case BRW_OPCODE_CONTINUE:
712 /* FINISHME: We need to write the loop instruction support still. */
713 if (intel->gen >= 6)
714 gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
715 else
716 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
717 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
718 break;
719
720 case BRW_OPCODE_WHILE: {
721 struct brw_instruction *inst0, *inst1;
722 GLuint br = 1;
723
724 if (intel->gen >= 5)
725 br = 2;
726
727 assert(loop_stack_depth > 0);
728 loop_stack_depth--;
729 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
730 if (intel->gen < 6) {
731 /* patch all the BREAK/CONT instructions from last BGNLOOP */
732 while (inst0 > loop_stack[loop_stack_depth]) {
733 inst0--;
734 if (inst0->header.opcode == BRW_OPCODE_BREAK &&
735 inst0->bits3.if_else.jump_count == 0) {
736 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
737 }
738 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
739 inst0->bits3.if_else.jump_count == 0) {
740 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
741 }
742 }
743 }
744 }
745 break;
746
747 case SHADER_OPCODE_RCP:
748 case SHADER_OPCODE_RSQ:
749 case SHADER_OPCODE_SQRT:
750 case SHADER_OPCODE_EXP2:
751 case SHADER_OPCODE_LOG2:
752 case SHADER_OPCODE_POW:
753 case SHADER_OPCODE_SIN:
754 case SHADER_OPCODE_COS:
755 generate_math(inst, dst, src);
756 break;
757 case FS_OPCODE_PIXEL_X:
758 generate_pixel_xy(dst, true);
759 break;
760 case FS_OPCODE_PIXEL_Y:
761 generate_pixel_xy(dst, false);
762 break;
763 case FS_OPCODE_CINTERP:
764 brw_MOV(p, dst, src[0]);
765 break;
766 case FS_OPCODE_LINTERP:
767 generate_linterp(inst, dst, src);
768 break;
769 case FS_OPCODE_TEX:
770 case FS_OPCODE_TXB:
771 case FS_OPCODE_TXD:
772 case FS_OPCODE_TXL:
773 generate_tex(inst, dst, src[0]);
774 break;
775 case FS_OPCODE_DISCARD:
776 generate_discard(inst);
777 break;
778 case FS_OPCODE_DDX:
779 generate_ddx(inst, dst, src[0]);
780 break;
781 case FS_OPCODE_DDY:
782 generate_ddy(inst, dst, src[0]);
783 break;
784
785 case FS_OPCODE_SPILL:
786 generate_spill(inst, src[0]);
787 break;
788
789 case FS_OPCODE_UNSPILL:
790 generate_unspill(inst, dst);
791 break;
792
793 case FS_OPCODE_PULL_CONSTANT_LOAD:
794 generate_pull_constant_load(inst, dst);
795 break;
796
797 case FS_OPCODE_FB_WRITE:
798 generate_fb_write(inst);
799 break;
800 default:
801 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
802 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
803 brw_opcodes[inst->opcode].name);
804 } else {
805 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
806 }
807 fail("unsupported opcode in FS\n");
808 }
809
810 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
811 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
812 if (0) {
813 printf("0x%08x 0x%08x 0x%08x 0x%08x ",
814 ((uint32_t *)&p->store[i])[3],
815 ((uint32_t *)&p->store[i])[2],
816 ((uint32_t *)&p->store[i])[1],
817 ((uint32_t *)&p->store[i])[0]);
818 }
819 brw_disasm(stdout, &p->store[i], intel->gen);
820 }
821 }
822
823 last_native_inst = p->nr_insn;
824 }
825
826 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
827 printf("\n");
828 }
829
830 ralloc_free(loop_stack);
831 ralloc_free(if_depth_in_loop);
832
833 brw_set_uip_jip(p);
834
835 /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
836 * emit issues, it doesn't get the jump distances into the output,
837 * which is often something we want to debug. So this is here in
838 * case you're doing that.
839 */
840 if (0) {
841 if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
842 for (unsigned int i = 0; i < p->nr_insn; i++) {
843 printf("0x%08x 0x%08x 0x%08x 0x%08x ",
844 ((uint32_t *)&p->store[i])[3],
845 ((uint32_t *)&p->store[i])[2],
846 ((uint32_t *)&p->store[i])[1],
847 ((uint32_t *)&p->store[i])[0]);
848 brw_disasm(stdout, &p->store[i], intel->gen);
849 }
850 }
851 }
852 }