8958aeb8153f02fbbcfaeca84d4ecaa581b9eb99
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_emit.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_fs_emit.cpp
25 *
26 * This file supports emitting code from the FS LIR to the actual
27 * native instructions.
28 */
29
30 extern "C" {
31 #include "main/macros.h"
32 #include "brw_context.h"
33 #include "brw_eu.h"
34 } /* extern "C" */
35
36 #include "brw_fs.h"
37 #include "../glsl/ir_print_visitor.h"
38
/**
 * Emit the framebuffer-write SEND for an FS_OPCODE_FB_WRITE instruction.
 *
 * When inst->header_present is set, the message header is staged into the
 * MRF first (under WE_all, so helper pixels participate): gen6 copies g0
 * into the first two message registers and, for render target > 0, writes
 * the target index into m0.2 for BLEND_STATE selection; pre-gen6 instead
 * copies g1 into base_mrf + 1 and lets the SEND pick up g0 as an implied
 * header move.
 */
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;
   struct brw_reg implied_header;

   /* Header is 2 regs, g0 and g1 are the contents.  g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (inst->header_present) {
      if (intel->gen >= 6) {
         /* A single compressed MOV fills both header registers from g0. */
         brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(p,
                 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);

         if (inst->target > 0) {
            /* Set the render target index for choosing BLEND_STATE. */
            brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
                              BRW_REGISTER_TYPE_UD),
                    brw_imm_ud(inst->target));
         }

         implied_header = brw_null_reg();
      } else {
         /* Pre-gen6: g0 is supplied to brw_fb_WRITE as the implied header
          * source; only g1 needs an explicit copy into the payload.
          */
         implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);

         brw_MOV(p,
                 brw_message_reg(inst->base_mrf + 1),
                 brw_vec8_grf(1, 0));
      }
   } else {
      implied_header = brw_null_reg();
   }

   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
                c->dispatch_width,
                inst->base_mrf,
                implied_header,
                inst->target,
                inst->mlen,
                0,
                eot,
                inst->header_present);
}
91
92 /* Computes the integer pixel x,y values from the origin.
93 *
94 * This is the basis of gl_FragCoord computation, but is also used
95 * pre-gen6 for computing the deltas from v0 for computing
96 * interpolation.
97 */
98 void
99 fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
100 {
101 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
102 struct brw_reg src;
103 struct brw_reg deltas;
104
105 if (is_x) {
106 src = stride(suboffset(g1_uw, 4), 2, 4, 0);
107 deltas = brw_imm_v(0x10101010);
108 } else {
109 src = stride(suboffset(g1_uw, 5), 2, 4, 0);
110 deltas = brw_imm_v(0x11001100);
111 }
112
113 if (c->dispatch_width == 16) {
114 dst = vec16(dst);
115 }
116
117 /* We do this 8 or 16-wide, but since the destination is UW we
118 * don't do compression in the 16-wide case.
119 */
120 brw_push_insn_state(p);
121 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
122 brw_ADD(p, dst, src, deltas);
123 brw_pop_insn_state(p);
124 }
125
126 void
127 fs_visitor::generate_linterp(fs_inst *inst,
128 struct brw_reg dst, struct brw_reg *src)
129 {
130 struct brw_reg delta_x = src[0];
131 struct brw_reg delta_y = src[1];
132 struct brw_reg interp = src[2];
133
134 if (brw->has_pln &&
135 delta_y.nr == delta_x.nr + 1 &&
136 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
137 brw_PLN(p, dst, interp, delta_x);
138 } else {
139 brw_LINE(p, brw_null_reg(), interp, delta_x);
140 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
141 }
142 }
143
/**
 * Emit native code for one of the FS math opcodes (RCP, RSQ, SQRT, EXP2,
 * LOG2, POW, SIN, COS).
 *
 * On gen6+ the math instruction reads its sources straight from the GRF
 * (hence the mlen == 0 assert) and is emitted uncompressed; 16-wide
 * dispatch is handled by emitting a second 2nd-half instruction on
 * sechalf'd registers.  Pre-gen6 math is a message whose payload was
 * staged at inst->base_mrf (and base_mrf + 1 for the second half).
 */
void
fs_visitor::generate_math(fs_inst *inst,
                          struct brw_reg dst, struct brw_reg *src)
{
   int op;

   /* Translate the LIR opcode into the hardware math function encoding. */
   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   if (intel->gen >= 6) {
      assert(inst->mlen == 0);

      if (inst->opcode == FS_OPCODE_POW) {
         /* POW takes two sources, so it goes through the two-source
          * math helper.
          */
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
         brw_math2(p, dst, op, src[0], src[1]);

         if (c->dispatch_width == 16) {
            brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
            brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
            brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
         }
      } else {
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
         brw_math(p, dst,
                  op,
                  inst->saturate ? BRW_MATH_SATURATE_SATURATE :
                  BRW_MATH_SATURATE_NONE,
                  0, src[0],
                  BRW_MATH_DATA_VECTOR,
                  BRW_MATH_PRECISION_FULL);

         if (c->dispatch_width == 16) {
            /* Second half of the 16-wide op on the upper register halves. */
            brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
            brw_math(p, sechalf(dst),
                     op,
                     inst->saturate ? BRW_MATH_SATURATE_SATURATE :
                     BRW_MATH_SATURATE_NONE,
                     0, sechalf(src[0]),
                     BRW_MATH_DATA_VECTOR,
                     BRW_MATH_PRECISION_FULL);
            brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
         }
      }
   } else /* gen <= 5 */{
      assert(inst->mlen >= 1);

      /* Pre-gen6 math is a send; the message payload lives at base_mrf. */
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_math(p, dst,
               op,
               inst->saturate ? BRW_MATH_SATURATE_SATURATE :
               BRW_MATH_SATURATE_NONE,
               inst->base_mrf, src[0],
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);

      if (c->dispatch_width == 16) {
         /* The second half's payload was staged one MRF later. */
         brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
         brw_math(p, sechalf(dst),
                  op,
                  inst->saturate ? BRW_MATH_SATURATE_SATURATE :
                  BRW_MATH_SATURATE_NONE,
                  inst->base_mrf + 1, sechalf(src[0]),
                  BRW_MATH_DATA_VECTOR,
                  BRW_MATH_PRECISION_FULL);

         brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
      }
   }
}
241
/**
 * Emit the sampler SEND for a texture instruction (TEX/TXB/TXL; TXD is
 * not supported here yet and asserts).
 *
 * Selects the sampler message type for the target generation: gen5+ has
 * explicit per-opcode/compare message types, while gen4 partly encodes
 * shadow compare and SIMD width through the message length (hence the
 * mlen asserts).  SIMD16 messages double the response length and take a
 * vec16 destination.
 */
void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   int msg_type = -1;
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;

   if (c->dispatch_width == 16)
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;

   if (intel->gen >= 5) {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
	 if (inst->shadow_compare) {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
	 } else {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
	 }
	 break;
      case FS_OPCODE_TXB:
	 if (inst->shadow_compare) {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
	 } else {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
	 }
	 break;
      case FS_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
	 } else {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
	 }
	 break;
      case FS_OPCODE_TXD:
	 assert(!"TXD isn't supported on gen5+ yet.");
	 break;
      }
   } else {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
	 /* Note that G45 and older determines shadow compare and dispatch width
	  * from message length for most messages.
	  */
	 assert(c->dispatch_width == 8);
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 6);
	 } else {
	    assert(inst->mlen <= 4);
	 }
	 break;
      case FS_OPCODE_TXB:
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 6);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
	 } else {
	    /* The non-compare bias message only exists in SIMD16 form here. */
	    assert(inst->mlen == 9);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
	 }
	 break;
      case FS_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 6);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
	 } else {
	    /* Likewise, the non-compare LOD message is SIMD16-only. */
	    assert(inst->mlen == 9);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
	 }
	 break;
      case FS_OPCODE_TXD:
	 assert(!"TXD isn't supported on gen4 yet.");
	 break;
      }
   }
   assert(msg_type != -1);

   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      /* SIMD16 responses are twice the size: 8 regs for XYZW. */
      rlen = 8;
      dst = vec16(dst);
   }

   brw_SAMPLE(p,
	      retype(dst, BRW_REGISTER_TYPE_UW),
	      inst->base_mrf,
	      src,
	      SURF_INDEX_TEXTURE(inst->sampler),
	      inst->sampler,
	      WRITEMASK_XYZW,
	      msg_type,
	      rlen,
	      inst->mlen,
	      0,
	      inst->header_present,
	      simd_mode);
}
339
340
341 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
342 * looking like:
343 *
344 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
345 *
346 * and we're trying to produce:
347 *
348 * DDX DDY
349 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
350 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
351 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
352 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
353 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
354 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
355 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
356 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
357 *
358 * and add another set of two more subspans if in 16-pixel dispatch mode.
359 *
360 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
361 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
362 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
363 * between each other. We could probably do it like ddx and swizzle the right
364 * order later, but bail for now and just produce
365 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
366 */
367 void
368 fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
369 {
370 struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
371 BRW_REGISTER_TYPE_F,
372 BRW_VERTICAL_STRIDE_2,
373 BRW_WIDTH_2,
374 BRW_HORIZONTAL_STRIDE_0,
375 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
376 struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
377 BRW_REGISTER_TYPE_F,
378 BRW_VERTICAL_STRIDE_2,
379 BRW_WIDTH_2,
380 BRW_HORIZONTAL_STRIDE_0,
381 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
382 brw_ADD(p, dst, src0, negate(src1));
383 }
384
385 void
386 fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
387 {
388 struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
389 BRW_REGISTER_TYPE_F,
390 BRW_VERTICAL_STRIDE_4,
391 BRW_WIDTH_4,
392 BRW_HORIZONTAL_STRIDE_0,
393 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
394 struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
395 BRW_REGISTER_TYPE_F,
396 BRW_VERTICAL_STRIDE_4,
397 BRW_WIDTH_4,
398 BRW_HORIZONTAL_STRIDE_0,
399 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
400 brw_ADD(p, dst, src0, negate(src1));
401 }
402
/**
 * Emit code for FS_OPCODE_DISCARD: knock the discarded channels out of
 * the pixel mask so later FB writes skip them.
 *
 * Both paths accumulate the "still live" channels through the flag
 * register f0.  This is expected to run predicated on the channels being
 * discarded (the AND below masks them out of the tracked pixel word).
 */
void
fs_visitor::generate_discard(fs_inst *inst)
{
   struct brw_reg f0 = brw_flag_reg();

   if (intel->gen >= 6) {
      /* g1.7 is where this path keeps the live-pixel word on gen6. */
      struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
      struct brw_reg some_register;

      /* As of gen6, we no longer have the mask register to look at,
       * so life gets a bit more complicated.
       */

      /* Load the flag register with all ones. */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p, f0, brw_imm_uw(0xffff));
      brw_pop_insn_state(p);

      /* Do a comparison that should always fail, to produce 0s in the flag
       * reg where we have active channels.
       */
      some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
	      BRW_CONDITIONAL_NZ, some_register, some_register);

      /* Undo CMP's whacking of predication*/
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);

      /* AND the surviving channels into the tracked pixel word. */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_AND(p, g1, f0, g1);
      brw_pop_insn_state(p);
   } else {
      /* Pre-gen6 keeps the live-pixel word in g0.0. */
      struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      /* Unlike the 965, we have the mask reg, so we just need
       * somewhere to invert that (containing channels to be disabled)
       * so it can be ANDed with the mask of pixels still to be
       * written. Use the flag reg for consistency with gen6+.
       */
      brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
      brw_AND(p, g0, f0, g0);

      brw_pop_insn_state(p);
   }
}
454
455 void
456 fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
457 {
458 assert(inst->mlen != 0);
459
460 brw_MOV(p,
461 retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
462 retype(src, BRW_REGISTER_TYPE_UD));
463 brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
464 inst->offset);
465 }
466
467 void
468 fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
469 {
470 assert(inst->mlen != 0);
471
472 /* Clear any post destination dependencies that would be ignored by
473 * the block read. See the B-Spec for pre-gen5 send instruction.
474 *
475 * This could use a better solution, since texture sampling and
476 * math reads could potentially run into it as well -- anywhere
477 * that we have a SEND with a destination that is a register that
478 * was written but not read within the last N instructions (what's
479 * N? unsure). This is rare because of dead code elimination, but
480 * not impossible.
481 */
482 if (intel->gen == 4 && !intel->is_g4x)
483 brw_MOV(p, brw_null_reg(), dst);
484
485 brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
486 inst->offset);
487
488 if (intel->gen == 4 && !intel->is_g4x) {
489 /* gen4 errata: destination from a send can't be used as a
490 * destination until it's been read. Just read it so we don't
491 * have to worry.
492 */
493 brw_MOV(p, brw_null_reg(), dst);
494 }
495 }
496
497 void
498 fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
499 {
500 assert(inst->mlen != 0);
501
502 /* Clear any post destination dependencies that would be ignored by
503 * the block read. See the B-Spec for pre-gen5 send instruction.
504 *
505 * This could use a better solution, since texture sampling and
506 * math reads could potentially run into it as well -- anywhere
507 * that we have a SEND with a destination that is a register that
508 * was written but not read within the last N instructions (what's
509 * N? unsure). This is rare because of dead code elimination, but
510 * not impossible.
511 */
512 if (intel->gen == 4 && !intel->is_g4x)
513 brw_MOV(p, brw_null_reg(), dst);
514
515 brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
516 inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);
517
518 if (intel->gen == 4 && !intel->is_g4x) {
519 /* gen4 errata: destination from a send can't be used as a
520 * destination until it's been read. Just read it so we don't
521 * have to worry.
522 */
523 brw_MOV(p, brw_null_reg(), dst);
524 }
525 }
526
527 static struct brw_reg
528 brw_reg_from_fs_reg(fs_reg *reg)
529 {
530 struct brw_reg brw_reg;
531
532 switch (reg->file) {
533 case GRF:
534 case ARF:
535 case MRF:
536 if (reg->smear == -1) {
537 brw_reg = brw_vec8_reg(reg->file,
538 reg->hw_reg, 0);
539 } else {
540 brw_reg = brw_vec1_reg(reg->file,
541 reg->hw_reg, reg->smear);
542 }
543 brw_reg = retype(brw_reg, reg->type);
544 if (reg->sechalf)
545 brw_reg = sechalf(brw_reg);
546 break;
547 case IMM:
548 switch (reg->type) {
549 case BRW_REGISTER_TYPE_F:
550 brw_reg = brw_imm_f(reg->imm.f);
551 break;
552 case BRW_REGISTER_TYPE_D:
553 brw_reg = brw_imm_d(reg->imm.i);
554 break;
555 case BRW_REGISTER_TYPE_UD:
556 brw_reg = brw_imm_ud(reg->imm.u);
557 break;
558 default:
559 assert(!"not reached");
560 brw_reg = brw_null_reg();
561 break;
562 }
563 break;
564 case FIXED_HW_REG:
565 brw_reg = reg->fixed_hw_reg;
566 break;
567 case BAD_FILE:
568 /* Probably unused. */
569 brw_reg = brw_null_reg();
570 break;
571 case UNIFORM:
572 assert(!"not reached");
573 brw_reg = brw_null_reg();
574 break;
575 default:
576 assert(!"not reached");
577 brw_reg = brw_null_reg();
578 break;
579 }
580 if (reg->abs)
581 brw_reg = brw_abs(brw_reg);
582 if (reg->negate)
583 brw_reg = negate(brw_reg);
584
585 return brw_reg;
586 }
587
/**
 * Main codegen loop: walks this->instructions (the FS LIR) and emits
 * native instructions through the brw_eu assembler in p.
 *
 * Maintains a dynamically-grown stack of DO instructions so that
 * BREAK/CONTINUE/WHILE can locate their loop, plus a per-loop-level IF
 * nesting count used by the pre-gen6 BREAK/CONT pop counts.  With
 * INTEL_DEBUG=wm set, prints the source IR annotations and a disassembly
 * of the instructions generated for each LIR instruction.
 */
void
fs_visitor::generate_code()
{
   int last_native_inst = p->nr_insn;
   const char *last_annotation_string = NULL;
   ir_instruction *last_annotation_ir = NULL;

   /* Loop bookkeeping, grown on demand when nesting exceeds 16. */
   int loop_stack_array_size = 16;
   int loop_stack_depth = 0;
   brw_instruction **loop_stack =
      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
   int *if_depth_in_loop =
      rzalloc_array(this->mem_ctx, int, loop_stack_array_size);


   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
      printf("Native code for fragment shader %d (%d-wide dispatch):\n",
	     ctx->Shader.CurrentFragmentProgram->Name, c->dispatch_width);
   }

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();
      struct brw_reg src[3], dst;

      /* Print the IR node / annotation once per group of LIR instructions
       * that came from it.
       */
      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
	 if (last_annotation_ir != inst->ir) {
	    last_annotation_ir = inst->ir;
	    if (last_annotation_ir) {
	       printf("   ");
	       last_annotation_ir->print();
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != inst->annotation) {
	    last_annotation_string = inst->annotation;
	    if (last_annotation_string)
	       printf("   %s\n", last_annotation_string);
	 }
      }

      /* Translate the LIR registers to hardware register encodings. */
      for (unsigned int i = 0; i < 3; i++) {
	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
      }
      dst = brw_reg_from_fs_reg(&inst->dst);

      /* Set up the default instruction state for this emit. */
      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicated);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

      if (inst->force_uncompressed || c->dispatch_width == 8) {
	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      } else if (inst->force_sechalf) {
	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      } else {
	 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
      }

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
	 brw_MOV(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ADD:
	 brw_ADD(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MUL:
	 brw_MUL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_FRC:
	 brw_FRC(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDD:
	 brw_RNDD(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDE:
	 brw_RNDE(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDZ:
	 brw_RNDZ(p, dst, src[0]);
	 break;

      case BRW_OPCODE_AND:
	 brw_AND(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_OR:
	 brw_OR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_XOR:
	 brw_XOR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_NOT:
	 brw_NOT(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ASR:
	 brw_ASR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHR:
	 brw_SHR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHL:
	 brw_SHL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_CMP:
	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
	 break;
      case BRW_OPCODE_SEL:
	 brw_SEL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_IF:
	 if (inst->src[0].file != BAD_FILE) {
	    /* The instruction has an embedded compare (only allowed on gen6) */
	    assert(intel->gen == 6);
	    gen6_IF(p, inst->conditional_mod, src[0], src[1]);
	 } else {
	    brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
	 }
	 if_depth_in_loop[loop_stack_depth]++;
	 break;

      case BRW_OPCODE_ELSE:
	 brw_ELSE(p);
	 break;
      case BRW_OPCODE_ENDIF:
	 brw_ENDIF(p);
	 if_depth_in_loop[loop_stack_depth]--;
	 break;

      case BRW_OPCODE_DO:
	 /* Push the DO on the loop stack, growing both arrays if needed. */
	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
	 if (loop_stack_array_size <= loop_stack_depth) {
	    loop_stack_array_size *= 2;
	    loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
				  loop_stack_array_size);
	    if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
				        loop_stack_array_size);
	 }
	 if_depth_in_loop[loop_stack_depth] = 0;
	 break;

      case BRW_OPCODE_BREAK:
	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;
      case BRW_OPCODE_CONTINUE:
	 /* FINISHME: We need to write the loop instruction support still. */
	 if (intel->gen >= 6)
	    gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
	 else
	    brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;

      case BRW_OPCODE_WHILE: {
	 struct brw_instruction *inst0, *inst1;
	 GLuint br = 1;

	 /* Jump counts are in units of 64 bits on gen5+, 128 bits before. */
	 if (intel->gen >= 5)
	    br = 2;

	 assert(loop_stack_depth > 0);
	 loop_stack_depth--;
	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
	 if (intel->gen < 6) {
	    /* patch all the BREAK/CONT instructions from last BGNLOOP */
	    while (inst0 > loop_stack[loop_stack_depth]) {
	       inst0--;
	       if (inst0->header.opcode == BRW_OPCODE_BREAK &&
		   inst0->bits3.if_else.jump_count == 0) {
		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
	       }
	       else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
			inst0->bits3.if_else.jump_count == 0) {
		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
	       }
	    }
	 }
      }
	 break;

      case FS_OPCODE_RCP:
      case FS_OPCODE_RSQ:
      case FS_OPCODE_SQRT:
      case FS_OPCODE_EXP2:
      case FS_OPCODE_LOG2:
      case FS_OPCODE_POW:
      case FS_OPCODE_SIN:
      case FS_OPCODE_COS:
	 generate_math(inst, dst, src);
	 break;
      case FS_OPCODE_PIXEL_X:
	 generate_pixel_xy(dst, true);
	 break;
      case FS_OPCODE_PIXEL_Y:
	 generate_pixel_xy(dst, false);
	 break;
      case FS_OPCODE_CINTERP:
	 brw_MOV(p, dst, src[0]);
	 break;
      case FS_OPCODE_LINTERP:
	 generate_linterp(inst, dst, src);
	 break;
      case FS_OPCODE_TEX:
      case FS_OPCODE_TXB:
      case FS_OPCODE_TXD:
      case FS_OPCODE_TXL:
	 generate_tex(inst, dst, src[0]);
	 break;
      case FS_OPCODE_DISCARD:
	 generate_discard(inst);
	 break;
      case FS_OPCODE_DDX:
	 generate_ddx(inst, dst, src[0]);
	 break;
      case FS_OPCODE_DDY:
	 generate_ddy(inst, dst, src[0]);
	 break;

      case FS_OPCODE_SPILL:
	 generate_spill(inst, src[0]);
	 break;

      case FS_OPCODE_UNSPILL:
	 generate_unspill(inst, dst);
	 break;

      case FS_OPCODE_PULL_CONSTANT_LOAD:
	 generate_pull_constant_load(inst, dst);
	 break;

      case FS_OPCODE_FB_WRITE:
	 generate_fb_write(inst);
	 break;
      default:
	 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
			  brw_opcodes[inst->opcode].name);
	 } else {
	    _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
	 }
	 fail("unsupported opcode in FS\n");
      }

      /* Disassemble whatever native instructions this LIR inst produced. */
      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
	 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
	    if (0) {
	       printf("0x%08x 0x%08x 0x%08x 0x%08x ",
		      ((uint32_t *)&p->store[i])[3],
		      ((uint32_t *)&p->store[i])[2],
		      ((uint32_t *)&p->store[i])[1],
		      ((uint32_t *)&p->store[i])[0]);
	    }
	    brw_disasm(stdout, &p->store[i], intel->gen);
	 }
      }

      last_native_inst = p->nr_insn;
   }

   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
      printf("\n");
   }

   ralloc_free(loop_stack);
   ralloc_free(if_depth_in_loop);

   /* Fix up the UIP/JIP fields of flow-control instructions now that all
    * targets are known.
    */
   brw_set_uip_jip(p);

   /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
    * emit issues, it doesn't get the jump distances into the output,
    * which is often something we want to debug.  So this is here in
    * case you're doing that.
    */
   if (0) {
      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
	 for (unsigned int i = 0; i < p->nr_insn; i++) {
	    printf("0x%08x 0x%08x 0x%08x 0x%08x ",
		   ((uint32_t *)&p->store[i])[3],
		   ((uint32_t *)&p->store[i])[2],
		   ((uint32_t *)&p->store[i])[1],
		   ((uint32_t *)&p->store[i])[0]);
	    brw_disasm(stdout, &p->store[i], intel->gen);
	 }
      }
   }
}