i965: Rewrite the HiZ op
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_emit.cpp
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file brw_fs_emit.cpp
 *
 * This file supports emitting code from the FS LIR to the actual
 * native instructions.
 */

extern "C" {
#include "main/macros.h"
#include "brw_context.h"
#include "brw_eu.h"
} /* extern "C" */

#include "brw_fs.h"
#include "glsl/ir_print_visitor.h"

void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   bool eot = inst->eot;
   struct brw_reg implied_header;

   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   if (inst->header_present) {
      if (intel->gen >= 6) {
         brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(p,
                 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);

         if (inst->target > 0) {
            /* Set the render target index for choosing BLEND_STATE. */
            brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                           inst->base_mrf, 2),
                              BRW_REGISTER_TYPE_UD),
                    brw_imm_ud(inst->target));
         }

         implied_header = brw_null_reg();
      } else {
         implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);

         brw_MOV(p,
                 brw_message_reg(inst->base_mrf + 1),
                 brw_vec8_grf(1, 0));
      }
   } else {
      implied_header = brw_null_reg();
   }

   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
                c->dispatch_width,
                inst->base_mrf,
                implied_header,
                inst->target,
                inst->mlen,
                0,
                eot,
                inst->header_present);
}

/* Computes the integer pixel x,y values from the origin.
 *
 * This is the basis of gl_FragCoord computation, but it is also used
 * pre-gen6 for computing the deltas from v0 that are used for
 * interpolation.
 */
void
fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
   struct brw_reg src;
   struct brw_reg deltas;

   if (is_x) {
      src = stride(suboffset(g1_uw, 4), 2, 4, 0);
      deltas = brw_imm_v(0x10101010);
   } else {
      src = stride(suboffset(g1_uw, 5), 2, 4, 0);
      deltas = brw_imm_v(0x11001100);
   }
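
   /* A note on the magic numbers, as I read them: g1 holds the screen-space
    * (X, Y) origin of each 2x2 subspan as pairs of UWs, so the <2;4,0> region
    * above replicates a subspan's X (or Y) across its four pixels.  The
    * immediate is a packed vector of eight signed 4-bit values (type V):
    * 0x10101010 unpacks to {0,1,0,1,0,1,0,1} and 0x11001100 to
    * {0,0,1,1,0,0,1,1}, i.e. the per-pixel x/y offsets within a subspan.
    */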

   if (c->dispatch_width == 16) {
      dst = vec16(dst);
   }

   /* We do this 8 or 16-wide, but since the destination is UW we
    * don't do compression in the 16-wide case.
    */
   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_ADD(p, dst, src, deltas);
   brw_pop_insn_state(p);
}

void
fs_visitor::generate_linterp(fs_inst *inst,
                             struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

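   /* As I understand the two paths: PLN evaluates the full plane equation
    * (interp.0 * delta_x + interp.1 * delta_y + interp.3) in one instruction,
    * but it wants delta_x/delta_y in consecutive registers (and an
    * even-aligned pair before gen6).  The fallback gets the same result in
    * two steps: LINE accumulates interp.0 * delta_x + interp.3, then MAC adds
    * interp.1 * delta_y on top of the accumulator.
    */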
   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}

void
fs_visitor::generate_math1_gen7(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg src0)
{
   assert(inst->mlen == 0);
   brw_math(p, dst,
            brw_math_function(inst->opcode),
            inst->saturate ? BRW_MATH_SATURATE_SATURATE
                           : BRW_MATH_SATURATE_NONE,
            0, src0,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}

void
fs_visitor::generate_math2_gen7(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   assert(inst->mlen == 0);
   brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1);
}

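/* A note on the gen6 and gen4 helpers below: the math instruction doesn't
 * appear to support instruction compression, so in 16-wide dispatch it is
 * emitted twice -- once uncompressed for the first half and once with the
 * second-half compression control and sechalf() sources/destination.
 */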
void
fs_visitor::generate_math1_gen6(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg src0)
{
   int op = brw_math_function(inst->opcode);

   assert(inst->mlen == 0);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p, dst,
            op,
            inst->saturate ? BRW_MATH_SATURATE_SATURATE :
            BRW_MATH_SATURATE_NONE,
            0, src0,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);

   if (c->dispatch_width == 16) {
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math(p, sechalf(dst),
               op,
               inst->saturate ? BRW_MATH_SATURATE_SATURATE :
               BRW_MATH_SATURATE_NONE,
               0, sechalf(src0),
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);
      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}

void
fs_visitor::generate_math2_gen6(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   int op = brw_math_function(inst->opcode);

   assert(inst->mlen == 0);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math2(p, dst, op, src0, src1);

   if (c->dispatch_width == 16) {
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}

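/* On gen4/gen5 math is not a regular ALU instruction but a send to the
 * shared math function, which is why the operand has to be staged in message
 * registers (hence the mlen assert and base_mrf use below) rather than being
 * read straight from the GRF.
 */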
void
fs_visitor::generate_math_gen4(fs_inst *inst,
                               struct brw_reg dst,
                               struct brw_reg src)
{
   int op = brw_math_function(inst->opcode);

   assert(inst->mlen >= 1);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p, dst,
            op,
            inst->saturate ? BRW_MATH_SATURATE_SATURATE :
            BRW_MATH_SATURATE_NONE,
            inst->base_mrf, src,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);

   if (c->dispatch_width == 16) {
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math(p, sechalf(dst),
               op,
               inst->saturate ? BRW_MATH_SATURATE_SATURATE :
               BRW_MATH_SATURATE_NONE,
               inst->base_mrf + 1, sechalf(src),
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);

      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}

void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   int msg_type = -1;
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
   uint32_t return_format;

   switch (dst.type) {
   case BRW_REGISTER_TYPE_D:
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
      break;
   case BRW_REGISTER_TYPE_UD:
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
      break;
   default:
      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      break;
   }

   if (c->dispatch_width == 16)
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;

   if (intel->gen >= 5) {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
         }
         break;
      case SHADER_OPCODE_TXL:
         if (inst->shadow_compare) {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
         }
         break;
      case SHADER_OPCODE_TXS:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
         break;
      case SHADER_OPCODE_TXD:
         /* There is no sample_d_c message; comparisons are done manually */
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
         break;
      case SHADER_OPCODE_TXF:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      default:
         assert(!"not reached");
         break;
      }
   } else {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
         /* Note that G45 and older determine shadow compare and dispatch width
          * from the message length for most messages.
          */
         assert(c->dispatch_width == 8);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
         } else {
            assert(inst->mlen <= 4);
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
         } else {
            assert(inst->mlen == 9);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      case SHADER_OPCODE_TXL:
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
         } else {
            assert(inst->mlen == 9);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      case SHADER_OPCODE_TXD:
         /* There is no sample_d_c message; comparisons are done manually */
         assert(inst->mlen == 7 || inst->mlen == 10);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
         break;
      case SHADER_OPCODE_TXF:
         assert(inst->mlen == 9);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
         simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         break;
      case SHADER_OPCODE_TXS:
         assert(inst->mlen == 3);
         msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
         simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         break;
      default:
         assert(!"not reached");
         break;
      }
   }
   assert(msg_type != -1);

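   /* The sampler writes back one GRF per channel of RGBA, so the response
    * length is 4 registers at SIMD8; at SIMD16 each channel spans two GRFs,
    * hence 8.
    */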
   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      rlen = 8;
      dst = vec16(dst);
   }

   brw_SAMPLE(p,
              retype(dst, BRW_REGISTER_TYPE_UW),
              inst->base_mrf,
              src,
              SURF_INDEX_TEXTURE(inst->sampler),
              inst->sampler,
              WRITEMASK_XYZW,
              msg_type,
              rlen,
              inst->mlen,
              inst->header_present,
              simd_mode,
              return_format);
}


/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 * looking like:
 *
 *  arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * and we're trying to produce:
 *
 *           DDX                     DDY
 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 *
 * and add another set of two more subspans if in 16-pixel dispatch mode.
 *
 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 * pair.  But for DDY, it's harder, as we want to produce the pairs swizzled
 * between each other.  We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 */
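/* To make the regioning below easier to read: the sources are
 * <vstride; width, hstride> regions starting at the given subregister.  For
 * DDX, <2;2,0> at subreg 1 reads {tr,tr, br,br, ...} while the same region at
 * subreg 0 reads {tl,tl, bl,bl, ...}, so the subtraction produces the DDX
 * column of the table above.  For DDY, <4;4,0> at subreg 0 vs. subreg 2 gives
 * tl and bl each replicated four times per subspan, which is the simplified
 * result the comment above settles for.
 */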
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

void
fs_visitor::generate_discard(fs_inst *inst)
{
   struct brw_reg f0 = brw_flag_reg();

   if (intel->gen >= 6) {
      struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
      struct brw_reg some_register;

      /* As of gen6, we no longer have the mask register to look at,
       * so life gets a bit more complicated.
       */

      /* Load the flag register with all ones. */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p, f0, brw_imm_uw(0xffff));
      brw_pop_insn_state(p);

      /* Do a comparison that should always fail, to produce 0s in the flag
       * reg where we have active channels.
       */
      some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
              BRW_CONDITIONAL_NZ, some_register, some_register);

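      /* At this point, if I'm reading the trick right, f0 holds 1s only for
       * the channels that are *not* executing the discard, so ANDing it into
       * this word of g1 drops the discarded channels from the pixel mask that
       * ends up in the framebuffer write header.
       */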
      /* Undo CMP's whacking of predication */
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_AND(p, g1, f0, g1);
      brw_pop_insn_state(p);
   } else {
      struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      /* Unlike the 965, we have the mask reg, so we just need
       * somewhere to invert that (containing channels to be disabled)
       * so it can be ANDed with the mask of pixels still to be
       * written. Use the flag reg for consistency with gen6+.
       */
      brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
      brw_AND(p, g0, f0, g0);

      brw_pop_insn_state(p);
   }
}

void
fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
{
   assert(inst->mlen != 0);

   brw_MOV(p,
           retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
           retype(src, BRW_REGISTER_TYPE_UD));
   brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
                                 inst->offset);
}

void
fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->mlen != 0);

   /* Clear any post destination dependencies that would be ignored by
    * the block read.  See the B-Spec for pre-gen5 send instruction.
    *
    * This could use a better solution, since texture sampling and
    * math reads could potentially run into it as well -- anywhere
    * that we have a SEND with a destination that is a register that
    * was written but not read within the last N instructions (what's
    * N? unsure). This is rare because of dead code elimination, but
    * not impossible.
    */
   if (intel->gen == 4 && !intel->is_g4x)
      brw_MOV(p, brw_null_reg(), dst);

   brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
                                inst->offset);

   if (intel->gen == 4 && !intel->is_g4x) {
      /* gen4 errata: destination from a send can't be used as a
       * destination until it's been read. Just read it so we don't
       * have to worry.
       */
      brw_MOV(p, brw_null_reg(), dst);
   }
}

void
fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->mlen != 0);

   /* Clear any post destination dependencies that would be ignored by
    * the block read.  See the B-Spec for pre-gen5 send instruction.
    *
    * This could use a better solution, since texture sampling and
    * math reads could potentially run into it as well -- anywhere
    * that we have a SEND with a destination that is a register that
    * was written but not read within the last N instructions (what's
    * N? unsure). This is rare because of dead code elimination, but
    * not impossible.
    */
   if (intel->gen == 4 && !intel->is_g4x)
      brw_MOV(p, brw_null_reg(), dst);

   brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
                        inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);

   if (intel->gen == 4 && !intel->is_g4x) {
      /* gen4 errata: destination from a send can't be used as a
       * destination until it's been read. Just read it so we don't
       * have to worry.
       */
      brw_MOV(p, brw_null_reg(), dst);
   }
}

static uint32_t brw_file_from_reg(fs_reg *reg)
{
   switch (reg->file) {
   case ARF:
      return BRW_ARCHITECTURE_REGISTER_FILE;
   case GRF:
      return BRW_GENERAL_REGISTER_FILE;
   case MRF:
      return BRW_MESSAGE_REGISTER_FILE;
   case IMM:
      return BRW_IMMEDIATE_VALUE;
   default:
      assert(!"not reached");
      return BRW_GENERAL_REGISTER_FILE;
   }
}

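/* Translate an fs_reg from the LIR into the brw_reg the EU emitter wants.
 * One non-obvious bit, as far as I can tell: reg->smear >= 0 means "use a
 * single component of the register", so it becomes a scalar vec1 region
 * instead of the usual vec8.
 */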
static struct brw_reg
brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      if (reg->smear == -1) {
         brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
      } else {
         brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, reg->smear);
      }
      brw_reg = retype(brw_reg, reg->type);
      if (reg->sechalf)
         brw_reg = sechalf(brw_reg);
      break;
   case IMM:
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
         brw_reg = brw_imm_f(reg->imm.f);
         break;
      case BRW_REGISTER_TYPE_D:
         brw_reg = brw_imm_d(reg->imm.i);
         break;
      case BRW_REGISTER_TYPE_UD:
         brw_reg = brw_imm_ud(reg->imm.u);
         break;
      default:
         assert(!"not reached");
         brw_reg = brw_null_reg();
         break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case UNIFORM:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   default:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   if (reg->abs)
      brw_reg = brw_abs(brw_reg);
   if (reg->negate)
      brw_reg = negate(brw_reg);

   return brw_reg;
}

void
fs_visitor::generate_code()
{
   int last_native_inst = p->nr_insn;
   const char *last_annotation_string = NULL;
   ir_instruction *last_annotation_ir = NULL;

   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
      printf("Native code for fragment shader %d (%d-wide dispatch):\n",
             prog->Name, c->dispatch_width);
   }

   foreach_list(node, &this->instructions) {
      fs_inst *inst = (fs_inst *)node;
      struct brw_reg src[3], dst;

      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
         if (last_annotation_ir != inst->ir) {
            last_annotation_ir = inst->ir;
            if (last_annotation_ir) {
               printf(" ");
               last_annotation_ir->print();
               printf("\n");
            }
         }
         if (last_annotation_string != inst->annotation) {
            last_annotation_string = inst->annotation;
            if (last_annotation_string)
               printf(" %s\n", last_annotation_string);
         }
      }

      for (unsigned int i = 0; i < 3; i++) {
         src[i] = brw_reg_from_fs_reg(&inst->src[i]);

         /* The accumulator result appears to get used for the
          * conditional modifier generation. When negating a UD
          * value, there is a 33rd bit generated for the sign in the
          * accumulator value, so now you can't check, for example,
          * equality with a 32-bit value. See piglit fs-op-neg-uvec4.
          */
         assert(!inst->conditional_mod ||
                inst->src[i].type != BRW_REGISTER_TYPE_UD ||
                !inst->src[i].negate);
      }
      dst = brw_reg_from_fs_reg(&inst->dst);

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicated);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

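      /* In 16-wide dispatch an instruction normally runs compressed (all 16
       * channels at once); as I understand it, force_uncompressed and
       * force_sechalf are how the LIR requests just the first or just the
       * second 8-wide half when an operation can't be compressed.
       */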
      if (inst->force_uncompressed || c->dispatch_width == 8) {
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      } else if (inst->force_sechalf) {
         brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      } else {
         brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
      }

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
         brw_MOV(p, dst, src[0]);
         break;
      case BRW_OPCODE_ADD:
         brw_ADD(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MUL:
         brw_MUL(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MACH:
         brw_set_acc_write_control(p, 1);
         brw_MACH(p, dst, src[0], src[1]);
         brw_set_acc_write_control(p, 0);
         break;

      case BRW_OPCODE_FRC:
         brw_FRC(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDD:
         brw_RNDD(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDE:
         brw_RNDE(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDZ:
         brw_RNDZ(p, dst, src[0]);
         break;

      case BRW_OPCODE_AND:
         brw_AND(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_OR:
         brw_OR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_XOR:
         brw_XOR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_NOT:
         brw_NOT(p, dst, src[0]);
         break;
      case BRW_OPCODE_ASR:
         brw_ASR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHR:
         brw_SHR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHL:
         brw_SHL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_CMP:
         brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
         break;
      case BRW_OPCODE_SEL:
         brw_SEL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_IF:
         if (inst->src[0].file != BAD_FILE) {
            /* The instruction has an embedded compare (only allowed on gen6) */
            assert(intel->gen == 6);
            gen6_IF(p, inst->conditional_mod, src[0], src[1]);
         } else {
            brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
         }
         break;

      case BRW_OPCODE_ELSE:
         brw_ELSE(p);
         break;
      case BRW_OPCODE_ENDIF:
         brw_ENDIF(p);
         break;

      case BRW_OPCODE_DO:
         brw_DO(p, BRW_EXECUTE_8);
         break;

      case BRW_OPCODE_BREAK:
         brw_BREAK(p);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;
      case BRW_OPCODE_CONTINUE:
         /* FINISHME: We need to write the loop instruction support still. */
         if (intel->gen >= 6)
            gen6_CONT(p);
         else
            brw_CONT(p);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;

      case BRW_OPCODE_WHILE:
         brw_WHILE(p);
         break;

      case SHADER_OPCODE_RCP:
      case SHADER_OPCODE_RSQ:
      case SHADER_OPCODE_SQRT:
      case SHADER_OPCODE_EXP2:
      case SHADER_OPCODE_LOG2:
      case SHADER_OPCODE_SIN:
      case SHADER_OPCODE_COS:
         if (intel->gen >= 7) {
            generate_math1_gen7(inst, dst, src[0]);
         } else if (intel->gen == 6) {
            generate_math1_gen6(inst, dst, src[0]);
         } else {
            generate_math_gen4(inst, dst, src[0]);
         }
         break;
      case SHADER_OPCODE_INT_QUOTIENT:
      case SHADER_OPCODE_INT_REMAINDER:
      case SHADER_OPCODE_POW:
         if (intel->gen >= 7) {
            generate_math2_gen7(inst, dst, src[0], src[1]);
         } else if (intel->gen == 6) {
            generate_math2_gen6(inst, dst, src[0], src[1]);
         } else {
            generate_math_gen4(inst, dst, src[0]);
         }
         break;
      case FS_OPCODE_PIXEL_X:
         generate_pixel_xy(dst, true);
         break;
      case FS_OPCODE_PIXEL_Y:
         generate_pixel_xy(dst, false);
         break;
      case FS_OPCODE_CINTERP:
         brw_MOV(p, dst, src[0]);
         break;
      case FS_OPCODE_LINTERP:
         generate_linterp(inst, dst, src);
         break;
      case SHADER_OPCODE_TEX:
      case FS_OPCODE_TXB:
      case SHADER_OPCODE_TXD:
      case SHADER_OPCODE_TXF:
      case SHADER_OPCODE_TXL:
      case SHADER_OPCODE_TXS:
         generate_tex(inst, dst, src[0]);
         break;
      case FS_OPCODE_DISCARD:
         generate_discard(inst);
         break;
      case FS_OPCODE_DDX:
         generate_ddx(inst, dst, src[0]);
         break;
      case FS_OPCODE_DDY:
         generate_ddy(inst, dst, src[0]);
         break;

      case FS_OPCODE_SPILL:
         generate_spill(inst, src[0]);
         break;

      case FS_OPCODE_UNSPILL:
         generate_unspill(inst, dst);
         break;

      case FS_OPCODE_PULL_CONSTANT_LOAD:
         generate_pull_constant_load(inst, dst);
         break;

      case FS_OPCODE_FB_WRITE:
         generate_fb_write(inst);
         break;
      default:
         if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
            _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
                          brw_opcodes[inst->opcode].name);
         } else {
            _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
         }
         fail("unsupported opcode in FS\n");
      }

      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
         for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
            if (0) {
               printf("0x%08x 0x%08x 0x%08x 0x%08x ",
                      ((uint32_t *)&p->store[i])[3],
                      ((uint32_t *)&p->store[i])[2],
                      ((uint32_t *)&p->store[i])[1],
                      ((uint32_t *)&p->store[i])[0]);
            }
            brw_disasm(stdout, &p->store[i], intel->gen);
         }
      }

      last_native_inst = p->nr_insn;
   }

   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
      printf("\n");
   }

   brw_set_uip_jip(p);

   /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
    * emit issues, it doesn't get the jump distances into the output,
    * which is often something we want to debug.  So this is here in
    * case you're doing that.
    */
   if (0) {
      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
         for (unsigned int i = 0; i < p->nr_insn; i++) {
            printf("0x%08x 0x%08x 0x%08x 0x%08x ",
                   ((uint32_t *)&p->store[i])[3],
                   ((uint32_t *)&p->store[i])[2],
                   ((uint32_t *)&p->store[i])[1],
                   ((uint32_t *)&p->store[i])[0]);
            brw_disasm(stdout, &p->store[i], intel->gen);
         }
      }
   }
928 }