/* i965/vs: Implement vec4_visitor::generate_tex().
 * [mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_emit.cpp
 */
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_fs_emit.cpp
25 *
26 * This file supports emitting code from the FS LIR to the actual
27 * native instructions.
28 */
29
30 extern "C" {
31 #include "main/macros.h"
32 #include "brw_context.h"
33 #include "brw_eu.h"
34 } /* extern "C" */
35
36 #include "brw_fs.h"
37 #include "glsl/ir_print_visitor.h"
38
/**
 * Emit the render-target write SEND for an FS_OPCODE_FB_WRITE.
 *
 * Builds the optional message header in the MRF (g0 copy, plus the
 * render-target index for MRT on gen6+) and then issues brw_fb_WRITE.
 * When inst->eot is set, the write also terminates the thread.
 */
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   bool eot = inst->eot;
   struct brw_reg implied_header;

   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (inst->header_present) {
      if (intel->gen >= 6) {
         /* gen6+: copy g0 into the first header register explicitly. */
         brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(p,
                 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);

         if (inst->target > 0) {
            /* Set the render target index for choosing BLEND_STATE. */
            brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                           inst->base_mrf, 2),
                              BRW_REGISTER_TYPE_UD),
                    brw_imm_ud(inst->target));
         }

         /* Header was written explicitly above, so no implied source. */
         implied_header = brw_null_reg();
      } else {
         /* Pre-gen6: g0 is supplied as the implied header source and
          * g1 is copied into the second header register here.
          */
         implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);

         brw_MOV(p,
                 brw_message_reg(inst->base_mrf + 1),
                 brw_vec8_grf(1, 0));
      }
   } else {
      implied_header = brw_null_reg();
   }

   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
                c->dispatch_width,
                inst->base_mrf,
                implied_header,
                inst->target,
                inst->mlen,
                0,
                eot,
                inst->header_present);
}
92
93 /* Computes the integer pixel x,y values from the origin.
94 *
95 * This is the basis of gl_FragCoord computation, but is also used
96 * pre-gen6 for computing the deltas from v0 for computing
97 * interpolation.
98 */
99 void
100 fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
101 {
102 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
103 struct brw_reg src;
104 struct brw_reg deltas;
105
106 if (is_x) {
107 src = stride(suboffset(g1_uw, 4), 2, 4, 0);
108 deltas = brw_imm_v(0x10101010);
109 } else {
110 src = stride(suboffset(g1_uw, 5), 2, 4, 0);
111 deltas = brw_imm_v(0x11001100);
112 }
113
114 if (c->dispatch_width == 16) {
115 dst = vec16(dst);
116 }
117
118 /* We do this 8 or 16-wide, but since the destination is UW we
119 * don't do compression in the 16-wide case.
120 */
121 brw_push_insn_state(p);
122 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
123 brw_ADD(p, dst, src, deltas);
124 brw_pop_insn_state(p);
125 }
126
127 void
128 fs_visitor::generate_linterp(fs_inst *inst,
129 struct brw_reg dst, struct brw_reg *src)
130 {
131 struct brw_reg delta_x = src[0];
132 struct brw_reg delta_y = src[1];
133 struct brw_reg interp = src[2];
134
135 if (brw->has_pln &&
136 delta_y.nr == delta_x.nr + 1 &&
137 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
138 brw_PLN(p, dst, interp, delta_x);
139 } else {
140 brw_LINE(p, brw_null_reg(), interp, delta_x);
141 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
142 }
143 }
144
/**
 * Emit a one-source math instruction on gen7, where math executes
 * directly on register operands (no MRF payload, so mlen must be 0).
 */
void
fs_visitor::generate_math1_gen7(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg src0)
{
   assert(inst->mlen == 0);
   brw_math(p, dst,
            brw_math_function(inst->opcode),
            inst->saturate ? BRW_MATH_SATURATE_SATURATE
                           : BRW_MATH_SATURATE_NONE,
            0, src0,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}
159
/**
 * Emit a two-source math instruction (POW, INT_QUOTIENT, INT_REMAINDER)
 * on gen7; operands come straight from registers (mlen must be 0).
 */
void
fs_visitor::generate_math2_gen7(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   assert(inst->mlen == 0);
   brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1);
}
169
/**
 * Emit a one-source math instruction on gen6.
 *
 * Math on gen6 takes register operands (mlen == 0), but a 16-wide
 * instruction is emitted as two uncompressed 8-wide halves, the second
 * operating on the sechalf() of each register.
 */
void
fs_visitor::generate_math1_gen6(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg src0)
{
   int op = brw_math_function(inst->opcode);

   assert(inst->mlen == 0);

   /* First (or only) 8-wide half. */
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p, dst,
            op,
            inst->saturate ? BRW_MATH_SATURATE_SATURATE :
            BRW_MATH_SATURATE_NONE,
            0, src0,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);

   if (c->dispatch_width == 16) {
      /* Second half, then restore compressed mode for later code. */
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math(p, sechalf(dst),
               op,
               inst->saturate ? BRW_MATH_SATURATE_SATURATE :
               BRW_MATH_SATURATE_NONE,
               0, sechalf(src0),
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);
      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}
200
/**
 * Emit a two-source math instruction on gen6; like the one-source case,
 * a 16-wide op is split into two uncompressed 8-wide halves.
 */
void
fs_visitor::generate_math2_gen6(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   int op = brw_math_function(inst->opcode);

   assert(inst->mlen == 0);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math2(p, dst, op, src0, src1);

   if (c->dispatch_width == 16) {
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}
220
/**
 * Emit a math instruction on gen4/5, where math is a SEND whose operand
 * payload lives in the MRF (hence mlen >= 1).  A 16-wide op is emitted
 * as two 8-wide halves reading consecutive message registers.
 */
void
fs_visitor::generate_math_gen4(fs_inst *inst,
                               struct brw_reg dst,
                               struct brw_reg src)
{
   int op = brw_math_function(inst->opcode);

   assert(inst->mlen >= 1);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p, dst,
            op,
            inst->saturate ? BRW_MATH_SATURATE_SATURATE :
            BRW_MATH_SATURATE_NONE,
            inst->base_mrf, src,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);

   if (c->dispatch_width == 16) {
      /* Second half uses the next MRF and the second register half. */
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math(p, sechalf(dst),
               op,
               inst->saturate ? BRW_MATH_SATURATE_SATURATE :
               BRW_MATH_SATURATE_NONE,
               inst->base_mrf + 1, sechalf(src),
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);

      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
}
252
/**
 * Emit the sampler SEND for a texturing instruction.
 *
 * Chooses the sampler message type from the IR opcode (gen5+ and gen4
 * use different encodings), the SIMD mode from the dispatch width, and
 * the return format from the destination register type.  The message
 * payload has already been assembled in the MRF starting at base_mrf.
 */
void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   int msg_type = -1;
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
   uint32_t return_format;

   /* The sampler can return integer or float data; match the dst type. */
   switch (dst.type) {
   case BRW_REGISTER_TYPE_D:
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
      break;
   case BRW_REGISTER_TYPE_UD:
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
      break;
   default:
      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      break;
   }

   if (c->dispatch_width == 16)
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;

   if (intel->gen >= 5) {
      /* gen5+ message types encode shadow-compare variants directly. */
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
	 if (inst->shadow_compare) {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
	 } else {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
	 }
	 break;
      case FS_OPCODE_TXB:
	 if (inst->shadow_compare) {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
	 } else {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
	 }
	 break;
      case SHADER_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
	 } else {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
	 }
	 break;
      case SHADER_OPCODE_TXS:
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
	 break;
      case SHADER_OPCODE_TXD:
	 /* There is no sample_d_c message; comparisons are done manually */
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
	 break;
      case SHADER_OPCODE_TXF:
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
	 break;
      default:
	 assert(!"not reached");
	 break;
      }
   } else {
      /* gen4: message types are per-SIMD-width, and the expected
       * payload lengths are asserted against what the visitor built.
       */
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
	 /* Note that G45 and older determines shadow compare and dispatch width
	  * from message length for most messages.
	  */
	 assert(c->dispatch_width == 8);
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 6);
	 } else {
	    assert(inst->mlen <= 4);
	 }
	 break;
      case FS_OPCODE_TXB:
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 6);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
	 } else {
	    assert(inst->mlen == 9);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
	 }
	 break;
      case SHADER_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 6);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
	 } else {
	    assert(inst->mlen == 9);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
	 }
	 break;
      case SHADER_OPCODE_TXD:
	 /* There is no sample_d_c message; comparisons are done manually */
	 assert(inst->mlen == 7 || inst->mlen == 10);
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
	 break;
      case SHADER_OPCODE_TXF:
	 assert(inst->mlen == 9);
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
	 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
	 break;
      case SHADER_OPCODE_TXS:
	 assert(inst->mlen == 3);
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
	 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
	 break;
      default:
	 assert(!"not reached");
	 break;
      }
   }
   assert(msg_type != -1);

   /* SIMD16 responses are twice as long (two regs per channel). */
   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      rlen = 8;
      dst = vec16(dst);
   }

   brw_SAMPLE(p,
	      retype(dst, BRW_REGISTER_TYPE_UW),
	      inst->base_mrf,
	      src,
	      SURF_INDEX_TEXTURE(inst->sampler),
	      inst->sampler,
	      WRITEMASK_XYZW,
	      msg_type,
	      rlen,
	      inst->mlen,
	      inst->header_present,
	      simd_mode,
	      return_format);
}
388
389
390 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
391 * looking like:
392 *
393 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
394 *
395 * and we're trying to produce:
396 *
397 * DDX DDY
398 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
399 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
400 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
401 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
402 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
403 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
404 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
405 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
406 *
407 * and add another set of two more subspans if in 16-pixel dispatch mode.
408 *
409 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
410 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
411 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
412 * between each other. We could probably do it like ddx and swizzle the right
413 * order later, but bail for now and just produce
414 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
415 */
416 void
417 fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
418 {
419 struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
420 BRW_REGISTER_TYPE_F,
421 BRW_VERTICAL_STRIDE_2,
422 BRW_WIDTH_2,
423 BRW_HORIZONTAL_STRIDE_0,
424 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
425 struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
426 BRW_REGISTER_TYPE_F,
427 BRW_VERTICAL_STRIDE_2,
428 BRW_WIDTH_2,
429 BRW_HORIZONTAL_STRIDE_0,
430 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
431 brw_ADD(p, dst, src0, negate(src1));
432 }
433
434 void
435 fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
436 {
437 struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
438 BRW_REGISTER_TYPE_F,
439 BRW_VERTICAL_STRIDE_4,
440 BRW_WIDTH_4,
441 BRW_HORIZONTAL_STRIDE_0,
442 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
443 struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
444 BRW_REGISTER_TYPE_F,
445 BRW_VERTICAL_STRIDE_4,
446 BRW_WIDTH_4,
447 BRW_HORIZONTAL_STRIDE_0,
448 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
449 brw_ADD(p, dst, src0, negate(src1));
450 }
451
/**
 * Emit code for FS_OPCODE_DISCARD: clear the flag-register bits for the
 * channels being discarded and AND them into the pixel mask so later
 * writes skip those pixels.
 */
void
fs_visitor::generate_discard(fs_inst *inst)
{
   struct brw_reg f0 = brw_flag_reg();

   if (intel->gen >= 6) {
      /* NOTE(review): g1.7 here is presumed to hold the dispatched
       * pixel mask on gen6 -- confirm against the payload layout.
       */
      struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
      struct brw_reg some_register;

      /* As of gen6, we no longer have the mask register to look at,
       * so life gets a bit more complicated.
       */

      /* Load the flag register with all ones. */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p, f0, brw_imm_uw(0xffff));
      brw_pop_insn_state(p);

      /* Do a comparison that should always fail, to produce 0s in the flag
       * reg where we have active channels.
       */
      some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
	      BRW_CONDITIONAL_NZ, some_register, some_register);

      /* Undo CMP's whacking of predication*/
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_AND(p, g1, f0, g1);
      brw_pop_insn_state(p);
   } else {
      struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      /* Unlike the 965, we have the mask reg, so we just need
       * somewhere to invert that (containing channels to be disabled)
       * so it can be ANDed with the mask of pixels still to be
       * written. Use the flag reg for consistency with gen6+.
       */
      brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
      brw_AND(p, g0, f0, g0);

      brw_pop_insn_state(p);
   }
}
503
/**
 * Spill a register to the scratch buffer: copy src into the message
 * payload and emit an oword block write at inst->offset.
 */
void
fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
{
   assert(inst->mlen != 0);

   brw_MOV(p,
	   retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
	   retype(src, BRW_REGISTER_TYPE_UD));
   brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
				 inst->offset);
}
515
/**
 * Reload a spilled register from the scratch buffer via an oword block
 * read, with the gen4 send-destination workarounds around it.
 */
void
fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->mlen != 0);

   /* Clear any post destination dependencies that would be ignored by
    * the block read.  See the B-Spec for pre-gen5 send instruction.
    *
    * This could use a better solution, since texture sampling and
    * math reads could potentially run into it as well -- anywhere
    * that we have a SEND with a destination that is a register that
    * was written but not read within the last N instructions (what's
    * N?  unsure).  This is rare because of dead code elimination, but
    * not impossible.
    */
   if (intel->gen == 4 && !intel->is_g4x)
      brw_MOV(p, brw_null_reg(), dst);

   brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
				inst->offset);

   if (intel->gen == 4 && !intel->is_g4x) {
      /* gen4 errata: destination from a send can't be used as a
       * destination until it's been read.  Just read it so we don't
       * have to worry.
       */
      brw_MOV(p, brw_null_reg(), dst);
   }
}
545
/**
 * Load a uniform from the pull-constant buffer with an oword block
 * read, using the same gen4 send-destination workarounds as unspill.
 */
void
fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->mlen != 0);

   /* Clear any post destination dependencies that would be ignored by
    * the block read.  See the B-Spec for pre-gen5 send instruction.
    *
    * This could use a better solution, since texture sampling and
    * math reads could potentially run into it as well -- anywhere
    * that we have a SEND with a destination that is a register that
    * was written but not read within the last N instructions (what's
    * N?  unsure).  This is rare because of dead code elimination, but
    * not impossible.
    */
   if (intel->gen == 4 && !intel->is_g4x)
      brw_MOV(p, brw_null_reg(), dst);

   brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
			inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);

   if (intel->gen == 4 && !intel->is_g4x) {
      /* gen4 errata: destination from a send can't be used as a
       * destination until it's been read.  Just read it so we don't
       * have to worry.
       */
      brw_MOV(p, brw_null_reg(), dst);
   }
}
575
576 static uint32_t brw_file_from_reg(fs_reg *reg)
577 {
578 switch (reg->file) {
579 case ARF:
580 return BRW_ARCHITECTURE_REGISTER_FILE;
581 case GRF:
582 return BRW_GENERAL_REGISTER_FILE;
583 case MRF:
584 return BRW_MESSAGE_REGISTER_FILE;
585 case IMM:
586 return BRW_IMMEDIATE_VALUE;
587 default:
588 assert(!"not reached");
589 return BRW_GENERAL_REGISTER_FILE;
590 }
591 }
592
/**
 * Translate an FS IR fs_reg into the struct brw_reg consumed by the
 * brw_eu assembler, including type, smear, second-half, immediate,
 * abs and negate handling.
 */
static struct brw_reg
brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      /* smear selects a single element to replicate; -1 means use the
       * whole vec8 register.
       */
      if (reg->smear == -1) {
	 brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
      } else {
	 brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, reg->smear);
      }
      brw_reg = retype(brw_reg, reg->type);
      if (reg->sechalf)
	 brw_reg = sechalf(brw_reg);
      break;
   case IMM:
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
	 brw_reg = brw_imm_f(reg->imm.f);
	 break;
      case BRW_REGISTER_TYPE_D:
	 brw_reg = brw_imm_d(reg->imm.i);
	 break;
      case BRW_REGISTER_TYPE_UD:
	 brw_reg = brw_imm_ud(reg->imm.u);
	 break;
      default:
	 assert(!"not reached");
	 brw_reg = brw_null_reg();
	 break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case UNIFORM:
      /* Uniforms should have been lowered to GRFs or pull constants
       * before code generation.
       */
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   default:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   /* Source modifiers apply regardless of file. */
   if (reg->abs)
      brw_reg = brw_abs(brw_reg);
   if (reg->negate)
      brw_reg = negate(brw_reg);

   return brw_reg;
}
651
/**
 * Walk the FS LIR instruction list and emit native code for each
 * instruction through the brw_eu assembler.
 *
 * Maintains a stack of open loops (for pre-gen6 BREAK/CONT patching)
 * and the number of IFs open inside each loop level, and optionally
 * disassembles the generated instructions under INTEL_DEBUG=wm.
 */
void
fs_visitor::generate_code()
{
   int last_native_inst = p->nr_insn;
   const char *last_annotation_string = NULL;
   ir_instruction *last_annotation_ir = NULL;

   /* Grow-on-demand stack of open DO instructions, plus a parallel
    * count of IFs currently open within each loop level.
    */
   int loop_stack_array_size = 16;
   int loop_stack_depth = 0;
   brw_instruction **loop_stack =
      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
   int *if_depth_in_loop =
      rzalloc_array(this->mem_ctx, int, loop_stack_array_size);


   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
      printf("Native code for fragment shader %d (%d-wide dispatch):\n",
	     prog->Name, c->dispatch_width);
   }

   foreach_list(node, &this->instructions) {
      fs_inst *inst = (fs_inst *)node;
      struct brw_reg src[3], dst;

      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
	 /* Print the originating IR node / annotation only when it
	  * changes, so the disassembly stays readable.
	  */
	 if (last_annotation_ir != inst->ir) {
	    last_annotation_ir = inst->ir;
	    if (last_annotation_ir) {
	       printf("   ");
	       last_annotation_ir->print();
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != inst->annotation) {
	    last_annotation_string = inst->annotation;
	    if (last_annotation_string)
	       printf("   %s\n", last_annotation_string);
	 }
      }

      for (unsigned int i = 0; i < 3; i++) {
	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);

	 /* The accumulator result appears to get used for the
	  * conditional modifier generation.  When negating a UD
	  * value, there is a 33rd bit generated for the sign in the
	  * accumulator value, so now you can't check, for example,
	  * equality with a 32-bit value.  See piglit fs-op-neg-uvec4.
	  */
	 assert(!inst->conditional_mod ||
		inst->src[i].type != BRW_REGISTER_TYPE_UD ||
		!inst->src[i].negate);
      }
      dst = brw_reg_from_fs_reg(&inst->dst);

      /* Apply the per-instruction default state before emitting. */
      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicated);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

      if (inst->force_uncompressed || c->dispatch_width == 8) {
	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      } else if (inst->force_sechalf) {
	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      } else {
	 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
      }

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
	 brw_MOV(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ADD:
	 brw_ADD(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MUL:
	 brw_MUL(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MACH:
	 /* MACH needs accumulator writes enabled around it. */
	 brw_set_acc_write_control(p, 1);
	 brw_MACH(p, dst, src[0], src[1]);
	 brw_set_acc_write_control(p, 0);
	 break;

      case BRW_OPCODE_FRC:
	 brw_FRC(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDD:
	 brw_RNDD(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDE:
	 brw_RNDE(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDZ:
	 brw_RNDZ(p, dst, src[0]);
	 break;

      case BRW_OPCODE_AND:
	 brw_AND(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_OR:
	 brw_OR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_XOR:
	 brw_XOR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_NOT:
	 brw_NOT(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ASR:
	 brw_ASR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHR:
	 brw_SHR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHL:
	 brw_SHL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_CMP:
	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
	 break;
      case BRW_OPCODE_SEL:
	 brw_SEL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_IF:
	 if (inst->src[0].file != BAD_FILE) {
	    /* The instruction has an embedded compare (only allowed on gen6) */
	    assert(intel->gen == 6);
	    gen6_IF(p, inst->conditional_mod, src[0], src[1]);
	 } else {
	    brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
	 }
	 if_depth_in_loop[loop_stack_depth]++;
	 break;

      case BRW_OPCODE_ELSE:
	 brw_ELSE(p);
	 break;
      case BRW_OPCODE_ENDIF:
	 brw_ENDIF(p);
	 if_depth_in_loop[loop_stack_depth]--;
	 break;

      case BRW_OPCODE_DO:
	 /* Push the new loop and grow both arrays if we ran out. */
	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
	 if (loop_stack_array_size <= loop_stack_depth) {
	    loop_stack_array_size *= 2;
	    loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
				  loop_stack_array_size);
	    if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
				        loop_stack_array_size);
	 }
	 if_depth_in_loop[loop_stack_depth] = 0;
	 break;

      case BRW_OPCODE_BREAK:
	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;
      case BRW_OPCODE_CONTINUE:
	 /* FINISHME: We need to write the loop instruction support still. */
	 if (intel->gen >= 6)
	    gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
	 else
	    brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;

      case BRW_OPCODE_WHILE: {
	 struct brw_instruction *inst0, *inst1;
	 GLuint br = 1;

	 /* Jump counts are in units of 64 bits on gen5+, 128 before. */
	 if (intel->gen >= 5)
	    br = 2;

	 assert(loop_stack_depth > 0);
	 loop_stack_depth--;
	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
	 if (intel->gen < 6) {
	    /* patch all the BREAK/CONT instructions from last BGNLOOP */
	    while (inst0 > loop_stack[loop_stack_depth]) {
	       inst0--;
	       if (inst0->header.opcode == BRW_OPCODE_BREAK &&
		   inst0->bits3.if_else.jump_count == 0) {
		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
	       }
	       else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
			inst0->bits3.if_else.jump_count == 0) {
		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
	       }
	    }
	 }
      }
	 break;

      case SHADER_OPCODE_RCP:
      case SHADER_OPCODE_RSQ:
      case SHADER_OPCODE_SQRT:
      case SHADER_OPCODE_EXP2:
      case SHADER_OPCODE_LOG2:
      case SHADER_OPCODE_SIN:
      case SHADER_OPCODE_COS:
	 if (intel->gen >= 7) {
	    generate_math1_gen7(inst, dst, src[0]);
	 } else if (intel->gen == 6) {
	    generate_math1_gen6(inst, dst, src[0]);
	 } else {
	    generate_math_gen4(inst, dst, src[0]);
	 }
	 break;
      case SHADER_OPCODE_INT_QUOTIENT:
      case SHADER_OPCODE_INT_REMAINDER:
      case SHADER_OPCODE_POW:
	 if (intel->gen >= 7) {
	    generate_math2_gen7(inst, dst, src[0], src[1]);
	 } else if (intel->gen == 6) {
	    generate_math2_gen6(inst, dst, src[0], src[1]);
	 } else {
	    /* NOTE(review): two-source math on gen4/5 relies on the
	     * second operand already living in the MRF payload, so
	     * only src[0] is passed here -- confirm against the
	     * visitor's math emission.
	     */
	    generate_math_gen4(inst, dst, src[0]);
	 }
	 break;
      case FS_OPCODE_PIXEL_X:
	 generate_pixel_xy(dst, true);
	 break;
      case FS_OPCODE_PIXEL_Y:
	 generate_pixel_xy(dst, false);
	 break;
      case FS_OPCODE_CINTERP:
	 brw_MOV(p, dst, src[0]);
	 break;
      case FS_OPCODE_LINTERP:
	 generate_linterp(inst, dst, src);
	 break;
      case SHADER_OPCODE_TEX:
      case FS_OPCODE_TXB:
      case SHADER_OPCODE_TXD:
      case SHADER_OPCODE_TXF:
      case SHADER_OPCODE_TXL:
      case SHADER_OPCODE_TXS:
	 generate_tex(inst, dst, src[0]);
	 break;
      case FS_OPCODE_DISCARD:
	 generate_discard(inst);
	 break;
      case FS_OPCODE_DDX:
	 generate_ddx(inst, dst, src[0]);
	 break;
      case FS_OPCODE_DDY:
	 generate_ddy(inst, dst, src[0]);
	 break;

      case FS_OPCODE_SPILL:
	 generate_spill(inst, src[0]);
	 break;

      case FS_OPCODE_UNSPILL:
	 generate_unspill(inst, dst);
	 break;

      case FS_OPCODE_PULL_CONSTANT_LOAD:
	 generate_pull_constant_load(inst, dst);
	 break;

      case FS_OPCODE_FB_WRITE:
	 generate_fb_write(inst);
	 break;
      default:
	 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
			  brw_opcodes[inst->opcode].name);
	 } else {
	    _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
	 }
	 fail("unsupported opcode in FS\n");
      }

      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
	 /* Disassemble everything emitted for this LIR instruction. */
	 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
	    if (0) {
	       printf("0x%08x 0x%08x 0x%08x 0x%08x ",
		      ((uint32_t *)&p->store[i])[3],
		      ((uint32_t *)&p->store[i])[2],
		      ((uint32_t *)&p->store[i])[1],
		      ((uint32_t *)&p->store[i])[0]);
	    }
	    brw_disasm(stdout, &p->store[i], intel->gen);
	 }
      }

      last_native_inst = p->nr_insn;
   }

   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
      printf("\n");
   }

   ralloc_free(loop_stack);
   ralloc_free(if_depth_in_loop);

   /* Fix up gen6+ UIP/JIP offsets now that all instructions exist. */
   brw_set_uip_jip(p);

   /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
    * emit issues, it doesn't get the jump distances into the output,
    * which is often something we want to debug.  So this is here in
    * case you're doing that.
    */
   if (0) {
      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
	 for (unsigned int i = 0; i < p->nr_insn; i++) {
	    printf("0x%08x 0x%08x 0x%08x 0x%08x ",
		   ((uint32_t *)&p->store[i])[3],
		   ((uint32_t *)&p->store[i])[2],
		   ((uint32_t *)&p->store[i])[1],
		   ((uint32_t *)&p->store[i])[0]);
	    brw_disasm(stdout, &p->store[i], intel->gen);
	 }
      }
   }
}