i965: Use MESA_FORMAT_B8G8R8X8_SRGB for RGB visuals
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_builder.h
1 /* -*- c++ -*- */
2 /*
3 * Copyright © 2010-2015 Intel Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #ifndef BRW_FS_BUILDER_H
26 #define BRW_FS_BUILDER_H
27
28 #include "brw_ir_fs.h"
29 #include "brw_shader.h"
30 #include "brw_context.h"
31
32 namespace brw {
33 /**
34 * Toolbox to assemble an FS IR program out of individual instructions.
35 *
36 * This object is meant to have an interface consistent with
37 * brw::vec4_builder. They cannot be fully interchangeable because
38 * brw::fs_builder generates scalar code while brw::vec4_builder generates
39 * vector code.
40 */
41 class fs_builder {
42 public:
43 /** Register type this IR uses for instruction sources (alias of fs_reg). */
44 typedef fs_reg src_reg;
45
46 /** Register type this IR uses for instruction destinations (alias of fs_reg). */
47 typedef fs_reg dst_reg;
48
49 /** Instruction type of this IR (alias of fs_inst). */
50 typedef fs_inst instruction;
51
52 /**
53 * Construct an fs_builder that inserts instructions into \p shader.
54 * \p dispatch_width gives the native execution width of the program.
55 */
56 fs_builder(backend_shader *shader,
57 unsigned dispatch_width) :
58 shader(shader), block(NULL), cursor(NULL),
59 _dispatch_width(dispatch_width),
60 _group(0),
61 force_writemask_all(false),
62 annotation()
63 {
64 }
65
66 /**
67 * Construct an fs_builder that inserts instructions into \p shader
68 * before instruction \p inst in basic block \p block. The default
69 * execution controls and debug annotation are initialized from the
70 * instruction passed as argument.
71 */
72 fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) :
73 shader(shader), block(block), cursor(inst),
74 _dispatch_width(inst->exec_size),
75 _group(inst->force_sechalf ? 8 : 0),
76 force_writemask_all(inst->force_writemask_all)
77 {
78 annotation.str = inst->annotation;
79 annotation.ir = inst->ir;
80 }
81
82 /**
83 * Construct an fs_builder that inserts instructions before \p cursor in
84 * basic block \p block, inheriting other code generation parameters
85 * from this.
86 */
87 fs_builder
88 at(bblock_t *block, exec_node *cursor) const
89 {
90 fs_builder bld = *this;
91 bld.block = block;
92 bld.cursor = cursor;
93 return bld;
94 }
95
96 /**
97 * Construct an fs_builder appending instructions at the end of the
98 * instruction list of the shader, inheriting other code generation
99 * parameters from this.
100 */
101 fs_builder
102 at_end() const
103 {
104 return at(NULL, (exec_node *)&shader->instructions.tail);
105 }
106
107 /**
108 * Construct a builder specifying the default SIMD width and group of
109 * channel enable signals, inheriting other code generation parameters
110 * from this.
111 *
112 * \p n gives the default SIMD width, \p i gives the slot group used for
113 * predication and control flow masking in multiples of \p n channels.
114 */
115 fs_builder
116 group(unsigned n, unsigned i) const
117 {
118 assert(force_writemask_all ||
119 (n <= dispatch_width() && i < dispatch_width() / n));
120 fs_builder bld = *this;
121 bld._dispatch_width = n;
122 bld._group += i * n;
123 return bld;
124 }
125
126 /**
127 * Alias for group() with width equal to eight.
128 */
129 fs_builder
130 half(unsigned i) const
131 {
132 return group(8, i);
133 }
134
135 /**
136 * Construct a builder with per-channel control flow execution masking
137 * disabled if \p b is true. If control flow execution masking is
138 * already disabled this has no effect.
139 */
140 fs_builder
141 exec_all(bool b = true) const
142 {
143 fs_builder bld = *this;
144 if (b)
145 bld.force_writemask_all = true;
146 return bld;
147 }
148
149 /**
150 * Construct a builder with the given debug annotation info.
151 */
152 fs_builder
153 annotate(const char *str, const void *ir = NULL) const
154 {
155 fs_builder bld = *this;
156 bld.annotation.str = str;
157 bld.annotation.ir = ir;
158 return bld;
159 }
160
161 /**
162 * Get the SIMD width in use.
163 */
164 unsigned
165 dispatch_width() const
166 {
167 return _dispatch_width;
168 }
169
170 /**
171 * Allocate a virtual register of natural vector size (one for this IR)
172 * and SIMD width. \p n gives the amount of space to allocate in
173 * dispatch_width units (which is just enough space for one logical
174 * component in this IR).
175 */
176 dst_reg
177 vgrf(enum brw_reg_type type, unsigned n = 1) const
178 {
179 assert(dispatch_width() <= 32);
180
181 if (n > 0)
182 return dst_reg(VGRF, shader->alloc.allocate(
183 DIV_ROUND_UP(n * type_sz(type) * dispatch_width(),
184 REG_SIZE)),
185 type);
186 else
187 return retype(null_reg_ud(), type);
188 }
189
190 /**
191 * Create a null register of floating type.
192 */
193 dst_reg
194 null_reg_f() const
195 {
196 return dst_reg(retype(brw_null_vec(dispatch_width()),
197 BRW_REGISTER_TYPE_F));
198 }
199
200 /**
201 * Create a null register of signed integer type.
202 */
203 dst_reg
204 null_reg_d() const
205 {
206 return dst_reg(retype(brw_null_vec(dispatch_width()),
207 BRW_REGISTER_TYPE_D));
208 }
209
210 /**
211 * Create a null register of unsigned integer type.
212 */
213 dst_reg
214 null_reg_ud() const
215 {
216 return dst_reg(retype(brw_null_vec(dispatch_width()),
217 BRW_REGISTER_TYPE_UD));
218 }
219
220 /**
221 * Get the mask of SIMD channels enabled by dispatch and not yet
222 * disabled by discard.
223 */
224 src_reg
225 sample_mask_reg() const
226 {
227 if (shader->stage != MESA_SHADER_FRAGMENT) {
228 return brw_imm_d(0xffff);
229 } else if (((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill) {
230 return brw_flag_reg(0, 1);
231 } else {
232 return retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD);
233 }
234 }
235
236 /**
237 * Insert an instruction into the program.
238 */
239 instruction *
240 emit(const instruction &inst) const
241 {
242 return emit(new(shader->mem_ctx) instruction(inst));
243 }
244
245 /**
246 * Create and insert a nullary control instruction into the program.
247 */
248 instruction *
249 emit(enum opcode opcode) const
250 {
251 return emit(instruction(opcode, dispatch_width()));
252 }
253
254 /**
255 * Create and insert a nullary instruction into the program.
256 */
257 instruction *
258 emit(enum opcode opcode, const dst_reg &dst) const
259 {
260 return emit(instruction(opcode, dispatch_width(), dst));
261 }
262
263 /**
264 * Create and insert a unary instruction into the program.
265 */
266 instruction *
267 emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
268 {
269 switch (opcode) {
270 case SHADER_OPCODE_RCP:
271 case SHADER_OPCODE_RSQ:
272 case SHADER_OPCODE_SQRT:
273 case SHADER_OPCODE_EXP2:
274 case SHADER_OPCODE_LOG2:
275 case SHADER_OPCODE_SIN:
276 case SHADER_OPCODE_COS:
277 return fix_math_instruction(
278 emit(instruction(opcode, dispatch_width(), dst,
279 fix_math_operand(src0))));
280
281 default:
282 return emit(instruction(opcode, dispatch_width(), dst, src0));
283 }
284 }
285
286 /**
287 * Create and insert a binary instruction into the program.
288 */
289 instruction *
290 emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
291 const src_reg &src1) const
292 {
293 switch (opcode) {
294 case SHADER_OPCODE_POW:
295 case SHADER_OPCODE_INT_QUOTIENT:
296 case SHADER_OPCODE_INT_REMAINDER:
297 return fix_math_instruction(
298 emit(instruction(opcode, dispatch_width(), dst,
299 fix_math_operand(src0),
300 fix_math_operand(src1))));
301
302 default:
303 return emit(instruction(opcode, dispatch_width(), dst, src0, src1));
304
305 }
306 }
307
308 /**
309 * Create and insert a ternary instruction into the program.
310 */
311 instruction *
312 emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
313 const src_reg &src1, const src_reg &src2) const
314 {
315 switch (opcode) {
316 case BRW_OPCODE_BFE:
317 case BRW_OPCODE_BFI2:
318 case BRW_OPCODE_MAD:
319 case BRW_OPCODE_LRP:
320 return emit(instruction(opcode, dispatch_width(), dst,
321 fix_3src_operand(src0),
322 fix_3src_operand(src1),
323 fix_3src_operand(src2)));
324
325 default:
326 return emit(instruction(opcode, dispatch_width(), dst,
327 src0, src1, src2));
328 }
329 }
330
331 /**
332 * Create and insert an instruction with a variable number of sources
333 * into the program.
334 */
335 instruction *
336 emit(enum opcode opcode, const dst_reg &dst, const src_reg srcs[],
337 unsigned n) const
338 {
339 return emit(instruction(opcode, dispatch_width(), dst, srcs, n));
340 }
341
342 /**
343 * Insert a preallocated instruction into the program.
344 */
345 instruction *
346 emit(instruction *inst) const
347 {
348 assert(inst->exec_size <= 32);
349 assert(inst->exec_size == dispatch_width() ||
350 force_writemask_all);
351 assert(_group == 0 || _group == 8);
352
353 inst->force_sechalf = (_group == 8);
354 inst->force_writemask_all = force_writemask_all;
355 inst->annotation = annotation.str;
356 inst->ir = annotation.ir;
357
358 if (block)
359 static_cast<instruction *>(cursor)->insert_before(block, inst);
360 else
361 cursor->insert_before(inst);
362
363 return inst;
364 }
365
366 /**
367 * Select \p src0 if the comparison of both sources with the given
368 * conditional mod evaluates to true, otherwise select \p src1.
369 *
370 * Generally useful to get the minimum or maximum of two values.
371 */
372 void
373 emit_minmax(const dst_reg &dst, const src_reg &src0,
374 const src_reg &src1, brw_conditional_mod mod) const
375 {
376 assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
377
378 if (shader->devinfo->gen >= 6) {
379 set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
380 fix_unsigned_negate(src1)));
381 } else {
382 CMP(null_reg_d(), src0, src1, mod);
383 set_predicate(BRW_PREDICATE_NORMAL,
384 SEL(dst, src0, src1));
385 }
386 }
387
388 /**
389 * Copy any live channel from \p src to the first channel of the result.
390 */
391 src_reg
392 emit_uniformize(const src_reg &src) const
393 {
394 /* FIXME: We use a vector chan_index and dst to allow constant and
395 * copy propagration to move result all the way into the consuming
396 * instruction (typically a surface index or sampler index for a
397 * send). This uses 1 or 3 extra hw registers in 16 or 32 wide
398 * dispatch. Once we teach const/copy propagation about scalars we
399 * should go back to scalar destinations here.
400 */
401 const fs_builder ubld = exec_all();
402 const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
403 const dst_reg dst = vgrf(src.type);
404
405 ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
406 ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0));
407
408 return src_reg(component(dst, 0));
409 }
410
411 /**
412 * Assorted arithmetic ops.
413 * @{
414 */
415 #define ALU1(op) \
416 instruction * \
417 op(const dst_reg &dst, const src_reg &src0) const \
418 { \
419 return emit(BRW_OPCODE_##op, dst, src0); \
420 }
421
422 #define ALU2(op) \
423 instruction * \
424 op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
425 { \
426 return emit(BRW_OPCODE_##op, dst, src0, src1); \
427 }
428
429 #define ALU2_ACC(op) \
430 instruction * \
431 op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
432 { \
433 instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1); \
434 inst->writes_accumulator = true; \
435 return inst; \
436 }
437
438 #define ALU3(op) \
439 instruction * \
440 op(const dst_reg &dst, const src_reg &src0, const src_reg &src1, \
441 const src_reg &src2) const \
442 { \
443 return emit(BRW_OPCODE_##op, dst, src0, src1, src2); \
444 }
445
446 ALU2(ADD)
447 ALU2_ACC(ADDC)
448 ALU2(AND)
449 ALU2(ASR)
450 ALU2(AVG)
451 ALU3(BFE)
452 ALU2(BFI1)
453 ALU3(BFI2)
454 ALU1(BFREV)
455 ALU1(CBIT)
456 ALU2(CMPN)
457 ALU3(CSEL)
458 ALU2(DP2)
459 ALU2(DP3)
460 ALU2(DP4)
461 ALU2(DPH)
462 ALU1(F16TO32)
463 ALU1(F32TO16)
464 ALU1(FBH)
465 ALU1(FBL)
466 ALU1(FRC)
467 ALU2(LINE)
468 ALU1(LZD)
469 ALU2(MAC)
470 ALU2_ACC(MACH)
471 ALU3(MAD)
472 ALU1(MOV)
473 ALU2(MUL)
474 ALU1(NOT)
475 ALU2(OR)
476 ALU2(PLN)
477 ALU1(RNDD)
478 ALU1(RNDE)
479 ALU1(RNDU)
480 ALU1(RNDZ)
481 ALU2(SAD2)
482 ALU2_ACC(SADA2)
483 ALU2(SEL)
484 ALU2(SHL)
485 ALU2(SHR)
486 ALU2_ACC(SUBB)
487 ALU2(XOR)
488
489 #undef ALU3
490 #undef ALU2_ACC
491 #undef ALU2
492 #undef ALU1
493 /** @} */
494
495 /**
496 * CMP: Sets the low bit of the destination channels with the result
497 * of the comparison, while the upper bits are undefined, and updates
498 * the flag register with the packed 16 bits of the result.
499 */
500 instruction *
501 CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
502 brw_conditional_mod condition) const
503 {
504 /* Take the instruction:
505 *
506 * CMP null<d> src0<f> src1<f>
507 *
508 * Original gen4 does type conversion to the destination type
509 * before comparison, producing garbage results for floating
510 * point comparisons.
511 *
512 * The destination type doesn't matter on newer generations,
513 * so we set the type to match src0 so we can compact the
514 * instruction.
515 */
516 return set_condmod(condition,
517 emit(BRW_OPCODE_CMP, retype(dst, src0.type),
518 fix_unsigned_negate(src0),
519 fix_unsigned_negate(src1)));
520 }
521
522 /**
523 * Gen4 predicated IF.
524 */
525 instruction *
526 IF(brw_predicate predicate) const
527 {
528 return set_predicate(predicate, emit(BRW_OPCODE_IF));
529 }
530
531 /**
532 * Emit a linear interpolation instruction.
533 */
534 instruction *
535 LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
536 const src_reg &a) const
537 {
538 if (shader->devinfo->gen >= 6) {
539 /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
540 * we need to reorder the operands.
541 */
542 return emit(BRW_OPCODE_LRP, dst, a, y, x);
543
544 } else {
545 /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */
546 const dst_reg y_times_a = vgrf(dst.type);
547 const dst_reg one_minus_a = vgrf(dst.type);
548 const dst_reg x_times_one_minus_a = vgrf(dst.type);
549
550 MUL(y_times_a, y, a);
551 ADD(one_minus_a, negate(a), brw_imm_f(1.0f));
552 MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
553 return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
554 }
555 }
556
557 /**
558 * Collect a number of registers in a contiguous range of registers.
559 */
560 instruction *
561 LOAD_PAYLOAD(const dst_reg &dst, const src_reg *src,
562 unsigned sources, unsigned header_size) const
563 {
564 instruction *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources);
565 inst->header_size = header_size;
566 inst->regs_written = header_size +
567 (sources - header_size) * (dispatch_width() / 8);
568
569 return inst;
570 }
571
/** Shader the builder emits into; set by both constructors. */
572 backend_shader *shader;
573
574 private:
575 /**
576 * Workaround for negation of UD registers. See comment in
577 * fs_generator::generate_code() for more details.
578 */
579 src_reg
580 fix_unsigned_negate(const src_reg &src) const
581 {
582 if (src.type == BRW_REGISTER_TYPE_UD &&
583 src.negate) {
584 dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
585 MOV(temp, src);
586 return src_reg(temp);
587 } else {
588 return src;
589 }
590 }
591
592 /**
593 * Workaround for source register modes not supported by the ternary
594 * instruction encoding.
595 */
596 src_reg
597 fix_3src_operand(const src_reg &src) const
598 {
599 if (src.file == VGRF || src.file == UNIFORM || src.stride > 1) {
600 return src;
601 } else {
602 dst_reg expanded = vgrf(src.type);
603 MOV(expanded, src);
604 return expanded;
605 }
606 }
607
608 /**
609 * Workaround for source register modes not supported by the math
610 * instruction.
611 */
612 src_reg
613 fix_math_operand(const src_reg &src) const
614 {
615 /* Can't do hstride == 0 args on gen6 math, so expand it out. We
616 * might be able to do better by doing execsize = 1 math and then
617 * expanding that result out, but we would need to be careful with
618 * masking.
619 *
620 * Gen6 hardware ignores source modifiers (negate and abs) on math
621 * instructions, so we also move to a temp to set those up.
622 *
623 * Gen7 relaxes most of the above restrictions, but still can't use IMM
624 * operands to math
625 */
626 if ((shader->devinfo->gen == 6 &&
627 (src.file == IMM || src.file == UNIFORM ||
628 src.abs || src.negate)) ||
629 (shader->devinfo->gen == 7 && src.file == IMM)) {
630 const dst_reg tmp = vgrf(src.type);
631 MOV(tmp, src);
632 return tmp;
633 } else {
634 return src;
635 }
636 }
637
638 /**
639 * Workaround other weirdness of the math instruction.
640 */
641 instruction *
642 fix_math_instruction(instruction *inst) const
643 {
644 if (shader->devinfo->gen < 6) {
645 inst->base_mrf = 2;
646 inst->mlen = inst->sources * dispatch_width() / 8;
647
648 if (inst->sources > 1) {
649 /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
650 * "Message Payload":
651 *
652 * "Operand0[7]. For the INT DIV functions, this operand is the
653 * denominator."
654 * ...
655 * "Operand1[7]. For the INT DIV functions, this operand is the
656 * numerator."
657 */
658 const bool is_int_div = inst->opcode != SHADER_OPCODE_POW;
659 const fs_reg src0 = is_int_div ? inst->src[1] : inst->src[0];
660 const fs_reg src1 = is_int_div ? inst->src[0] : inst->src[1];
661
662 inst->resize_sources(1);
663 inst->src[0] = src0;
664
665 at(block, inst).MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type),
666 src1);
667 }
668 }
669
670 return inst;
671 }
672
/** Basic block handed to insert_before() by emit(); may be NULL. */
673 bblock_t *block;
/** List node in front of which emit() inserts new instructions. */
674 exec_node *cursor;
675
/** Default execution size given to instructions created by emit(). */
676 unsigned _dispatch_width;
/** Channel group offset; emit() only accepts the values 0 and 8. */
677 unsigned _group;
/** Copied into the force_writemask_all flag of every emitted instruction. */
678 bool force_writemask_all;
679
680 /** Debug annotation info copied onto every emitted instruction. */
681 struct {
682 const char *str;
683 const void *ir;
684 } annotation;
685 };
686 }
687
688 #endif