intel/fs: Add an UNDEF instruction to avoid excess live ranges
[mesa.git] src/intel/compiler/brw_fs_builder.h
/* -*- c++ -*- */
/*
 * Copyright © 2010-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_FS_BUILDER_H
#define BRW_FS_BUILDER_H

#include "brw_ir_fs.h"
#include "brw_shader.h"

namespace brw {
   /**
    * Toolbox to assemble an FS IR program out of individual instructions.
    *
    * This object is meant to have an interface consistent with
    * brw::vec4_builder. They cannot be fully interchangeable because
    * brw::fs_builder generates scalar code while brw::vec4_builder generates
    * vector code.
    */
   class fs_builder {
   public:
      /** Type used in this IR to represent a source of an instruction. */
      typedef fs_reg src_reg;

      /** Type used in this IR to represent the destination of an instruction. */
      typedef fs_reg dst_reg;

      /** Type used in this IR to represent an instruction. */
      typedef fs_inst instruction;

      /**
       * Construct an fs_builder that inserts instructions into \p shader.
       * \p dispatch_width gives the native execution width of the program.
       */
      fs_builder(backend_shader *shader,
                 unsigned dispatch_width) :
         shader(shader), block(NULL), cursor(NULL),
         _dispatch_width(dispatch_width),
         _group(0),
         force_writemask_all(false),
         annotation()
      {
      }

      /**
       * Construct an fs_builder that inserts instructions into \p shader
       * before instruction \p inst in basic block \p block. The default
       * execution controls and debug annotation are initialized from the
       * instruction passed as argument.
       */
      fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) :
         shader(shader), block(block), cursor(inst),
         _dispatch_width(inst->exec_size),
         _group(inst->group),
         force_writemask_all(inst->force_writemask_all)
      {
         annotation.str = inst->annotation;
         annotation.ir = inst->ir;
      }

      /**
       * Construct an fs_builder that inserts instructions before \p cursor in
       * basic block \p block, inheriting other code generation parameters
       * from this.
       */
      fs_builder
      at(bblock_t *block, exec_node *cursor) const
      {
         fs_builder bld = *this;
         bld.block = block;
         bld.cursor = cursor;
         return bld;
      }

      /**
       * Construct an fs_builder appending instructions at the end of the
       * instruction list of the shader, inheriting other code generation
       * parameters from this.
       */
      fs_builder
      at_end() const
      {
         return at(NULL, (exec_node *)&shader->instructions.tail_sentinel);
      }

      /**
       * Construct a builder specifying the default SIMD width and group of
       * channel enable signals, inheriting other code generation parameters
       * from this.
       *
       * \p n gives the default SIMD width, \p i gives the slot group used for
       * predication and control flow masking in multiples of \p n channels.
       */
      fs_builder
      group(unsigned n, unsigned i) const
      {
         fs_builder bld = *this;

         if (n <= dispatch_width() && i < dispatch_width() / n) {
            bld._group += i * n;
         } else {
            /* The requested channel group isn't a subset of the channel
             * group of this builder, which means that the resulting
             * instructions would use (potentially undefined) channel enable
             * signals not specified by the parent builder. That's only
             * valid if the instruction doesn't have per-channel semantics,
             * in which case we should clear off the default group index in
             * order to prevent emitting instructions with a channel group
             * not aligned to their own execution size.
             */
            assert(force_writemask_all);
            bld._group = 0;
         }

         bld._dispatch_width = n;
         return bld;
      }
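
      /* Usage sketch (illustrative, not from the original header): with a
       * SIMD16 builder "bld", an instruction restricted to the second group
       * of eight channels could be emitted as
       *
       *    const fs_builder hbld = bld.group(8, 1);
       *    hbld.MOV(dst, src);   // uses channel enables 8..15 only
       *
       * where "bld", "dst" and "src" are hypothetical.
       */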

      /**
       * Alias for group() with width equal to eight.
       */
      fs_builder
      half(unsigned i) const
      {
         return group(8, i);
      }

      /**
       * Construct a builder with per-channel control flow execution masking
       * disabled if \p b is true. If control flow execution masking is
       * already disabled this has no effect.
       */
      fs_builder
      exec_all(bool b = true) const
      {
         fs_builder bld = *this;
         if (b)
            bld.force_writemask_all = true;
         return bld;
      }
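
      /* Usage sketch (illustrative): message header setup that must happen
       * regardless of control flow is typically emitted with writemasking
       * disabled, e.g.
       *
       *    const fs_builder ubld = bld.exec_all().group(8, 0);
       *    ubld.MOV(header, brw_imm_ud(0));
       *
       * where "bld" and "header" are hypothetical.
       */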

      /**
       * Construct a builder with the given debug annotation info.
       */
      fs_builder
      annotate(const char *str, const void *ir = NULL) const
      {
         fs_builder bld = *this;
         bld.annotation.str = str;
         bld.annotation.ir = ir;
         return bld;
      }

      /**
       * Get the SIMD width in use.
       */
      unsigned
      dispatch_width() const
      {
         return _dispatch_width;
      }

      /**
       * Get the channel group in use.
       */
      unsigned
      group() const
      {
         return _group;
      }

      /**
       * Allocate a virtual register of natural vector size (one for this IR)
       * and SIMD width. \p n gives the amount of space to allocate in
       * dispatch_width units (which is just enough space for one logical
       * component in this IR).
       */
      dst_reg
      vgrf(enum brw_reg_type type, unsigned n = 1) const
      {
         assert(dispatch_width() <= 32);

         if (n > 0)
            return dst_reg(VGRF, shader->alloc.allocate(
                              DIV_ROUND_UP(n * type_sz(type) * dispatch_width(),
                                           REG_SIZE)),
                           type);
         else
            return retype(null_reg_ud(), type);
      }
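
      /* Usage sketch (illustrative): a two-component floating-point
       * temporary could be allocated and its second logical component
       * addressed as
       *
       *    const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
       *    bld.MOV(offset(tmp, bld.dispatch_width(), 1), src);
       *
       * where "bld" and "src" are hypothetical.
       */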

      /**
       * Create a null register of floating-point type.
       */
      dst_reg
      null_reg_f() const
      {
         return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_F));
      }

      /**
       * Create a null register of double-precision floating-point type.
       */
      dst_reg
      null_reg_df() const
      {
         return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
      }

      /**
       * Create a null register of signed integer type.
       */
      dst_reg
      null_reg_d() const
      {
         return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      }

      /**
       * Create a null register of unsigned integer type.
       */
      dst_reg
      null_reg_ud() const
      {
         return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
      }

      /**
       * Get the mask of SIMD channels enabled by dispatch and not yet
       * disabled by discard.
       */
      src_reg
      sample_mask_reg() const
      {
         if (shader->stage != MESA_SHADER_FRAGMENT) {
            return brw_imm_d(0xffffffff);
         } else if (brw_wm_prog_data(shader->stage_prog_data)->uses_kill) {
            return brw_flag_reg(0, 1);
         } else {
            assert(shader->devinfo->gen >= 6 && dispatch_width() <= 16);
            return retype(brw_vec1_grf((_group >= 16 ? 2 : 1), 7),
                          BRW_REGISTER_TYPE_UD);
         }
      }
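
      /* Usage sketch (illustrative): surface message lowering can copy the
       * live-channel mask into a header component so discarded channels
       * are masked out of the send, e.g.
       *
       *    const fs_builder ubld = bld.exec_all().group(1, 0);
       *    ubld.MOV(component(header, 7), bld.sample_mask_reg());
       *
       * where "header" is hypothetical.
       */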

      /**
       * Insert an instruction into the program.
       */
      instruction *
      emit(const instruction &inst) const
      {
         return emit(new(shader->mem_ctx) instruction(inst));
      }

      /**
       * Create and insert a nullary control instruction into the program.
       */
      instruction *
      emit(enum opcode opcode) const
      {
         return emit(instruction(opcode, dispatch_width()));
      }

      /**
       * Create and insert a nullary instruction into the program.
       */
      instruction *
      emit(enum opcode opcode, const dst_reg &dst) const
      {
         return emit(instruction(opcode, dispatch_width(), dst));
      }

      /**
       * Create and insert a unary instruction into the program.
       */
      instruction *
      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
      {
         switch (opcode) {
         case SHADER_OPCODE_RCP:
         case SHADER_OPCODE_RSQ:
         case SHADER_OPCODE_SQRT:
         case SHADER_OPCODE_EXP2:
         case SHADER_OPCODE_LOG2:
         case SHADER_OPCODE_SIN:
         case SHADER_OPCODE_COS:
            return emit(instruction(opcode, dispatch_width(), dst,
                                    fix_math_operand(src0)));

         default:
            return emit(instruction(opcode, dispatch_width(), dst, src0));
         }
      }

      /**
       * Create and insert a binary instruction into the program.
       */
      instruction *
      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
           const src_reg &src1) const
      {
         switch (opcode) {
         case SHADER_OPCODE_POW:
         case SHADER_OPCODE_INT_QUOTIENT:
         case SHADER_OPCODE_INT_REMAINDER:
            return emit(instruction(opcode, dispatch_width(), dst,
                                    fix_math_operand(src0),
                                    fix_math_operand(src1)));

         default:
            return emit(instruction(opcode, dispatch_width(), dst, src0, src1));
         }
      }

      /**
       * Create and insert a ternary instruction into the program.
       */
      instruction *
      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
           const src_reg &src1, const src_reg &src2) const
      {
         switch (opcode) {
         case BRW_OPCODE_BFE:
         case BRW_OPCODE_BFI2:
         case BRW_OPCODE_MAD:
         case BRW_OPCODE_LRP:
            return emit(instruction(opcode, dispatch_width(), dst,
                                    fix_3src_operand(src0),
                                    fix_3src_operand(src1),
                                    fix_3src_operand(src2)));

         default:
            return emit(instruction(opcode, dispatch_width(), dst,
                                    src0, src1, src2));
         }
      }

      /**
       * Create and insert an instruction with a variable number of sources
       * into the program.
       */
      instruction *
      emit(enum opcode opcode, const dst_reg &dst, const src_reg srcs[],
           unsigned n) const
      {
         return emit(instruction(opcode, dispatch_width(), dst, srcs, n));
      }

      /**
       * Insert a preallocated instruction into the program.
       */
      instruction *
      emit(instruction *inst) const
      {
         assert(inst->exec_size <= 32);
         assert(inst->exec_size == dispatch_width() ||
                force_writemask_all);

         inst->group = _group;
         inst->force_writemask_all = force_writemask_all;
         inst->annotation = annotation.str;
         inst->ir = annotation.ir;

         if (block)
            static_cast<instruction *>(cursor)->insert_before(block, inst);
         else
            cursor->insert_before(inst);

         return inst;
      }
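
      /* Note (illustrative): every emit() overload above funnels into this
       * method, so the builder's execution controls and annotation are
       * applied uniformly. For example, a sketch like
       *
       *    bld.group(8, 1).exec_all().emit(BRW_OPCODE_NOP)
       *
       * would produce an instruction with group == 8 (given a SIMD16
       * "bld") and force_writemask_all set.
       */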

      /**
       * Select \p src0 if the comparison of both sources with the given
       * conditional mod evaluates to true, otherwise select \p src1.
       *
       * Generally useful to get the minimum or maximum of two values.
       */
      instruction *
      emit_minmax(const dst_reg &dst, const src_reg &src0,
                  const src_reg &src1, brw_conditional_mod mod) const
      {
         assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);

         return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
                                     fix_unsigned_negate(src1)));
      }

      /**
       * Copy any live channel from \p src to the first channel of the result.
       */
      src_reg
      emit_uniformize(const src_reg &src) const
      {
         /* FIXME: We use a vector chan_index and dst to allow constant and
          * copy propagation to move the result all the way into the
          * consuming instruction (typically a surface index or sampler
          * index for a send). This uses 1 or 3 extra hw registers in SIMD16
          * or SIMD32 dispatch. Once we teach const/copy propagation about
          * scalars we should go back to scalar destinations here.
          */
         const fs_builder ubld = exec_all();
         const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
         const dst_reg dst = vgrf(src.type);

         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index)->flag_subreg = 2;
         ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0));

         return src_reg(component(dst, 0));
      }
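
      /* Usage sketch (illustrative): a possibly divergent surface index can
       * be made uniform before being consumed by a send, e.g.
       *
       *    const src_reg usurf = bld.emit_uniformize(surf_index);
       *
       * where "surf_index" is a hypothetical per-channel value.
       */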

      src_reg
      move_to_vgrf(const src_reg &src, unsigned num_components) const
      {
         src_reg *const src_comps = new src_reg[num_components];
         for (unsigned i = 0; i < num_components; i++)
            src_comps[i] = offset(src, dispatch_width(), i);

         const dst_reg dst = vgrf(src.type, num_components);
         LOAD_PAYLOAD(dst, src_comps, num_components, 0);

         delete[] src_comps;

         return src_reg(dst);
      }

      void
      emit_scan(enum opcode opcode, const dst_reg &tmp,
                unsigned cluster_size, brw_conditional_mod mod) const
      {
         assert(dispatch_width() >= 8);

         /* The instruction splitting code isn't advanced enough to split
          * these so we need to handle that ourselves.
          */
         if (dispatch_width() * type_sz(tmp.type) > 2 * REG_SIZE) {
            const unsigned half_width = dispatch_width() / 2;
            const fs_builder ubld = exec_all().group(half_width, 0);
            dst_reg left = tmp;
            dst_reg right = horiz_offset(tmp, half_width);
            ubld.emit_scan(opcode, left, cluster_size, mod);
            ubld.emit_scan(opcode, right, cluster_size, mod);
            if (cluster_size > half_width) {
               src_reg left_comp = component(left, half_width - 1);
               set_condmod(mod, ubld.emit(opcode, right, left_comp, right));
            }
            return;
         }

         if (cluster_size > 1) {
            const fs_builder ubld = exec_all().group(dispatch_width() / 2, 0);
            const dst_reg left = horiz_stride(tmp, 2);
            const dst_reg right = horiz_stride(horiz_offset(tmp, 1), 2);
            set_condmod(mod, ubld.emit(opcode, right, left, right));
         }

         if (cluster_size > 2) {
            if (type_sz(tmp.type) <= 4) {
               const fs_builder ubld =
                  exec_all().group(dispatch_width() / 4, 0);
               src_reg left = horiz_stride(horiz_offset(tmp, 1), 4);

               dst_reg right = horiz_stride(horiz_offset(tmp, 2), 4);
               set_condmod(mod, ubld.emit(opcode, right, left, right));

               right = horiz_stride(horiz_offset(tmp, 3), 4);
               set_condmod(mod, ubld.emit(opcode, right, left, right));
            } else {
               /* For 64-bit types, we have to do things differently because
                * the code above would land us with destination strides that
                * the hardware can't handle. Fortunately, we'll only be
                * 8-wide in that case and it's the same number of
                * instructions.
                */
               const fs_builder ubld = exec_all().group(2, 0);

               for (unsigned i = 0; i < dispatch_width(); i += 4) {
                  src_reg left = component(tmp, i + 1);
                  dst_reg right = horiz_offset(tmp, i + 2);
                  set_condmod(mod, ubld.emit(opcode, right, left, right));
               }
            }
         }

         if (cluster_size > 4) {
            const fs_builder ubld = exec_all().group(4, 0);
            src_reg left = component(tmp, 3);
            dst_reg right = horiz_offset(tmp, 4);
            set_condmod(mod, ubld.emit(opcode, right, left, right));

            if (dispatch_width() > 8) {
               left = component(tmp, 8 + 3);
               right = horiz_offset(tmp, 8 + 4);
               set_condmod(mod, ubld.emit(opcode, right, left, right));
            }
         }

         if (cluster_size > 8 && dispatch_width() > 8) {
            const fs_builder ubld = exec_all().group(8, 0);
            src_reg left = component(tmp, 7);
            dst_reg right = horiz_offset(tmp, 8);
            set_condmod(mod, ubld.emit(opcode, right, left, right));
         }
      }
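
      /* Usage sketch (illustrative): an inclusive add scan over a full
       * SIMD16 cluster, assuming "tmp" was pre-loaded with the per-channel
       * values ("tmp" and "value" are hypothetical):
       *
       *    bld.MOV(tmp, value);
       *    bld.emit_scan(BRW_OPCODE_ADD, tmp, 16, BRW_CONDITIONAL_NONE);
       *
       * On completion each channel of "tmp" holds the sum of itself and all
       * lower channels in its cluster.
       */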

      /**
       * Assorted arithmetic ops.
       * @{
       */
#define ALU1(op)                                        \
      instruction *                                     \
      op(const dst_reg &dst, const src_reg &src0) const \
      {                                                 \
         return emit(BRW_OPCODE_##op, dst, src0);       \
      }

#define ALU2(op)                                                        \
      instruction *                                                     \
      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
      {                                                                 \
         return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
      }

#define ALU2_ACC(op)                                                    \
      instruction *                                                     \
      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
      {                                                                 \
         instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);    \
         inst->writes_accumulator = true;                               \
         return inst;                                                   \
      }

#define ALU3(op)                                                        \
      instruction *                                                     \
      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1, \
         const src_reg &src2) const                                     \
      {                                                                 \
         return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
      }

      ALU2(ADD)
      ALU2_ACC(ADDC)
      ALU2(AND)
      ALU2(ASR)
      ALU2(AVG)
      ALU3(BFE)
      ALU2(BFI1)
      ALU3(BFI2)
      ALU1(BFREV)
      ALU1(CBIT)
      ALU2(CMPN)
      ALU1(DIM)
      ALU2(DP2)
      ALU2(DP3)
      ALU2(DP4)
      ALU2(DPH)
      ALU1(F16TO32)
      ALU1(F32TO16)
      ALU1(FBH)
      ALU1(FBL)
      ALU1(FRC)
      ALU2(LINE)
      ALU1(LZD)
      ALU2(MAC)
      ALU2_ACC(MACH)
      ALU3(MAD)
      ALU1(MOV)
      ALU2(MUL)
      ALU1(NOT)
      ALU2(OR)
      ALU2(PLN)
      ALU1(RNDD)
      ALU1(RNDE)
      ALU1(RNDU)
      ALU1(RNDZ)
      ALU2(SAD2)
      ALU2_ACC(SADA2)
      ALU2(SEL)
      ALU2(SHL)
      ALU2(SHR)
      ALU2_ACC(SUBB)
      ALU2(XOR)

#undef ALU3
#undef ALU2_ACC
#undef ALU2
#undef ALU1
      /** @} */
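
      /* Usage sketch (illustrative): the helpers generated above keep
       * common ALU sequences terse, e.g. with hypothetical operands
       *
       *    bld.MAD(dst, c, a, b);         // dst = a * b + c
       *    bld.SHL(dst, x, brw_imm_ud(2));
       */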

      /**
       * CMP: Sets the low bit of the destination channels with the result
       * of the comparison, while the upper bits are undefined, and updates
       * the flag register with the packed 16 bits of the result.
       */
      instruction *
      CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
          brw_conditional_mod condition) const
      {
         /* Take the instruction:
          *
          * CMP null<d> src0<f> src1<f>
          *
          * Original gen4 does type conversion to the destination type
          * before comparison, producing garbage results for floating
          * point comparisons.
          *
          * The destination type doesn't matter on newer generations,
          * so we set the type to match src0 so we can compact the
          * instruction.
          */
         return set_condmod(condition,
                            emit(BRW_OPCODE_CMP, retype(dst, src0.type),
                                 fix_unsigned_negate(src0),
                                 fix_unsigned_negate(src1)));
      }
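
      /* Usage sketch (illustrative): compare two hypothetical floats and
       * predicate a following SEL on the result:
       *
       *    bld.CMP(bld.null_reg_f(), a, b, BRW_CONDITIONAL_L);
       *    set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dst, x, y));
       */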

      /**
       * Gen4 predicated IF.
       */
      instruction *
      IF(brw_predicate predicate) const
      {
         return set_predicate(predicate, emit(BRW_OPCODE_IF));
      }

      /**
       * CSEL: dst = src2 <op> 0.0f ? src0 : src1
       */
      instruction *
      CSEL(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
           const src_reg &src2, brw_conditional_mod condition) const
      {
         /* CSEL only operates on floats, so we can't do integer </<=/>=/>
          * comparisons. Zero/non-zero (== and !=) comparisons almost work.
          * 0x80000000 fails because it is -0.0, and -0.0 == 0.0.
          */
         assert(src2.type == BRW_REGISTER_TYPE_F);

         return set_condmod(condition,
                            emit(BRW_OPCODE_CSEL,
                                 retype(dst, BRW_REGISTER_TYPE_F),
                                 retype(src0, BRW_REGISTER_TYPE_F),
                                 retype(src1, BRW_REGISTER_TYPE_F),
                                 src2));
      }

      /**
       * Emit a linear interpolation instruction.
       */
      instruction *
      LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
          const src_reg &a) const
      {
         if (shader->devinfo->gen >= 6 && shader->devinfo->gen <= 10) {
            /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0),
             * so we need to reorder the operands.
             */
            return emit(BRW_OPCODE_LRP, dst, a, y, x);

         } else {
            /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */
            const dst_reg y_times_a = vgrf(dst.type);
            const dst_reg one_minus_a = vgrf(dst.type);
            const dst_reg x_times_one_minus_a = vgrf(dst.type);

            MUL(y_times_a, y, a);
            ADD(one_minus_a, negate(a), brw_imm_f(1.0f));
            MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
            return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
         }
      }
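
      /* Usage sketch (illustrative): blend two hypothetical values with a
       * factor "a" in [0, 1]:
       *
       *    bld.LRP(dst, x, y, a);   // dst = x * (1 - a) + y * a
       */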

      /**
       * Collect a number of registers in a contiguous range of registers.
       */
      instruction *
      LOAD_PAYLOAD(const dst_reg &dst, const src_reg *src,
                   unsigned sources, unsigned header_size) const
      {
         instruction *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources);
         inst->header_size = header_size;
         inst->size_written = header_size * REG_SIZE;
         for (unsigned i = header_size; i < sources; i++) {
            inst->size_written +=
               ALIGN(dispatch_width() * type_sz(src[i].type) * dst.stride,
                     REG_SIZE);
         }

         return inst;
      }
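
      /* Usage sketch (illustrative): gather a hypothetical two-register
       * header plus one per-channel source into a contiguous send payload:
       *
       *    const src_reg srcs[] = { header0, header1, data };
       *    bld.LOAD_PAYLOAD(payload, srcs, 3, 2);   // header_size == 2
       */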

      /**
       * Declare the contents of \p dst as undefined at this point, so that
       * later partial writes to it don't artificially extend its live
       * range back to the start of the program.
       */
      instruction *
      UNDEF(const dst_reg &dst) const
      {
         assert(dst.file == VGRF);
         instruction *inst = emit(SHADER_OPCODE_UNDEF,
                                  retype(dst, BRW_REGISTER_TYPE_UD));
         inst->size_written = shader->alloc.sizes[dst.nr] * REG_SIZE;

         return inst;
      }
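
      /* Usage sketch (illustrative): when only part of a register will be
       * written before it is read, emitting an UNDEF first keeps liveness
       * analysis from extending the live range back to the start of the
       * program (the motivation for this instruction):
       *
       *    const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
       *    bld.UNDEF(payload);
       *    ubld.group(1, 0).MOV(component(payload, 0), data);
       *
       * where "ubld" and "data" are hypothetical.
       */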

      backend_shader *shader;

   private:
      /**
       * Workaround for negation of UD registers. See comment in
       * fs_generator::generate_code() for more details.
       */
      src_reg
      fix_unsigned_negate(const src_reg &src) const
      {
         if (src.type == BRW_REGISTER_TYPE_UD &&
             src.negate) {
            dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
            MOV(temp, src);
            return src_reg(temp);
         } else {
            return src;
         }
      }

      /**
       * Workaround for source register modes not supported by the ternary
       * instruction encoding.
       */
      src_reg
      fix_3src_operand(const src_reg &src) const
      {
         switch (src.file) {
         case FIXED_GRF:
            /* FINISHME: Could handle scalar region, other stride=1 regions */
            if (src.vstride != BRW_VERTICAL_STRIDE_8 ||
                src.width != BRW_WIDTH_8 ||
                src.hstride != BRW_HORIZONTAL_STRIDE_1)
               break;
            /* fallthrough */
         case ATTR:
         case VGRF:
         case UNIFORM:
         case IMM:
            return src;
         default:
            break;
         }

         dst_reg expanded = vgrf(src.type);
         MOV(expanded, src);
         return expanded;
      }

      /**
       * Workaround for source register modes not supported by the math
       * instruction.
       */
      src_reg
      fix_math_operand(const src_reg &src) const
      {
         /* Can't do hstride == 0 args on gen6 math, so expand it out. We
          * might be able to do better by doing execsize = 1 math and then
          * expanding that result out, but we would need to be careful with
          * masking.
          *
          * Gen6 hardware ignores source modifiers (negate and abs) on math
          * instructions, so we also move to a temp to set those up.
          *
          * Gen7 relaxes most of the above restrictions, but still can't use
          * IMM operands to math.
          */
         if ((shader->devinfo->gen == 6 &&
              (src.file == IMM || src.file == UNIFORM ||
               src.abs || src.negate)) ||
             (shader->devinfo->gen == 7 && src.file == IMM)) {
            const dst_reg tmp = vgrf(src.type);
            MOV(tmp, src);
            return tmp;
         } else {
            return src;
         }
      }

      bblock_t *block;
      exec_node *cursor;

      unsigned _dispatch_width;
      unsigned _group;
      bool force_writemask_all;

      /** Debug annotation info. */
      struct {
         const char *str;
         const void *ir;
      } annotation;
   };
}

#endif