[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 #include "brw_cfg.h"
26 #include "brw_eu.h"
27 #include "brw_program.h"
28
29 namespace brw {
30
31 vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
32 const src_reg &src0, const src_reg &src1,
33 const src_reg &src2)
34 {
35 this->opcode = opcode;
36 this->dst = dst;
37 this->src[0] = src0;
38 this->src[1] = src1;
39 this->src[2] = src2;
40 this->saturate = false;
41 this->force_writemask_all = false;
42 this->no_dd_clear = false;
43 this->no_dd_check = false;
44 this->writes_accumulator = false;
45 this->conditional_mod = BRW_CONDITIONAL_NONE;
46 this->predicate = BRW_PREDICATE_NONE;
47 this->predicate_inverse = false;
48 this->target = 0;
49 this->size_written = (dst.file == BAD_FILE ? 0 : REG_SIZE);
50 this->shadow_compare = false;
51 this->ir = NULL;
52 this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
53 this->header_size = 0;
54 this->flag_subreg = 0;
55 this->mlen = 0;
56 this->base_mrf = 0;
57 this->offset = 0;
58 this->annotation = NULL;
59 }
60
61 vec4_instruction *
62 vec4_visitor::emit(vec4_instruction *inst)
63 {
64 inst->ir = this->base_ir;
65 inst->annotation = this->current_annotation;
66
67 this->instructions.push_tail(inst);
68
69 return inst;
70 }
71
72 vec4_instruction *
73 vec4_visitor::emit_before(bblock_t *block, vec4_instruction *inst,
74 vec4_instruction *new_inst)
75 {
76 new_inst->ir = inst->ir;
77 new_inst->annotation = inst->annotation;
78
79 inst->insert_before(block, new_inst);
80
81 return inst;
82 }
83
84 vec4_instruction *
85 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
86 const src_reg &src1, const src_reg &src2)
87 {
88 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1, src2));
89 }
90
91
92 vec4_instruction *
93 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
94 const src_reg &src1)
95 {
96 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1));
97 }
98
99 vec4_instruction *
100 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0)
101 {
102 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0));
103 }
104
105 vec4_instruction *
106 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst)
107 {
108 return emit(new(mem_ctx) vec4_instruction(opcode, dst));
109 }
110
111 vec4_instruction *
112 vec4_visitor::emit(enum opcode opcode)
113 {
114 return emit(new(mem_ctx) vec4_instruction(opcode, dst_reg()));
115 }
116
117 #define ALU1(op) \
118 vec4_instruction * \
119 vec4_visitor::op(const dst_reg &dst, const src_reg &src0) \
120 { \
121 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, src0); \
122 }
123
124 #define ALU2(op) \
125 vec4_instruction * \
126 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
127 const src_reg &src1) \
128 { \
129 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
130 src0, src1); \
131 }
132
133 #define ALU2_ACC(op) \
134 vec4_instruction * \
135 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
136 const src_reg &src1) \
137 { \
138 vec4_instruction *inst = new(mem_ctx) vec4_instruction( \
139 BRW_OPCODE_##op, dst, src0, src1); \
140 inst->writes_accumulator = true; \
141 return inst; \
142 }
143
144 #define ALU3(op) \
145 vec4_instruction * \
146 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
147 const src_reg &src1, const src_reg &src2) \
148 { \
149 assert(devinfo->gen >= 6); \
150 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
151 src0, src1, src2); \
152 }
153
154 ALU1(NOT)
155 ALU1(MOV)
156 ALU1(FRC)
157 ALU1(RNDD)
158 ALU1(RNDE)
159 ALU1(RNDZ)
160 ALU1(F32TO16)
161 ALU1(F16TO32)
162 ALU2(ADD)
163 ALU2(MUL)
164 ALU2_ACC(MACH)
165 ALU2(AND)
166 ALU2(OR)
167 ALU2(XOR)
168 ALU2(DP3)
169 ALU2(DP4)
170 ALU2(DPH)
171 ALU2(SHL)
172 ALU2(SHR)
173 ALU2(ASR)
174 ALU3(LRP)
175 ALU1(BFREV)
176 ALU3(BFE)
177 ALU2(BFI1)
178 ALU3(BFI2)
179 ALU1(FBH)
180 ALU1(FBL)
181 ALU1(CBIT)
182 ALU3(MAD)
183 ALU2_ACC(ADDC)
184 ALU2_ACC(SUBB)
185 ALU2(MAC)
186 ALU1(DIM)
187
188 /** Gen4 predicated IF. */
189 vec4_instruction *
190 vec4_visitor::IF(enum brw_predicate predicate)
191 {
192 vec4_instruction *inst;
193
194 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF);
195 inst->predicate = predicate;
196
197 return inst;
198 }
199
200 /** Gen6 IF with embedded comparison. */
201 vec4_instruction *
202 vec4_visitor::IF(src_reg src0, src_reg src1,
203 enum brw_conditional_mod condition)
204 {
205 assert(devinfo->gen == 6);
206
207 vec4_instruction *inst;
208
209 resolve_ud_negate(&src0);
210 resolve_ud_negate(&src1);
211
212 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF, dst_null_d(),
213 src0, src1);
214 inst->conditional_mod = condition;
215
216 return inst;
217 }
218
219 /**
220 * CMP: Sets the low bit of the destination channels with the result
221 * of the comparison, while the upper bits are undefined, and updates
222 * the flag register with the packed 16 bits of the result.
223 */
224 vec4_instruction *
225 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1,
226 enum brw_conditional_mod condition)
227 {
228 vec4_instruction *inst;
229
230 /* Take the instruction:
231 *
232 * CMP null<d> src0<f> src1<f>
233 *
234 * Original gen4 does type conversion to the destination type before
235 * comparison, producing garbage results for floating point comparisons.
236 *
237 * The destination type doesn't matter on newer generations, so we set the
238 * type to match src0 so we can compact the instruction.
239 */
240 dst.type = src0.type;
241
242 resolve_ud_negate(&src0);
243 resolve_ud_negate(&src1);
244
245 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, dst, src0, src1);
246 inst->conditional_mod = condition;
247
248 return inst;
249 }
250
251 vec4_instruction *
252 vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
253 {
254 vec4_instruction *inst;
255
256 inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
257 dst, index);
258 inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
259 inst->mlen = 2;
260
261 return inst;
262 }
263
264 vec4_instruction *
265 vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
266 const src_reg &index)
267 {
268 vec4_instruction *inst;
269
270 inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
271 dst, src, index);
272 inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen);
273 inst->mlen = 3;
274
275 return inst;
276 }
277
278 src_reg
279 vec4_visitor::fix_3src_operand(const src_reg &src)
280 {
281 /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
282 * able to use vertical stride of zero to replicate the vec4 uniform, like
283 *
284 * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
285 *
286 * But you can't, since vertical stride is always four in three-source
287 * instructions. Instead, insert a MOV instruction to do the replication so
288 * that the three-source instruction can consume it.
289 */
290
291 /* The MOV is only needed if the source is a uniform or immediate. */
292 if (src.file != UNIFORM && src.file != IMM)
293 return src;
294
295 if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
296 return src;
297
298 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
299 expanded.type = src.type;
300 emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
301 return src_reg(expanded);
302 }
303
304 src_reg
305 vec4_visitor::resolve_source_modifiers(const src_reg &src)
306 {
307 if (!src.abs && !src.negate)
308 return src;
309
310 dst_reg resolved = dst_reg(this, glsl_type::ivec4_type);
311 resolved.type = src.type;
312 emit(MOV(resolved, src));
313
314 return src_reg(resolved);
315 }
316
317 src_reg
318 vec4_visitor::fix_math_operand(const src_reg &src)
319 {
320 if (devinfo->gen < 6 || devinfo->gen >= 8 || src.file == BAD_FILE)
321 return src;
322
323 /* The gen6 math instruction ignores the source modifiers --
324 * swizzle, abs, negate, and at least some parts of the register
325 * region description.
326 *
327 * Rather than trying to enumerate all these cases, *always* expand the
328 * operand to a temp GRF for gen6.
329 *
330 * For gen7, keep the operand as-is, except if immediate, which gen7 still
331 * can't use.
332 */
333
334 if (devinfo->gen == 7 && src.file != IMM)
335 return src;
336
337 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
338 expanded.type = src.type;
339 emit(MOV(expanded, src));
340 return src_reg(expanded);
341 }
342
343 vec4_instruction *
344 vec4_visitor::emit_math(enum opcode opcode,
345 const dst_reg &dst,
346 const src_reg &src0, const src_reg &src1)
347 {
348 vec4_instruction *math =
349 emit(opcode, dst, fix_math_operand(src0), fix_math_operand(src1));
350
351 if (devinfo->gen == 6 && dst.writemask != WRITEMASK_XYZW) {
352 /* MATH on Gen6 must be align1, so we can't do writemasks. */
353 math->dst = dst_reg(this, glsl_type::vec4_type);
354 math->dst.type = dst.type;
355 math = emit(MOV(dst, src_reg(math->dst)));
356 } else if (devinfo->gen < 6) {
357 math->base_mrf = 1;
358 math->mlen = src1.file == BAD_FILE ? 1 : 2;
359 }
360
361 return math;
362 }
363
364 void
365 vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
366 {
367 if (devinfo->gen < 7) {
368 unreachable("ir_unop_pack_half_2x16 should be lowered");
369 }
370
371 assert(dst.type == BRW_REGISTER_TYPE_UD);
372 assert(src0.type == BRW_REGISTER_TYPE_F);
373
374 /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
375 *
376 * Because this instruction does not have a 16-bit floating-point type,
377 * the destination data type must be Word (W).
378 *
379 * The destination must be DWord-aligned and specify a horizontal stride
380 * (HorzStride) of 2. The 16-bit result is stored in the lower word of
381 * each destination channel and the upper word is not modified.
382 *
383 * The above restriction implies that the f32to16 instruction must use
384 * align1 mode, because only in align1 mode is it possible to specify
385 * horizontal stride. We choose here to defy the hardware docs and emit
386 * align16 instructions.
387 *
388 * (I [chadv] did attempt to emit align1 instructions for VS f32to16
389 * instructions. I was partially successful in that the code passed all
390 * tests. However, the code was dubiously correct and fragile, and the
391 * tests were not harsh enough to probe that frailty. Not trusting the
392 * code, I chose instead to remain in align16 mode in defiance of the hw
393 * docs).
394 *
395 * I've [chadv] experimentally confirmed that, on gen7 hardware and the
396 * simulator, emitting a f32to16 in align16 mode with UD as destination
397 * data type is safe. The behavior differs from that specified in the PRM
398 * in that the upper word of each destination channel is cleared to 0.
399 */
400
401 dst_reg tmp_dst(this, glsl_type::uvec2_type);
402 src_reg tmp_src(tmp_dst);
403
404 #if 0
405 /* Verify the undocumented behavior on which the following instructions
406 * rely. If f32to16 fails to clear the upper word of the X and Y channels,
407 * then the result of the bit-or instruction below will be incorrect.
408 *
409 * You should inspect the disasm output in order to verify that the MOV is
410 * not optimized away.
411 */
412 emit(MOV(tmp_dst, brw_imm_ud(0x12345678u)));
413 #endif
414
415 /* Give tmp the form below, where "." means untouched.
416 *
417 * w z y x w z y x
418 * |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
419 *
420 * That the upper word of each write-channel be 0 is required for the
421 * following bit-shift and bit-or instructions to work. Note that this
422 * relies on the undocumented hardware behavior mentioned above.
423 */
424 tmp_dst.writemask = WRITEMASK_XY;
425 emit(F32TO16(tmp_dst, src0));
426
427 /* Give the write-channels of dst the form:
428 * 0xhhhh0000
429 */
430 tmp_src.swizzle = BRW_SWIZZLE_YYYY;
431 emit(SHL(dst, tmp_src, brw_imm_ud(16u)));
432
433 /* Finally, give the write-channels of dst the form of packHalf2x16's
434 * output:
435 * 0xhhhhllll
436 */
437 tmp_src.swizzle = BRW_SWIZZLE_XXXX;
438 emit(OR(dst, src_reg(dst), tmp_src));
439 }
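/* Editorial sketch (not driver code): the scalar computation that the
 * F32TO16/SHL/OR sequence above performs, matching GLSL packHalf2x16()
 * with x in the low word and y in the high word.  float_to_half() is a
 * hypothetical helper returning the 16-bit half-float encoding.
 */
static uint32_t
pack_half_2x16_reference(float x, float y)
{
   const uint32_t lo = float_to_half(x);   /* tmp.x = 0x0000llll */
   const uint32_t hi = float_to_half(y);   /* tmp.y = 0x0000hhhh */
   return (hi << 16) | lo;                 /* dst   = 0xhhhhllll */
}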
440
441 void
442 vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
443 {
444 if (devinfo->gen < 7) {
445 unreachable("ir_unop_unpack_half_2x16 should be lowered");
446 }
447
448 assert(dst.type == BRW_REGISTER_TYPE_F);
449 assert(src0.type == BRW_REGISTER_TYPE_UD);
450
451 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
452 *
453 * Because this instruction does not have a 16-bit floating-point type,
454 * the source data type must be Word (W). The destination type must be
455 * F (Float).
456 *
457 * To use W as the source data type, we must adjust horizontal strides,
458 * which is only possible in align1 mode. All my [chadv] attempts at
459 * emitting align1 instructions for unpackHalf2x16 failed to pass the
460 * Piglit tests, so I gave up.
461 *
462 * I've verified that, on gen7 hardware and the simulator, it is safe to
463 * emit f16to32 in align16 mode with UD as source data type.
464 */
465
466 dst_reg tmp_dst(this, glsl_type::uvec2_type);
467 src_reg tmp_src(tmp_dst);
468
469 tmp_dst.writemask = WRITEMASK_X;
470 emit(AND(tmp_dst, src0, brw_imm_ud(0xffffu)));
471
472 tmp_dst.writemask = WRITEMASK_Y;
473 emit(SHR(tmp_dst, src0, brw_imm_ud(16u)));
474
475 dst.writemask = WRITEMASK_XY;
476 emit(F16TO32(dst, tmp_src));
477 }
478
479 void
480 vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
481 {
482 /* Instead of splitting the 32-bit integer, shifting, and ORing it back
483 * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
484 * is not suitable to generate the shift values, but we can use the packed
485 * vector float and a type-converting MOV.
486 */
487 dst_reg shift(this, glsl_type::uvec4_type);
488 emit(MOV(shift, brw_imm_vf4(0x00, 0x60, 0x70, 0x78)));
489
490 dst_reg shifted(this, glsl_type::uvec4_type);
491 src0.swizzle = BRW_SWIZZLE_XXXX;
492 emit(SHR(shifted, src0, src_reg(shift)));
493
494 shifted.type = BRW_REGISTER_TYPE_UB;
495 dst_reg f(this, glsl_type::vec4_type);
496 emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
497
498 emit(MUL(dst, src_reg(f), brw_imm_f(1.0f / 255.0f)));
499 }
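/* Editorial sketch (not driver code): why brw_imm_vf4(0x00, 0x60, 0x70, 0x78)
 * above becomes the shift counts <0, 8, 16, 24>.  Each byte of a VF immediate
 * is a restricted 8-bit float (1 sign bit, 3 exponent bits with bias 3,
 * 4 mantissa bits, with ±0.0 special-cased), so those bytes decode to 0.0,
 * 8.0, 16.0 and 24.0, and the type-converting MOV turns them into integers.
 * Assumes <math.h> for exp2f() and <stdint.h> for uint8_t.
 */
static float
decode_vf_byte(uint8_t vf)
{
   if ((vf & 0x7f) == 0)
      return (vf & 0x80) ? -0.0f : 0.0f;

   const unsigned sign = vf >> 7;
   const unsigned exponent = (vf >> 4) & 0x7;
   const unsigned mantissa = vf & 0xf;
   const float magnitude = (1.0f + mantissa / 16.0f) * exp2f((int)exponent - 3);

   return sign ? -magnitude : magnitude;
}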
500
501 void
502 vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
503 {
504 /* Instead of splitting the 32-bit integer, shifting, and ORing it back
505 * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
506 * is not suitable to generate the shift values, but we can use the packed
507 * vector float and a type-converting MOV.
508 */
509 dst_reg shift(this, glsl_type::uvec4_type);
510 emit(MOV(shift, brw_imm_vf4(0x00, 0x60, 0x70, 0x78)));
511
512 dst_reg shifted(this, glsl_type::uvec4_type);
513 src0.swizzle = BRW_SWIZZLE_XXXX;
514 emit(SHR(shifted, src0, src_reg(shift)));
515
516 shifted.type = BRW_REGISTER_TYPE_B;
517 dst_reg f(this, glsl_type::vec4_type);
518 emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
519
520 dst_reg scaled(this, glsl_type::vec4_type);
521 emit(MUL(scaled, src_reg(f), brw_imm_f(1.0f / 127.0f)));
522
523 dst_reg max(this, glsl_type::vec4_type);
524 emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), brw_imm_f(-1.0f));
525 emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), brw_imm_f(1.0f));
526 }
527
528 void
529 vec4_visitor::emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0)
530 {
531 dst_reg saturated(this, glsl_type::vec4_type);
532 vec4_instruction *inst = emit(MOV(saturated, src0));
533 inst->saturate = true;
534
535 dst_reg scaled(this, glsl_type::vec4_type);
536 emit(MUL(scaled, src_reg(saturated), brw_imm_f(255.0f)));
537
538 dst_reg rounded(this, glsl_type::vec4_type);
539 emit(RNDE(rounded, src_reg(scaled)));
540
541 dst_reg u(this, glsl_type::uvec4_type);
542 emit(MOV(u, src_reg(rounded)));
543
544 src_reg bytes(u);
545 emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
546 }
547
548 void
549 vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
550 {
551 dst_reg max(this, glsl_type::vec4_type);
552 emit_minmax(BRW_CONDITIONAL_GE, max, src0, brw_imm_f(-1.0f));
553
554 dst_reg min(this, glsl_type::vec4_type);
555 emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), brw_imm_f(1.0f));
556
557 dst_reg scaled(this, glsl_type::vec4_type);
558 emit(MUL(scaled, src_reg(min), brw_imm_f(127.0f)));
559
560 dst_reg rounded(this, glsl_type::vec4_type);
561 emit(RNDE(rounded, src_reg(scaled)));
562
563 dst_reg i(this, glsl_type::ivec4_type);
564 emit(MOV(i, src_reg(rounded)));
565
566 src_reg bytes(i);
567 emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
568 }
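/* Editorial sketch (not driver code): scalar reference for the two pack
 * helpers above, matching GLSL packUnorm4x8() -- clamp to [0, 1], scale by
 * 255, round to even, pack into bytes.  The snorm variant clamps to [-1, 1]
 * and scales by 127 instead.  Assumes <math.h> and <stdint.h>.
 */
static uint32_t
pack_unorm_4x8_reference(const float v[4])
{
   uint32_t packed = 0;
   for (int i = 0; i < 4; i++) {
      const float clamped = fminf(fmaxf(v[i], 0.0f), 1.0f);
      const uint32_t byte = (uint32_t)rintf(clamped * 255.0f);
      packed |= byte << (8 * i);
   }
   return packed;
}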
569
570 /*
571 * Returns the minimum number of vec4 (as_vec4 == true) or dvec4 (as_vec4 ==
572 * false) elements needed to pack a type.
573 */
574 static int
575 type_size_xvec4(const struct glsl_type *type, bool as_vec4)
576 {
577 unsigned int i;
578 int size;
579
580 switch (type->base_type) {
581 case GLSL_TYPE_UINT:
582 case GLSL_TYPE_INT:
583 case GLSL_TYPE_FLOAT:
584 case GLSL_TYPE_BOOL:
585 case GLSL_TYPE_DOUBLE:
586 if (type->is_matrix()) {
587 const glsl_type *col_type = type->column_type();
588 unsigned col_slots =
589 (as_vec4 && col_type->is_dual_slot()) ? 2 : 1;
590 return type->matrix_columns * col_slots;
591 } else {
592 /* Regardless of size of vector, it gets a vec4. This is bad
593 * packing for things like floats, but otherwise arrays become a
594 * mess. Hopefully a later pass over the code can pack scalars
595 * down if appropriate.
596 */
597 return (as_vec4 && type->is_dual_slot()) ? 2 : 1;
598 }
599 case GLSL_TYPE_ARRAY:
600 assert(type->length > 0);
601 return type_size_xvec4(type->fields.array, as_vec4) * type->length;
602 case GLSL_TYPE_STRUCT:
603 size = 0;
604 for (i = 0; i < type->length; i++) {
605 size += type_size_xvec4(type->fields.structure[i].type, as_vec4);
606 }
607 return size;
608 case GLSL_TYPE_SUBROUTINE:
609 return 1;
610
611 case GLSL_TYPE_SAMPLER:
612 /* Samplers take up no register space, since they're baked in at
613 * link time.
614 */
615 return 0;
616 case GLSL_TYPE_ATOMIC_UINT:
617 return 0;
618 case GLSL_TYPE_IMAGE:
619 return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
620 case GLSL_TYPE_VOID:
621 case GLSL_TYPE_ERROR:
622 case GLSL_TYPE_INTERFACE:
623 case GLSL_TYPE_FUNCTION:
624 unreachable("not reached");
625 }
626
627 return 0;
628 }
629
630 /**
631 * Returns the minimum number of vec4 elements needed to pack a type.
632 *
633 * For simple types, it will return 1 (a single vec4); for matrices, the
634 * number of columns; for array and struct, the sum of the vec4_size of
635 * each of its elements; and for sampler and atomic, zero.
636 *
637 * This method is useful to calculate how much register space is needed to
638 * store a particular type.
639 */
640 extern "C" int
641 type_size_vec4(const struct glsl_type *type)
642 {
643 return type_size_xvec4(type, true);
644 }
645
646 /**
647 * Returns the minimum number of dvec4 elements needed to pack a type.
648 *
649 * For simple types, it will return 1 (a single dvec4); for matrices, the
650 * number of columns; for array and struct, the sum of the dvec4_size of
651 * each of its elements; and for sampler and atomic, zero.
652 *
653 * This method is useful to calculate how much register space is needed to
654 * store a particular type.
655 *
656 * Measuring double-precision vertex inputs as dvec4 is required because
657 * ARB_vertex_attrib_64bit states that they use the same number of locations
658 * as the single-precision version. That is, two consecutive dvec4s would be
659 * located in locations "x" and "x+1", not "x" and "x+2".
660 *
661 * In order to map vec4/dvec4 vertex inputs to the proper ATTRs,
662 * remap_vs_attrs() takes into account both the location and whether the
663 * type fits in one or two vec4 slots.
664 */
665 extern "C" int
666 type_size_dvec4(const struct glsl_type *type)
667 {
668 return type_size_xvec4(type, false);
669 }
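/* Worked examples for the two helpers above (editorial, not exhaustive):
 * float, vec3 and vec4 each take 1 slot under either measure; mat4 takes 4
 * (one per column); vec4[3] takes 3; dvec4 is dual-slot, so it counts as
 * 2 vec4 slots but only 1 dvec4 slot; dmat3 (three dual-slot dvec3 columns)
 * counts as 6 vec4 slots but 3 dvec4 slots.
 */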
670
671 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
672 {
673 init();
674
675 this->file = VGRF;
676 this->nr = v->alloc.allocate(type_size_vec4(type));
677
678 if (type->is_array() || type->is_record()) {
679 this->swizzle = BRW_SWIZZLE_NOOP;
680 } else {
681 this->swizzle = brw_swizzle_for_size(type->vector_elements);
682 }
683
684 this->type = brw_type_for_base_type(type);
685 }
686
687 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
688 {
689 assert(size > 0);
690
691 init();
692
693 this->file = VGRF;
694 this->nr = v->alloc.allocate(type_size_vec4(type) * size);
695
696 this->swizzle = BRW_SWIZZLE_NOOP;
697
698 this->type = brw_type_for_base_type(type);
699 }
700
701 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
702 {
703 init();
704
705 this->file = VGRF;
706 this->nr = v->alloc.allocate(type_size_vec4(type));
707
708 if (type->is_array() || type->is_record()) {
709 this->writemask = WRITEMASK_XYZW;
710 } else {
711 this->writemask = (1 << type->vector_elements) - 1;
712 }
713
714 this->type = brw_type_for_base_type(type);
715 }
716
717 vec4_instruction *
718 vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
719 src_reg src0, src_reg src1)
720 {
721 vec4_instruction *inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
722 inst->conditional_mod = conditionalmod;
723 return inst;
724 }
725
726 vec4_instruction *
727 vec4_visitor::emit_lrp(const dst_reg &dst,
728 const src_reg &x, const src_reg &y, const src_reg &a)
729 {
730 if (devinfo->gen >= 6) {
731 /* Note that the instruction's argument order is reversed from GLSL
732 * and the IR.
733 */
734 return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y),
735 fix_3src_operand(x)));
736 } else {
737 /* Earlier generations don't support three source operations, so we
738 * need to emit x*(1-a) + y*a.
739 */
740 dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type);
741 dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
742 dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
743 y_times_a.writemask = dst.writemask;
744 one_minus_a.writemask = dst.writemask;
745 x_times_one_minus_a.writemask = dst.writemask;
746
747 emit(MUL(y_times_a, y, a));
748 emit(ADD(one_minus_a, negate(a), brw_imm_f(1.0f)));
749 emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
750 return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
751 }
752 }
753
754 /**
755 * Emits the instructions needed to perform a pull constant load. before_block
756 * and before_inst can be NULL in which case the instruction will be appended
757 * to the end of the instruction list.
758 */
759 void
760 vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
761 src_reg surf_index,
762 src_reg offset_reg,
763 bblock_t *before_block,
764 vec4_instruction *before_inst)
765 {
766 assert((before_inst == NULL && before_block == NULL) ||
767 (before_inst && before_block));
768
769 vec4_instruction *pull;
770
771 if (devinfo->gen >= 9) {
772 /* Gen9+ needs a message header in order to use SIMD4x2 mode */
773 src_reg header(this, glsl_type::uvec4_type, 2);
774
775 pull = new(mem_ctx)
776 vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
777 dst_reg(header));
778
779 if (before_inst)
780 emit_before(before_block, before_inst, pull);
781 else
782 emit(pull);
783
784 dst_reg index_reg = retype(byte_offset(dst_reg(header), REG_SIZE),
785 offset_reg.type);
786 pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
787
788 if (before_inst)
789 emit_before(before_block, before_inst, pull);
790 else
791 emit(pull);
792
793 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
794 dst,
795 surf_index,
796 header);
797 pull->mlen = 2;
798 pull->header_size = 1;
799 } else if (devinfo->gen >= 7) {
800 dst_reg grf_offset = dst_reg(this, glsl_type::uint_type);
801
802 grf_offset.type = offset_reg.type;
803
804 pull = MOV(grf_offset, offset_reg);
805
806 if (before_inst)
807 emit_before(before_block, before_inst, pull);
808 else
809 emit(pull);
810
811 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
812 dst,
813 surf_index,
814 src_reg(grf_offset));
815 pull->mlen = 1;
816 } else {
817 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
818 dst,
819 surf_index,
820 offset_reg);
821 pull->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen) + 1;
822 pull->mlen = 1;
823 }
824
825 if (before_inst)
826 emit_before(before_block, before_inst, pull);
827 else
828 emit(pull);
829 }
830
831 src_reg
832 vec4_visitor::emit_uniformize(const src_reg &src)
833 {
834 const src_reg chan_index(this, glsl_type::uint_type);
835 const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
836 src.type);
837
838 emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
839 ->force_writemask_all = true;
840 emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
841 ->force_writemask_all = true;
842
843 return src_reg(dst);
844 }
845
846 src_reg
847 vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
848 src_reg coordinate, src_reg surface)
849 {
850 vec4_instruction *inst =
851 new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
852 dst_reg(this, glsl_type::uvec4_type));
853 inst->base_mrf = 2;
854 inst->src[1] = surface;
855 inst->src[2] = surface;
856
857 int param_base;
858
859 if (devinfo->gen >= 9) {
860 /* Gen9+ needs a message header in order to use SIMD4x2 mode */
861 vec4_instruction *header_inst = new(mem_ctx)
862 vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
863 dst_reg(MRF, inst->base_mrf));
864
865 emit(header_inst);
866
867 inst->mlen = 2;
868 inst->header_size = 1;
869 param_base = inst->base_mrf + 1;
870 } else {
871 inst->mlen = 1;
872 param_base = inst->base_mrf;
873 }
874
875 /* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */
876 int coord_mask = (1 << coordinate_type->vector_elements) - 1;
877 int zero_mask = 0xf & ~coord_mask;
878
879 emit(MOV(dst_reg(MRF, param_base, coordinate_type, coord_mask),
880 coordinate));
881
882 emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask),
883 brw_imm_d(0)));
884
885 emit(inst);
886 return src_reg(inst->dst);
887 }
888
889 bool
890 vec4_visitor::is_high_sampler(src_reg sampler)
891 {
892 if (devinfo->gen < 8 && !devinfo->is_haswell)
893 return false;
894
895 return sampler.file != IMM || sampler.ud >= 16;
896 }
897
898 void
899 vec4_visitor::emit_texture(ir_texture_opcode op,
900 dst_reg dest,
901 const glsl_type *dest_type,
902 src_reg coordinate,
903 int coord_components,
904 src_reg shadow_comparitor,
905 src_reg lod, src_reg lod2,
906 src_reg sample_index,
907 uint32_t constant_offset,
908 src_reg offset_value,
909 src_reg mcs,
910 uint32_t surface,
911 src_reg surface_reg,
912 src_reg sampler_reg)
913 {
914 /* The sampler can only meaningfully compute LOD for fragment shader
915 * messages. For all other stages, we change the opcode to TXL and hardcode
916 * the LOD to 0.
917 *
918 * textureQueryLevels() is implemented in terms of TXS so we need to pass a
919 * valid LOD argument.
920 */
921 if (op == ir_tex || op == ir_query_levels) {
922 assert(lod.file == BAD_FILE);
923 lod = brw_imm_f(0.0f);
924 }
925
926 enum opcode opcode;
927 switch (op) {
928 case ir_tex: opcode = SHADER_OPCODE_TXL; break;
929 case ir_txl: opcode = SHADER_OPCODE_TXL; break;
930 case ir_txd: opcode = SHADER_OPCODE_TXD; break;
931 case ir_txf: opcode = SHADER_OPCODE_TXF; break;
932 case ir_txf_ms: opcode = (devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W :
933 SHADER_OPCODE_TXF_CMS); break;
934 case ir_txs: opcode = SHADER_OPCODE_TXS; break;
935 case ir_tg4: opcode = offset_value.file != BAD_FILE
936 ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
937 case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
938 case ir_texture_samples: opcode = SHADER_OPCODE_SAMPLEINFO; break;
939 case ir_txb:
940 unreachable("TXB is not valid for vertex shaders.");
941 case ir_lod:
942 unreachable("LOD is not valid for vertex shaders.");
943 case ir_samples_identical: {
944 /* There are some challenges implementing this for vec4, and it seems
945 * unlikely to be used anyway. For now, just always return false.
946 */
947 emit(MOV(dest, brw_imm_ud(0u)));
948 return;
949 }
950 default:
951 unreachable("Unrecognized tex op");
952 }
953
954 vec4_instruction *inst = new(mem_ctx) vec4_instruction(opcode, dest);
955
956 inst->offset = constant_offset;
957
958 /* The message header is necessary for:
959 * - Gen4 (always)
960 * - Gen9+ for selecting SIMD4x2
961 * - Texel offsets
962 * - Gather channel selection
963 * - Sampler indices too large to fit in a 4-bit value.
964 * - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
965 */
966 inst->header_size =
967 (devinfo->gen < 5 || devinfo->gen >= 9 ||
968 inst->offset != 0 || op == ir_tg4 ||
969 op == ir_texture_samples ||
970 is_high_sampler(sampler_reg)) ? 1 : 0;
971 inst->base_mrf = 2;
972 inst->mlen = inst->header_size;
973 inst->dst.writemask = WRITEMASK_XYZW;
974 inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
975
976 inst->src[1] = surface_reg;
977 inst->src[2] = sampler_reg;
978
979 /* MRF for the first parameter */
980 int param_base = inst->base_mrf + inst->header_size;
981
982 if (op == ir_txs || op == ir_query_levels) {
983 int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
984 emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
985 inst->mlen++;
986 } else if (op == ir_texture_samples) {
987 inst->dst.writemask = WRITEMASK_X;
988 } else {
989 /* Load the coordinate */
990 /* FINISHME: gl_clamp_mask and saturate */
991 int coord_mask = (1 << coord_components) - 1;
992 int zero_mask = 0xf & ~coord_mask;
993
994 emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
995 coordinate));
996 inst->mlen++;
997
998 if (zero_mask != 0) {
999 emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
1000 brw_imm_d(0)));
1001 }
1002 /* Load the shadow comparitor */
1003 if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) {
1004 emit(MOV(dst_reg(MRF, param_base + 1, shadow_comparitor.type,
1005 WRITEMASK_X),
1006 shadow_comparitor));
1007 inst->mlen++;
1008 }
1009
1010 /* Load the LOD info */
1011 if (op == ir_tex || op == ir_txl) {
1012 int mrf, writemask;
1013 if (devinfo->gen >= 5) {
1014 mrf = param_base + 1;
1015 if (shadow_comparitor.file != BAD_FILE) {
1016 writemask = WRITEMASK_Y;
1017 /* mlen already incremented */
1018 } else {
1019 writemask = WRITEMASK_X;
1020 inst->mlen++;
1021 }
1022 } else /* devinfo->gen == 4 */ {
1023 mrf = param_base;
1024 writemask = WRITEMASK_W;
1025 }
1026 emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
1027 } else if (op == ir_txf) {
1028 emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
1029 } else if (op == ir_txf_ms) {
1030 emit(MOV(dst_reg(MRF, param_base + 1, sample_index.type, WRITEMASK_X),
1031 sample_index));
1032 if (opcode == SHADER_OPCODE_TXF_CMS_W) {
1033 /* MCS data is stored in the first two channels of `mcs`, but we
1034 * need to get it into the .y and .z channels of the second vec4
1035 * of params.
1036 */
1037 mcs.swizzle = BRW_SWIZZLE4(0, 0, 1, 1);
1038 emit(MOV(dst_reg(MRF, param_base + 1,
1039 glsl_type::uint_type, WRITEMASK_YZ),
1040 mcs));
1041 } else if (devinfo->gen >= 7) {
1042 /* MCS data is in the first channel of `mcs`, but we need to get it into
1043 * the .y channel of the second vec4 of params, so replicate .x across
1044 * the whole vec4 and then mask off everything except .y
1045 */
1046 mcs.swizzle = BRW_SWIZZLE_XXXX;
1047 emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
1048 mcs));
1049 }
1050 inst->mlen++;
1051 } else if (op == ir_txd) {
1052 const brw_reg_type type = lod.type;
1053
1054 if (devinfo->gen >= 5) {
1055 lod.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1056 lod2.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1057 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), lod));
1058 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), lod2));
1059 inst->mlen++;
1060
1061 if (dest_type->vector_elements == 3 || shadow_comparitor.file != BAD_FILE) {
1062 lod.swizzle = BRW_SWIZZLE_ZZZZ;
1063 lod2.swizzle = BRW_SWIZZLE_ZZZZ;
1064 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), lod));
1065 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), lod2));
1066 inst->mlen++;
1067
1068 if (shadow_comparitor.file != BAD_FILE) {
1069 emit(MOV(dst_reg(MRF, param_base + 2,
1070 shadow_comparitor.type, WRITEMASK_Z),
1071 shadow_comparitor));
1072 }
1073 }
1074 } else /* devinfo->gen == 4 */ {
1075 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), lod));
1076 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), lod2));
1077 inst->mlen += 2;
1078 }
1079 } else if (op == ir_tg4 && offset_value.file != BAD_FILE) {
1080 if (shadow_comparitor.file != BAD_FILE) {
1081 emit(MOV(dst_reg(MRF, param_base, shadow_comparitor.type, WRITEMASK_W),
1082 shadow_comparitor));
1083 }
1084
1085 emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY),
1086 offset_value));
1087 inst->mlen++;
1088 }
1089 }
1090
1091 emit(inst);
1092
1093 /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
1094 * spec requires layers.
1095 */
1096 if (op == ir_txs && devinfo->gen < 7) {
1097 /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
1098 emit_minmax(BRW_CONDITIONAL_GE, writemask(inst->dst, WRITEMASK_Z),
1099 src_reg(inst->dst), brw_imm_d(1));
1100 }
1101
1102 if (devinfo->gen == 6 && op == ir_tg4) {
1103 emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], inst->dst);
1104 }
1105
1106 if (op == ir_query_levels) {
1107 /* # levels is in .w */
1108 src_reg swizzled(dest);
1109 swizzled.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W,
1110 SWIZZLE_W, SWIZZLE_W);
1111 emit(MOV(dest, swizzled));
1112 }
1113 }
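/* Worked example of the marshalling above (editorial): a gen7 TXL with a
 * shadow comparator and no texel offsets needs no header, so base_mrf = 2 is
 * already the first parameter register -- m2 holds the coordinate, m3.x the
 * comparator and m3.y the LOD -- for a final mlen of 2.
 */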
1114
1115 /**
1116 * Apply workarounds for Gen6 gather with UINT/SINT
1117 */
1118 void
1119 vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
1120 {
1121 if (!wa)
1122 return;
1123
1124 int width = (wa & WA_8BIT) ? 8 : 16;
1125 dst_reg dst_f = dst;
1126 dst_f.type = BRW_REGISTER_TYPE_F;
1127
1128 /* Convert from UNORM to UINT */
1129 emit(MUL(dst_f, src_reg(dst_f), brw_imm_f((float)((1 << width) - 1))));
1130 emit(MOV(dst, src_reg(dst_f)));
1131
1132 if (wa & WA_SIGN) {
1133 /* Reinterpret the UINT value as a signed INT value by
1134 * shifting the sign bit into place, then shifting back
1135 * preserving sign.
1136 */
1137 emit(SHL(dst, src_reg(dst), brw_imm_d(32 - width)));
1138 emit(ASR(dst, src_reg(dst), brw_imm_d(32 - width)));
1139 }
1140 }
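/* Editorial sketch (not driver code) of the 8-bit WA_SIGN case above: the
 * gathered value comes back as UNORM, so rescale it to the integer range and
 * sign-extend the low 8 bits with the same shift pair the driver emits.
 */
static int32_t
gen6_gather_wa_reference(float gathered_unorm)
{
   const int width = 8;
   int32_t v = (int32_t)(gathered_unorm * (float)((1 << width) - 1));

   /* Mirrors the SHL/ASR pair; relies on arithmetic right shift of a
    * negative value, as the hardware instructions do.
    */
   v = (int32_t)((uint32_t)v << (32 - width)) >> (32 - width);
   return v;
}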
1141
1142 void
1143 vec4_visitor::gs_emit_vertex(int /* stream_id */)
1144 {
1145 unreachable("not reached");
1146 }
1147
1148 void
1149 vec4_visitor::gs_end_primitive()
1150 {
1151 unreachable("not reached");
1152 }
1153
1154 void
1155 vec4_visitor::emit_ndc_computation()
1156 {
1157 if (output_reg[VARYING_SLOT_POS].file == BAD_FILE)
1158 return;
1159
1160 /* Get the position */
1161 src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
1162
1163 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1164 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1165 output_reg[BRW_VARYING_SLOT_NDC] = ndc;
1166
1167 current_annotation = "NDC";
1168 dst_reg ndc_w = ndc;
1169 ndc_w.writemask = WRITEMASK_W;
1170 src_reg pos_w = pos;
1171 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1172 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1173
1174 dst_reg ndc_xyz = ndc;
1175 ndc_xyz.writemask = WRITEMASK_XYZ;
1176
1177 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
1178 }
1179
1180 void
1181 vec4_visitor::emit_psiz_and_flags(dst_reg reg)
1182 {
1183 if (devinfo->gen < 6 &&
1184 ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
1185 output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE ||
1186 devinfo->has_negative_rhw_bug)) {
1187 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1188 dst_reg header1_w = header1;
1189 header1_w.writemask = WRITEMASK_W;
1190
1191 emit(MOV(header1, brw_imm_ud(0u)));
1192
1193 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
1194 src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
1195
1196 current_annotation = "Point size";
1197 emit(MUL(header1_w, psiz, brw_imm_f((float)(1 << 11))));
1198 emit(AND(header1_w, src_reg(header1_w), brw_imm_d(0x7ff << 8)));
1199 }
1200
1201 if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
1202 current_annotation = "Clipping flags";
1203 dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
1204 dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
1205
1206 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
1207 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0));
1208 emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
1209
1210 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
1211 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0));
1212 emit(SHL(flags1, src_reg(flags1), brw_imm_d(4)));
1213 emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
1214 }
1215
1216 /* i965 clipping workaround:
1217 * 1) Test for -ve rhw
1218 * 2) If set,
1219 * set ndc = (0,0,0,0)
1220 * set ucp[6] = 1
1221 *
1222 * Later, clipping will detect ucp[6] and ensure the primitive is
1223 * clipped against all fixed planes.
1224 */
1225 if (devinfo->has_negative_rhw_bug &&
1226 output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) {
1227 src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
1228 ndc_w.swizzle = BRW_SWIZZLE_WWWW;
1229 emit(CMP(dst_null_f(), ndc_w, brw_imm_f(0.0f), BRW_CONDITIONAL_L));
1230 vec4_instruction *inst;
1231 inst = emit(OR(header1_w, src_reg(header1_w), brw_imm_ud(1u << 6)));
1232 inst->predicate = BRW_PREDICATE_NORMAL;
1233 output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F;
1234 inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], brw_imm_f(0.0f)));
1235 inst->predicate = BRW_PREDICATE_NORMAL;
1236 }
1237
1238 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
1239 } else if (devinfo->gen < 6) {
1240 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u)));
1241 } else {
1242 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), brw_imm_d(0)));
1243 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
1244 dst_reg reg_w = reg;
1245 reg_w.writemask = WRITEMASK_W;
1246 src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ]);
1247 reg_as_src.type = reg_w.type;
1248 reg_as_src.swizzle = brw_swizzle_for_size(1);
1249 emit(MOV(reg_w, reg_as_src));
1250 }
1251 if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
1252 dst_reg reg_y = reg;
1253 reg_y.writemask = WRITEMASK_Y;
1254 reg_y.type = BRW_REGISTER_TYPE_D;
1255 output_reg[VARYING_SLOT_LAYER].type = reg_y.type;
1256 emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER])));
1257 }
1258 if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
1259 dst_reg reg_z = reg;
1260 reg_z.writemask = WRITEMASK_Z;
1261 reg_z.type = BRW_REGISTER_TYPE_D;
1262 output_reg[VARYING_SLOT_VIEWPORT].type = reg_z.type;
1263 emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT])));
1264 }
1265 }
1266 }
1267
1268 vec4_instruction *
1269 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
1270 {
1271 assert(varying < VARYING_SLOT_MAX);
1272 assert(output_reg[varying].type == reg.type);
1273 current_annotation = output_reg_annotation[varying];
1274 if (output_reg[varying].file != BAD_FILE) {
1275 return emit(MOV(reg, src_reg(output_reg[varying])));
1276 } else
1277 return NULL;
1278 }
1279
1280 void
1281 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying, int component)
1282 {
1283 assert(varying < VARYING_SLOT_MAX);
1284 assert(varying >= VARYING_SLOT_VAR0);
1285 varying = varying - VARYING_SLOT_VAR0;
1286
1287 unsigned num_comps = output_generic_num_components[varying][component];
1288 if (num_comps == 0)
1289 return;
1290
1291 assert(output_generic_reg[varying][component].type == reg.type);
1292 current_annotation = output_reg_annotation[varying];
1293 if (output_generic_reg[varying][component].file != BAD_FILE) {
1294 src_reg src = src_reg(output_generic_reg[varying][component]);
1295 src.swizzle = BRW_SWZ_COMP_OUTPUT(component);
1296 reg.writemask =
1297 brw_writemask_for_component_packing(num_comps, component);
1298 emit(MOV(reg, src));
1299 }
1300 }
1301
1302 void
1303 vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
1304 {
1305 reg.type = BRW_REGISTER_TYPE_F;
1306 output_reg[varying].type = reg.type;
1307
1308 switch (varying) {
1309 case VARYING_SLOT_PSIZ:
1310 {
1311 /* PSIZ is always in slot 0, and is coupled with other flags. */
1312 current_annotation = "indices, point width, clip flags";
1313 emit_psiz_and_flags(reg);
1314 break;
1315 }
1316 case BRW_VARYING_SLOT_NDC:
1317 current_annotation = "NDC";
1318 if (output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE)
1319 emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
1320 break;
1321 case VARYING_SLOT_POS:
1322 current_annotation = "gl_Position";
1323 if (output_reg[VARYING_SLOT_POS].file != BAD_FILE)
1324 emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
1325 break;
1326 case VARYING_SLOT_EDGE:
1327 /* This is present when doing unfilled polygons. We're supposed to copy
1328 * the edge flag from the user-provided vertex array
1329 * (glEdgeFlagPointer), or otherwise we'll copy from the current value
1330 * of that attribute (starts as 1.0f). This is then used in clipping to
1331 * determine which edges should be drawn as wireframe.
1332 */
1333 current_annotation = "edge flag";
1334 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
1335 glsl_type::float_type, WRITEMASK_XYZW))));
1336 break;
1337 case BRW_VARYING_SLOT_PAD:
1338 /* No need to write to this slot */
1339 break;
1340 default:
1341 if (varying >= VARYING_SLOT_VAR0) {
1342 for (int i = 0; i < 4; i++) {
1343 emit_generic_urb_slot(reg, varying, i);
1344 }
1345 } else {
1346 emit_generic_urb_slot(reg, varying);
1347 }
1348 break;
1349 }
1350 }
1351
1352 static int
1353 align_interleaved_urb_mlen(const struct gen_device_info *devinfo, int mlen)
1354 {
1355 if (devinfo->gen >= 6) {
1356 /* URB data written (does not include the message header reg) must
1357 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
1358 * section 5.4.3.2.2: URB_INTERLEAVED.
1359 *
1360 * URB entries are allocated on a multiple of 1024 bits, so an
1361 * extra 128 bits written here to make the end align to 256 is
1362 * no problem.
1363 */
1364 if ((mlen % 2) != 1)
1365 mlen++;
1366 }
1367
1368 return mlen;
1369 }
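/* Worked example (editorial): mlen here includes the message header, so it
 * must end up odd for the data after the header to be a whole number of
 * register pairs -- mlen 3 (header + 2 data regs) is left alone, while
 * mlen 4 (header + 3 data regs) is padded to 5.
 */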
1370
1371
1372 /**
1373 * Generates the VUE payload plus the necessary URB write instructions to
1374 * output it.
1375 *
1376 * The VUE layout is documented in Volume 2a.
1377 */
1378 void
1379 vec4_visitor::emit_vertex()
1380 {
1381 /* MRF 0 is reserved for the debugger, so start with message header
1382 * in MRF 1.
1383 */
1384 int base_mrf = 1;
1385 int mrf = base_mrf;
1386 /* In the process of generating our URB write message contents, we
1387 * may need to unspill a register or load from an array. Those
1388 * reads would use MRFs 14-15.
1389 */
1390 int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
1391
1392 /* The following assertion verifies that max_usable_mrf causes an
1393 * even-numbered amount of URB write data, which will meet gen6's
1394 * requirements for length alignment.
1395 */
1396 assert ((max_usable_mrf - base_mrf) % 2 == 0);
1397
1398 /* First mrf is the g0-based message header containing URB handles and
1399 * such.
1400 */
1401 emit_urb_write_header(mrf++);
1402
1403 if (devinfo->gen < 6) {
1404 emit_ndc_computation();
1405 }
1406
1407 /* We may need to split this up into several URB writes, so do them in a
1408 * loop.
1409 */
1410 int slot = 0;
1411 bool complete = false;
1412 do {
1413 /* URB offset is in URB row increments, and each of our MRFs is half of
1414 * one of those, since we're doing interleaved writes.
1415 */
1416 int offset = slot / 2;
1417
1418 mrf = base_mrf + 1;
1419 for (; slot < prog_data->vue_map.num_slots; ++slot) {
1420 emit_urb_slot(dst_reg(MRF, mrf++),
1421 prog_data->vue_map.slot_to_varying[slot]);
1422
1423 /* If this was max_usable_mrf, we can't fit anything more into this
1424 * URB WRITE. Same thing if we reached the maximum length available.
1425 */
1426 if (mrf > max_usable_mrf ||
1427 align_interleaved_urb_mlen(devinfo, mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
1428 slot++;
1429 break;
1430 }
1431 }
1432
1433 complete = slot >= prog_data->vue_map.num_slots;
1434 current_annotation = "URB write";
1435 vec4_instruction *inst = emit_urb_write_opcode(complete);
1436 inst->base_mrf = base_mrf;
1437 inst->mlen = align_interleaved_urb_mlen(devinfo, mrf - base_mrf);
1438 inst->offset += offset;
1439 } while(!complete);
1440 }
1441
1442
1443 src_reg
1444 vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
1445 src_reg *reladdr, int reg_offset)
1446 {
1447 /* Because we store the values to scratch interleaved like our
1448 * vertex data, we need to scale the vec4 index by 2.
1449 */
1450 int message_header_scale = 2;
1451
1452 /* Pre-gen6, the message header uses byte offsets instead of vec4
1453 * (16-byte) offset units.
1454 */
1455 if (devinfo->gen < 6)
1456 message_header_scale *= 16;
1457
1458 if (reladdr) {
1459 src_reg index = src_reg(this, glsl_type::int_type);
1460
1461 emit_before(block, inst, ADD(dst_reg(index), *reladdr,
1462 brw_imm_d(reg_offset)));
1463 emit_before(block, inst, MUL(dst_reg(index), index,
1464 brw_imm_d(message_header_scale)));
1465
1466 return index;
1467 } else {
1468 return brw_imm_d(reg_offset * message_header_scale);
1469 }
1470 }
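/* Worked example (editorial): for reg_offset 3 with no reladdr, this returns
 * an immediate of 6 on gen6+ (16-byte units, two rows per register because of
 * interleaving) or 96 on gen4/5, where the message header takes byte offsets.
 */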
1471
1472 /**
1473 * Emits an instruction before @inst to load the value named by @orig_src
1474 * from scratch space at @base_offset to @temp.
1475 *
1476 * @base_offset is measured in 32-byte units (the size of a register).
1477 */
1478 void
1479 vec4_visitor::emit_scratch_read(bblock_t *block, vec4_instruction *inst,
1480 dst_reg temp, src_reg orig_src,
1481 int base_offset)
1482 {
1483 assert(orig_src.offset % REG_SIZE == 0);
1484 int reg_offset = base_offset + orig_src.offset / REG_SIZE;
1485 src_reg index = get_scratch_offset(block, inst, orig_src.reladdr,
1486 reg_offset);
1487
1488 emit_before(block, inst, SCRATCH_READ(temp, index));
1489 }
1490
1491 /**
1492 * Emits an instruction after @inst to store the value to be written
1493 * to @orig_dst to scratch space at @base_offset, from @temp.
1494 *
1495 * @base_offset is measured in 32-byte units (the size of a register).
1496 */
1497 void
1498 vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
1499 int base_offset)
1500 {
1501 assert(inst->dst.offset % REG_SIZE == 0);
1502 int reg_offset = base_offset + inst->dst.offset / REG_SIZE;
1503 src_reg index = get_scratch_offset(block, inst, inst->dst.reladdr,
1504 reg_offset);
1505
1506 /* Create a temporary register to store *inst's result in.
1507 *
1508 * We have to be careful in MOVing from our temporary result register in
1509 * the scratch write. If we swizzle from channels of the temporary that
1510 * weren't initialized, it will confuse live interval analysis, which will
1511 * make spilling fail to make progress.
1512 */
1513 const src_reg temp = swizzle(retype(src_reg(this, glsl_type::vec4_type),
1514 inst->dst.type),
1515 brw_swizzle_for_mask(inst->dst.writemask));
1516 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
1517 inst->dst.writemask));
1518 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
1519 if (inst->opcode != BRW_OPCODE_SEL)
1520 write->predicate = inst->predicate;
1521 write->ir = inst->ir;
1522 write->annotation = inst->annotation;
1523 inst->insert_after(block, write);
1524
1525 inst->dst.file = temp.file;
1526 inst->dst.nr = temp.nr;
1527 inst->dst.offset %= REG_SIZE;
1528 inst->dst.reladdr = NULL;
1529 }
1530
1531 /**
1532 * Checks if \p src and/or \p src.reladdr require a scratch read, and if so,
1533 * adds the scratch read(s) before \p inst. The function also checks for
1534 * recursive reladdr scratch accesses, issuing the corresponding scratch
1535 * loads and rewriting reladdr references accordingly.
1536 *
1537 * \return \p src if it did not require a scratch load, otherwise, the
1538 * register holding the result of the scratch load that the caller should
1539 * use to rewrite src.
1540 */
1541 src_reg
1542 vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
1543 vec4_instruction *inst, src_reg src)
1544 {
1545 /* Resolve recursive reladdr scratch access by calling ourselves
1546 * with src.reladdr
1547 */
1548 if (src.reladdr)
1549 *src.reladdr = emit_resolve_reladdr(scratch_loc, block, inst,
1550 *src.reladdr);
1551
1552 /* Now handle scratch access on src */
1553 if (src.file == VGRF && scratch_loc[src.nr] != -1) {
1554 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
1555 emit_scratch_read(block, inst, temp, src, scratch_loc[src.nr]);
1556 src.nr = temp.nr;
1557 src.offset %= REG_SIZE;
1558 src.reladdr = NULL;
1559 }
1560
1561 return src;
1562 }
1563
1564 /**
1565 * We can't generally support array access in GRF space, because a
1566 * single instruction's destination can only span 2 contiguous
1567 * registers. So, we send all GRF arrays that get variable index
1568 * access to scratch space.
1569 */
1570 void
1571 vec4_visitor::move_grf_array_access_to_scratch()
1572 {
1573 int scratch_loc[this->alloc.count];
1574 memset(scratch_loc, -1, sizeof(scratch_loc));
1575
1576 /* First, calculate the set of virtual GRFs that need to be punted
1577 * to scratch due to having any array access on them, and where in
1578 * scratch.
1579 */
1580 foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
1581 if (inst->dst.file == VGRF && inst->dst.reladdr) {
1582 if (scratch_loc[inst->dst.nr] == -1) {
1583 scratch_loc[inst->dst.nr] = last_scratch;
1584 last_scratch += this->alloc.sizes[inst->dst.nr];
1585 }
1586
1587 for (src_reg *iter = inst->dst.reladdr;
1588 iter->reladdr;
1589 iter = iter->reladdr) {
1590 if (iter->file == VGRF && scratch_loc[iter->nr] == -1) {
1591 scratch_loc[iter->nr] = last_scratch;
1592 last_scratch += this->alloc.sizes[iter->nr];
1593 }
1594 }
1595 }
1596
1597 for (int i = 0 ; i < 3; i++) {
1598 for (src_reg *iter = &inst->src[i];
1599 iter->reladdr;
1600 iter = iter->reladdr) {
1601 if (iter->file == VGRF && scratch_loc[iter->nr] == -1) {
1602 scratch_loc[iter->nr] = last_scratch;
1603 last_scratch += this->alloc.sizes[iter->nr];
1604 }
1605 }
1606 }
1607 }
1608
1609 /* Now, for anything that will be accessed through scratch, rewrite
1610 * it to load/store. Note that this is a _safe list walk, because
1611 * we may generate a new scratch_write instruction after the one
1612 * we're processing.
1613 */
1614 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
1615 /* Set up the annotation tracking for new generated instructions. */
1616 base_ir = inst->ir;
1617 current_annotation = inst->annotation;
1618
1619 /* First handle scratch access on the dst. Notice we have to handle
1620 * the case where the dst's reladdr also points to scratch space.
1621 */
1622 if (inst->dst.reladdr)
1623 *inst->dst.reladdr = emit_resolve_reladdr(scratch_loc, block, inst,
1624 *inst->dst.reladdr);
1625
1626 /* Now that we have handled any (possibly recursive) reladdr scratch
1627 * accesses for dst we can safely do the scratch write for dst itself
1628 */
1629 if (inst->dst.file == VGRF && scratch_loc[inst->dst.nr] != -1)
1630 emit_scratch_write(block, inst, scratch_loc[inst->dst.nr]);
1631
1632 /* Now handle scratch access on any src. In this case, since inst->src[i]
1633 * already is a src_reg, we can just call emit_resolve_reladdr with
1634 * inst->src[i] and it will take care of handling scratch loads for
1635 * both src and src.reladdr (recursively).
1636 */
1637 for (int i = 0 ; i < 3; i++) {
1638 inst->src[i] = emit_resolve_reladdr(scratch_loc, block, inst,
1639 inst->src[i]);
1640 }
1641 }
1642 }
1643
1644 /**
1645 * Emits an instruction before @inst to load the value named by @orig_src
1646 * from the pull constant buffer (surface) at @base_offset to @temp.
1647 */
1648 void
1649 vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
1650 dst_reg temp, src_reg orig_src,
1651 int base_offset, src_reg indirect)
1652 {
1653 assert(orig_src.offset % 16 == 0);
1654 int reg_offset = base_offset + orig_src.offset / 16;
1655 const unsigned index = prog_data->base.binding_table.pull_constants_start;
1656
1657 src_reg offset;
1658 if (indirect.file != BAD_FILE) {
1659 offset = src_reg(this, glsl_type::uint_type);
1660
1661 emit_before(block, inst, ADD(dst_reg(offset), indirect,
1662 brw_imm_ud(reg_offset * 16)));
1663 } else if (devinfo->gen >= 8) {
1664 /* Store the offset in a GRF so we can send-from-GRF. */
1665 offset = src_reg(this, glsl_type::uint_type);
1666 emit_before(block, inst, MOV(dst_reg(offset), brw_imm_ud(reg_offset * 16)));
1667 } else {
1668 offset = brw_imm_d(reg_offset * 16);
1669 }
1670
1671 emit_pull_constant_load_reg(temp,
1672 brw_imm_ud(index),
1673 offset,
1674 block, inst);
1675
1676 brw_mark_surface_used(&prog_data->base, index);
1677 }
1678
1679 /**
1680 * Implements array access of uniforms by inserting a
1681 * PULL_CONSTANT_LOAD instruction.
1682 *
1683 * Unlike temporary GRF array access (where we don't support it due to
1684 * the difficulty of doing relative addressing on instruction
1685 * destinations), we could potentially do array access of uniforms
1686 * that were loaded in GRF space as push constants. In real-world
1687 * usage we've seen, though, the arrays being used are always larger
1688 * than we could load as push constants, so just always move all
1689 * uniform array access out to a pull constant buffer.
1690 */
1691 void
1692 vec4_visitor::move_uniform_array_access_to_pull_constants()
1693 {
1694 /* The Vulkan driver doesn't support pull constants other than UBOs, so
1695 * everything has to be pushed regardless.
1696 */
1697 if (stage_prog_data->pull_param == NULL) {
1698 split_uniform_registers();
1699 return;
1700 }
1701
1702 int pull_constant_loc[this->uniforms];
1703 memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
1704
1705 /* First, walk through the instructions and determine which things need to
1706 * be pulled. We mark something as needing to be pulled by setting
1707 * pull_constant_loc to 0.
1708 */
1709 foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
1710 /* We only care about MOV_INDIRECT of a uniform */
1711 if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
1712 inst->src[0].file != UNIFORM)
1713 continue;
1714
1715 int uniform_nr = inst->src[0].nr + inst->src[0].offset / 16;
1716
1717 for (unsigned j = 0; j < DIV_ROUND_UP(inst->src[2].ud, 16); j++)
1718 pull_constant_loc[uniform_nr + j] = 0;
1719 }
1720
1721 /* Next, we walk the list of uniforms and assign real pull constant
1722 * locations and set their corresponding entries in pull_param.
1723 */
1724 for (int j = 0; j < this->uniforms; j++) {
1725 if (pull_constant_loc[j] < 0)
1726 continue;
1727
1728 pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4;
1729
1730 for (int i = 0; i < 4; i++) {
1731 stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
1732 = stage_prog_data->param[j * 4 + i];
1733 }
1734 }
1735
1736 /* Finally, we can walk through the instructions and lower MOV_INDIRECT
1737 * instructions to actual uniform pulls.
1738 */
1739 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
1740 /* We only care about MOV_INDIRECT of a uniform */
1741 if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
1742 inst->src[0].file != UNIFORM)
1743 continue;
1744
1745 int uniform_nr = inst->src[0].nr + inst->src[0].offset / 16;
1746
1747 assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP);
1748
1749 emit_pull_constant_load(block, inst, inst->dst, inst->src[0],
1750 pull_constant_loc[uniform_nr], inst->src[1]);
1751 inst->remove(block);
1752 }
1753
1754 /* Now there are no accesses of the UNIFORM file with a reladdr, so
1755 * no need to track them as larger-than-vec4 objects. This will be
1756 * relied on in cutting out unused uniform vectors from push
1757 * constants.
1758 */
1759 split_uniform_registers();
1760 }
1761
1762 void
1763 vec4_visitor::resolve_ud_negate(src_reg *reg)
1764 {
1765 if (reg->type != BRW_REGISTER_TYPE_UD ||
1766 !reg->negate)
1767 return;
1768
1769 src_reg temp = src_reg(this, glsl_type::uvec4_type);
1770 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
1771 *reg = temp;
1772 }
1773
1774 vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
1775 void *log_data,
1776 const struct brw_sampler_prog_key_data *key_tex,
1777 struct brw_vue_prog_data *prog_data,
1778 const nir_shader *shader,
1779 void *mem_ctx,
1780 bool no_spills,
1781 int shader_time_index)
1782 : backend_shader(compiler, log_data, mem_ctx, shader, &prog_data->base),
1783 key_tex(key_tex),
1784 prog_data(prog_data),
1785 fail_msg(NULL),
1786 first_non_payload_grf(0),
1787 need_all_constants_in_pull_buffer(false),
1788 no_spills(no_spills),
1789 shader_time_index(shader_time_index),
1790 last_scratch(0)
1791 {
1792 this->failed = false;
1793
1794 this->base_ir = NULL;
1795 this->current_annotation = NULL;
1796 memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
1797
1798 memset(this->output_generic_num_components, 0,
1799 sizeof(this->output_generic_num_components));
1800
1801 this->virtual_grf_start = NULL;
1802 this->virtual_grf_end = NULL;
1803 this->live_intervals = NULL;
1804
1805 this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
1806
1807 this->uniforms = 0;
1808 }
1809
1810 vec4_visitor::~vec4_visitor()
1811 {
1812 }
1813
1814
1815 void
1816 vec4_visitor::fail(const char *format, ...)
1817 {
1818 va_list va;
1819 char *msg;
1820
1821 if (failed)
1822 return;
1823
1824 failed = true;
1825
1826 va_start(va, format);
1827 msg = ralloc_vasprintf(mem_ctx, format, va);
1828 va_end(va);
1829 msg = ralloc_asprintf(mem_ctx, "%s compile failed: %s\n", stage_abbrev, msg);
1830
1831 this->fail_msg = msg;
1832
1833 if (debug_enabled) {
1834 fprintf(stderr, "%s", msg);
1835 }
1836 }
1837
1838 } /* namespace brw */