Merge remote-tracking branch 'mesa-public/master' into vulkan
mesa.git: src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 #include "brw_cfg.h"
26 #include "glsl/ir_uniform.h"
27 #include "program/sampler.h"
28
29 namespace brw {
30
31 vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
32 const src_reg &src0, const src_reg &src1,
33 const src_reg &src2)
34 {
35 this->opcode = opcode;
36 this->dst = dst;
37 this->src[0] = src0;
38 this->src[1] = src1;
39 this->src[2] = src2;
40 this->saturate = false;
41 this->force_writemask_all = false;
42 this->no_dd_clear = false;
43 this->no_dd_check = false;
44 this->writes_accumulator = false;
45 this->conditional_mod = BRW_CONDITIONAL_NONE;
46 this->predicate = BRW_PREDICATE_NONE;
47 this->predicate_inverse = false;
48 this->target = 0;
49 this->regs_written = (dst.file == BAD_FILE ? 0 : 1);
50 this->shadow_compare = false;
51 this->ir = NULL;
52 this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
53 this->header_size = 0;
54 this->flag_subreg = 0;
55 this->mlen = 0;
56 this->base_mrf = 0;
57 this->offset = 0;
58 this->annotation = NULL;
59 }
60
61 vec4_instruction *
62 vec4_visitor::emit(vec4_instruction *inst)
63 {
64 inst->ir = this->base_ir;
65 inst->annotation = this->current_annotation;
66
67 this->instructions.push_tail(inst);
68
69 return inst;
70 }
71
72 vec4_instruction *
73 vec4_visitor::emit_before(bblock_t *block, vec4_instruction *inst,
74 vec4_instruction *new_inst)
75 {
76 new_inst->ir = inst->ir;
77 new_inst->annotation = inst->annotation;
78
79 inst->insert_before(block, new_inst);
80
81 return inst;
82 }
83
84 vec4_instruction *
85 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
86 const src_reg &src1, const src_reg &src2)
87 {
88 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1, src2));
89 }
90
91
92 vec4_instruction *
93 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
94 const src_reg &src1)
95 {
96 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1));
97 }
98
99 vec4_instruction *
100 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0)
101 {
102 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0));
103 }
104
105 vec4_instruction *
106 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst)
107 {
108 return emit(new(mem_ctx) vec4_instruction(opcode, dst));
109 }
110
111 vec4_instruction *
112 vec4_visitor::emit(enum opcode opcode)
113 {
114 return emit(new(mem_ctx) vec4_instruction(opcode, dst_reg()));
115 }
116
117 #define ALU1(op) \
118 vec4_instruction * \
119 vec4_visitor::op(const dst_reg &dst, const src_reg &src0) \
120 { \
121 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, src0); \
122 }
123
124 #define ALU2(op) \
125 vec4_instruction * \
126 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
127 const src_reg &src1) \
128 { \
129 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
130 src0, src1); \
131 }
132
133 #define ALU2_ACC(op) \
134 vec4_instruction * \
135 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
136 const src_reg &src1) \
137 { \
138 vec4_instruction *inst = new(mem_ctx) vec4_instruction( \
139 BRW_OPCODE_##op, dst, src0, src1); \
140 inst->writes_accumulator = true; \
141 return inst; \
142 }
143
144 #define ALU3(op) \
145 vec4_instruction * \
146 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
147 const src_reg &src1, const src_reg &src2) \
148 { \
149 assert(devinfo->gen >= 6); \
150 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
151 src0, src1, src2); \
152 }
153
154 ALU1(NOT)
155 ALU1(MOV)
156 ALU1(FRC)
157 ALU1(RNDD)
158 ALU1(RNDE)
159 ALU1(RNDZ)
160 ALU1(F32TO16)
161 ALU1(F16TO32)
162 ALU2(ADD)
163 ALU2(MUL)
164 ALU2_ACC(MACH)
165 ALU2(AND)
166 ALU2(OR)
167 ALU2(XOR)
168 ALU2(DP3)
169 ALU2(DP4)
170 ALU2(DPH)
171 ALU2(SHL)
172 ALU2(SHR)
173 ALU2(ASR)
174 ALU3(LRP)
175 ALU1(BFREV)
176 ALU3(BFE)
177 ALU2(BFI1)
178 ALU3(BFI2)
179 ALU1(FBH)
180 ALU1(FBL)
181 ALU1(CBIT)
182 ALU3(MAD)
183 ALU2_ACC(ADDC)
184 ALU2_ACC(SUBB)
185 ALU2(MAC)
186
187 /** Gen4 predicated IF. */
188 vec4_instruction *
189 vec4_visitor::IF(enum brw_predicate predicate)
190 {
191 vec4_instruction *inst;
192
193 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF);
194 inst->predicate = predicate;
195
196 return inst;
197 }
198
199 /** Gen6 IF with embedded comparison. */
200 vec4_instruction *
201 vec4_visitor::IF(src_reg src0, src_reg src1,
202 enum brw_conditional_mod condition)
203 {
204 assert(devinfo->gen == 6);
205
206 vec4_instruction *inst;
207
208 resolve_ud_negate(&src0);
209 resolve_ud_negate(&src1);
210
211 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF, dst_null_d(),
212 src0, src1);
213 inst->conditional_mod = condition;
214
215 return inst;
216 }
217
218 /**
219 * CMP: Sets the low bit of the destination channels with the result
220 * of the comparison, while the upper bits are undefined, and updates
221 * the flag register with the packed 16 bits of the result.
222 */
223 vec4_instruction *
224 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1,
225 enum brw_conditional_mod condition)
226 {
227 vec4_instruction *inst;
228
229 /* Take the instruction:
230 *
231 * CMP null<d> src0<f> src1<f>
232 *
233 * Original gen4 does type conversion to the destination type before
234 * comparison, producing garbage results for floating point comparisons.
235 *
236 * The destination type doesn't matter on newer generations, so we set the
237 * type to match src0 so we can compact the instruction.
238 */
239 dst.type = src0.type;
240 if (dst.file == HW_REG)
241 dst.fixed_hw_reg.type = dst.type;
242
243 resolve_ud_negate(&src0);
244 resolve_ud_negate(&src1);
245
246 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, dst, src0, src1);
247 inst->conditional_mod = condition;
248
249 return inst;
250 }
251
252 vec4_instruction *
253 vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
254 {
255 vec4_instruction *inst;
256
257 inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
258 dst, index);
259 inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
260 inst->mlen = 2;
261
262 return inst;
263 }
264
265 vec4_instruction *
266 vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
267 const src_reg &index)
268 {
269 vec4_instruction *inst;
270
271 inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
272 dst, src, index);
273 inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen);
274 inst->mlen = 3;
275
276 return inst;
277 }
278
279 src_reg
280 vec4_visitor::fix_3src_operand(const src_reg &src)
281 {
282 /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
283 * able to use vertical stride of zero to replicate the vec4 uniform, like
284 *
285 * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
286 *
287 * But you can't, since vertical stride is always four in three-source
288 * instructions. Instead, insert a MOV instruction to do the replication so
289 * that the three-source instruction can consume it.
290 */
291
292 /* The MOV is only needed if the source is a uniform or immediate. */
293 if (src.file != UNIFORM && src.file != IMM)
294 return src;
295
296 if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
297 return src;
298
299 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
300 expanded.type = src.type;
301 emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
302 return src_reg(expanded);
303 }
304
305 src_reg
306 vec4_visitor::resolve_source_modifiers(const src_reg &src)
307 {
308 if (!src.abs && !src.negate)
309 return src;
310
311 dst_reg resolved = dst_reg(this, glsl_type::ivec4_type);
312 resolved.type = src.type;
313 emit(MOV(resolved, src));
314
315 return src_reg(resolved);
316 }
317
318 src_reg
319 vec4_visitor::fix_math_operand(const src_reg &src)
320 {
321 if (devinfo->gen < 6 || devinfo->gen >= 8 || src.file == BAD_FILE)
322 return src;
323
324 /* The gen6 math instruction ignores the source modifiers --
325 * swizzle, abs, negate, and at least some parts of the register
326 * region description.
327 *
328 * Rather than trying to enumerate all these cases, *always* expand the
329 * operand to a temp GRF for gen6.
330 *
331 * For gen7, keep the operand as-is, except if immediate, which gen7 still
332 * can't use.
333 */
334
335 if (devinfo->gen == 7 && src.file != IMM)
336 return src;
337
338 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
339 expanded.type = src.type;
340 emit(MOV(expanded, src));
341 return src_reg(expanded);
342 }
343
344 vec4_instruction *
345 vec4_visitor::emit_math(enum opcode opcode,
346 const dst_reg &dst,
347 const src_reg &src0, const src_reg &src1)
348 {
349 vec4_instruction *math =
350 emit(opcode, dst, fix_math_operand(src0), fix_math_operand(src1));
351
352 if (devinfo->gen == 6 && dst.writemask != WRITEMASK_XYZW) {
353 /* MATH on Gen6 must be align1, so we can't do writemasks. */
354 math->dst = dst_reg(this, glsl_type::vec4_type);
355 math->dst.type = dst.type;
356 math = emit(MOV(dst, src_reg(math->dst)));
357 } else if (devinfo->gen < 6) {
358 math->base_mrf = 1;
359 math->mlen = src1.file == BAD_FILE ? 1 : 2;
360 }
361
362 return math;
363 }
364
365 void
366 vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
367 {
368 if (devinfo->gen < 7) {
369 unreachable("ir_unop_pack_half_2x16 should be lowered");
370 }
371
372 assert(dst.type == BRW_REGISTER_TYPE_UD);
373 assert(src0.type == BRW_REGISTER_TYPE_F);
374
375 /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
376 *
377 * Because this instruction does not have a 16-bit floating-point type,
378 * the destination data type must be Word (W).
379 *
380 * The destination must be DWord-aligned and specify a horizontal stride
381 * (HorzStride) of 2. The 16-bit result is stored in the lower word of
382 * each destination channel and the upper word is not modified.
383 *
384 * The above restriction implies that the f32to16 instruction must use
385 * align1 mode, because only in align1 mode is it possible to specify
386 * horizontal stride. We choose here to defy the hardware docs and emit
387 * align16 instructions.
388 *
389 * (I [chadv] did attempt to emit align1 instructions for VS f32to16
390 * instructions. I was partially successful in that the code passed all
391 * tests. However, the code was dubiously correct and fragile, and the
392 * tests were not harsh enough to probe that frailty. Not trusting the
393 * code, I chose instead to remain in align16 mode in defiance of the hw
394 * docs).
395 *
396 * I've [chadv] experimentally confirmed that, on gen7 hardware and the
397 * simulator, emitting a f32to16 in align16 mode with UD as destination
398 * data type is safe. The behavior differs from that specified in the PRM
399 * in that the upper word of each destination channel is cleared to 0.
400 */
401
402 dst_reg tmp_dst(this, glsl_type::uvec2_type);
403 src_reg tmp_src(tmp_dst);
404
405 #if 0
406 /* Verify the undocumented behavior on which the following instructions
407 * rely. If f32to16 fails to clear the upper word of the X and Y channels,
408 * then the result of the bit-or instruction below will be incorrect.
409 *
410 * You should inspect the disasm output in order to verify that the MOV is
411 * not optimized away.
412 */
413 emit(MOV(tmp_dst, src_reg(0x12345678u)));
414 #endif
415
416 /* Give tmp the form below, where "." means untouched.
417 *
418 * w z y x w z y x
419 * |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
420 *
421 * That the upper word of each write-channel be 0 is required for the
422 * following bit-shift and bit-or instructions to work. Note that this
423 * relies on the undocumented hardware behavior mentioned above.
424 */
425 tmp_dst.writemask = WRITEMASK_XY;
426 emit(F32TO16(tmp_dst, src0));
427
428 /* Give the write-channels of dst the form:
429 * 0xhhhh0000
430 */
431 tmp_src.swizzle = BRW_SWIZZLE_YYYY;
432 emit(SHL(dst, tmp_src, src_reg(16u)));
433
434 /* Finally, give the write-channels of dst the form of packHalf2x16's
435 * output:
436 * 0xhhhhllll
437 */
438 tmp_src.swizzle = BRW_SWIZZLE_XXXX;
439 emit(OR(dst, src_reg(dst), tmp_src));
440 }
441
442 void
443 vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
444 {
445 if (devinfo->gen < 7) {
446 unreachable("ir_unop_unpack_half_2x16 should be lowered");
447 }
448
449 assert(dst.type == BRW_REGISTER_TYPE_F);
450 assert(src0.type == BRW_REGISTER_TYPE_UD);
451
452 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
453 *
454 * Because this instruction does not have a 16-bit floating-point type,
455 * the source data type must be Word (W). The destination type must be
456 * F (Float).
457 *
458 * To use W as the source data type, we must adjust horizontal strides,
459 * which is only possible in align1 mode. All my [chadv] attempts at
460 * emitting align1 instructions for unpackHalf2x16 failed to pass the
461 * Piglit tests, so I gave up.
462 *
463 * I've verified that, on gen7 hardware and the simulator, it is safe to
464 * emit f16to32 in align16 mode with UD as source data type.
465 */
466
467 dst_reg tmp_dst(this, glsl_type::uvec2_type);
468 src_reg tmp_src(tmp_dst);
469
470 tmp_dst.writemask = WRITEMASK_X;
471 emit(AND(tmp_dst, src0, src_reg(0xffffu)));
472
473 tmp_dst.writemask = WRITEMASK_Y;
474 emit(SHR(tmp_dst, src0, src_reg(16u)));
475
476 dst.writemask = WRITEMASK_XY;
477 emit(F16TO32(dst, tmp_src));
478 }
479
480 void
481 vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
482 {
483 /* Instead of splitting the 32-bit integer, shifting, and ORing it back
484 * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
485 * is not suitable to generate the shift values, but we can use the packed
486 * vector float and a type-converting MOV.
487 */
488 dst_reg shift(this, glsl_type::uvec4_type);
489 emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
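   /* The four bytes passed to src_reg() above form a packed vector-float (VF)
    * immediate; decoded as VF floats they are 0.0, 8.0, 16.0 and 24.0, so the
    * type-converting MOV leaves the per-channel shift counts <0, 8, 16, 24>
    * in `shift`.
    */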
490
491 dst_reg shifted(this, glsl_type::uvec4_type);
492 src0.swizzle = BRW_SWIZZLE_XXXX;
493 emit(SHR(shifted, src0, src_reg(shift)));
494
495 shifted.type = BRW_REGISTER_TYPE_UB;
496 dst_reg f(this, glsl_type::vec4_type);
497 emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
498
499 emit(MUL(dst, src_reg(f), src_reg(1.0f / 255.0f)));
500 }
501
502 void
503 vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
504 {
505 /* Instead of splitting the 32-bit integer, shifting, and ORing it back
506 * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
507 * is not suitable to generate the shift values, but we can use the packed
508 * vector float and a type-converting MOV.
509 */
510 dst_reg shift(this, glsl_type::uvec4_type);
511 emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
512
513 dst_reg shifted(this, glsl_type::uvec4_type);
514 src0.swizzle = BRW_SWIZZLE_XXXX;
515 emit(SHR(shifted, src0, src_reg(shift)));
516
517 shifted.type = BRW_REGISTER_TYPE_B;
518 dst_reg f(this, glsl_type::vec4_type);
519 emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
520
521 dst_reg scaled(this, glsl_type::vec4_type);
522 emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f)));
523
524 dst_reg max(this, glsl_type::vec4_type);
525 emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), src_reg(-1.0f));
526 emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), src_reg(1.0f));
527 }
528
529 void
530 vec4_visitor::emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0)
531 {
532 dst_reg saturated(this, glsl_type::vec4_type);
533 vec4_instruction *inst = emit(MOV(saturated, src0));
534 inst->saturate = true;
535
536 dst_reg scaled(this, glsl_type::vec4_type);
537 emit(MUL(scaled, src_reg(saturated), src_reg(255.0f)));
538
539 dst_reg rounded(this, glsl_type::vec4_type);
540 emit(RNDE(rounded, src_reg(scaled)));
541
542 dst_reg u(this, glsl_type::uvec4_type);
543 emit(MOV(u, src_reg(rounded)));
544
545 src_reg bytes(u);
546 emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
547 }
548
549 void
550 vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
551 {
552 dst_reg max(this, glsl_type::vec4_type);
553 emit_minmax(BRW_CONDITIONAL_GE, max, src0, src_reg(-1.0f));
554
555 dst_reg min(this, glsl_type::vec4_type);
556 emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), src_reg(1.0f));
557
558 dst_reg scaled(this, glsl_type::vec4_type);
559 emit(MUL(scaled, src_reg(min), src_reg(127.0f)));
560
561 dst_reg rounded(this, glsl_type::vec4_type);
562 emit(RNDE(rounded, src_reg(scaled)));
563
564 dst_reg i(this, glsl_type::ivec4_type);
565 emit(MOV(i, src_reg(rounded)));
566
567 src_reg bytes(i);
568 emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
569 }
570
571 /**
572 * Returns the minimum number of vec4 elements needed to pack a type.
573 *
574 * For simple types, it will return 1 (a single vec4); for matrices, the
575 * number of columns; for array and struct, the sum of the vec4_size of
576 * each of its elements; and for sampler and atomic, zero.
577 *
578 * This method is useful to calculate how much register space is needed to
579 * store a particular type.
580 */
581 extern "C" int
582 type_size_vec4(const struct glsl_type *type)
583 {
584 unsigned int i;
585 int size;
586
587 switch (type->base_type) {
588 case GLSL_TYPE_UINT:
589 case GLSL_TYPE_INT:
590 case GLSL_TYPE_FLOAT:
591 case GLSL_TYPE_BOOL:
592 if (type->is_matrix()) {
593 return type->matrix_columns;
594 } else {
595 /* Regardless of size of vector, it gets a vec4. This is bad
596 * packing for things like floats, but otherwise arrays become a
597 * mess. Hopefully a later pass over the code can pack scalars
598 * down if appropriate.
599 */
600 return 1;
601 }
602 case GLSL_TYPE_ARRAY:
603 assert(type->length > 0);
604 return type_size_vec4(type->fields.array) * type->length;
605 case GLSL_TYPE_STRUCT:
606 size = 0;
607 for (i = 0; i < type->length; i++) {
608 size += type_size_vec4(type->fields.structure[i].type);
609 }
610 return size;
611 case GLSL_TYPE_SUBROUTINE:
612 return 1;
613
614 case GLSL_TYPE_SAMPLER:
615 /* Samplers take up no register space, since they're baked in at
616 * link time.
617 */
618 return 0;
619 case GLSL_TYPE_ATOMIC_UINT:
620 return 0;
621 case GLSL_TYPE_IMAGE:
622 return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
623 case GLSL_TYPE_VOID:
624 case GLSL_TYPE_DOUBLE:
625 case GLSL_TYPE_ERROR:
626 case GLSL_TYPE_INTERFACE:
627 case GLSL_TYPE_FUNCTION:
628 unreachable("not reached");
629 }
630
631 return 0;
632 }
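/* Worked examples of the sizing rules above: a float or vec4 takes 1 slot,
 * a mat3 takes 3 (one per column), a vec4[8] array takes 8, and a
 * struct { vec3 a; float b[2]; } takes 1 + 2 = 3 slots.
 */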
633
634 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
635 {
636 init();
637
638 this->file = GRF;
639 this->reg = v->alloc.allocate(type_size_vec4(type));
640
641 if (type->is_array() || type->is_record()) {
642 this->swizzle = BRW_SWIZZLE_NOOP;
643 } else {
644 this->swizzle = brw_swizzle_for_size(type->vector_elements);
645 }
646
647 this->type = brw_type_for_base_type(type);
648 }
649
650 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
651 {
652 assert(size > 0);
653
654 init();
655
656 this->file = GRF;
657 this->reg = v->alloc.allocate(type_size_vec4(type) * size);
658
659 this->swizzle = BRW_SWIZZLE_NOOP;
660
661 this->type = brw_type_for_base_type(type);
662 }
663
664 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
665 {
666 init();
667
668 this->file = GRF;
669 this->reg = v->alloc.allocate(type_size_vec4(type));
670
671 if (type->is_array() || type->is_record()) {
672 this->writemask = WRITEMASK_XYZW;
673 } else {
674 this->writemask = (1 << type->vector_elements) - 1;
675 }
676
677 this->type = brw_type_for_base_type(type);
678 }
679
680 vec4_instruction *
681 vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
682 src_reg src0, src_reg src1)
683 {
684 vec4_instruction *inst;
685
686 if (devinfo->gen >= 6) {
687 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
688 inst->conditional_mod = conditionalmod;
689 } else {
690 emit(CMP(dst, src0, src1, conditionalmod));
691
692 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
693 inst->predicate = BRW_PREDICATE_NORMAL;
694 }
695
696 return inst;
697 }
698
699 vec4_instruction *
700 vec4_visitor::emit_lrp(const dst_reg &dst,
701 const src_reg &x, const src_reg &y, const src_reg &a)
702 {
703 if (devinfo->gen >= 6) {
704 /* Note that the instruction's argument order is reversed from GLSL
705 * and the IR.
706 */
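      /* (The three-source LRP computes src0 * src1 + (1 - src0) * src2, so
       * passing (a, y, x) yields a * y + (1 - a) * x, i.e. GLSL's
       * mix(x, y, a); this matches the pre-gen6 expansion emitted below.)
       */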
707 return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y),
708 fix_3src_operand(x)));
709 } else {
710 /* Earlier generations don't support three source operations, so we
711 * need to emit x*(1-a) + y*a.
712 */
713 dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type);
714 dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
715 dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
716 y_times_a.writemask = dst.writemask;
717 one_minus_a.writemask = dst.writemask;
718 x_times_one_minus_a.writemask = dst.writemask;
719
720 emit(MUL(y_times_a, y, a));
721 emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
722 emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
723 return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
724 }
725 }
726
727 /**
728 * Emits the instructions needed to perform a pull constant load. before_block
729 * and before_inst can be NULL, in which case the instruction will be appended
730 * to the end of the instruction list.
731 */
732 void
733 vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
734 src_reg surf_index,
735 src_reg offset_reg,
736 bblock_t *before_block,
737 vec4_instruction *before_inst)
738 {
739 assert((before_inst == NULL && before_block == NULL) ||
740 (before_inst && before_block));
741
742 vec4_instruction *pull;
743
744 if (devinfo->gen >= 9) {
745 /* Gen9+ needs a message header in order to use SIMD4x2 mode */
746 src_reg header(this, glsl_type::uvec4_type, 2);
747
748 pull = new(mem_ctx)
749 vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
750 dst_reg(header));
751
752 if (before_inst)
753 emit_before(before_block, before_inst, pull);
754 else
755 emit(pull);
756
757 dst_reg index_reg = retype(offset(dst_reg(header), 1),
758 offset_reg.type);
759 pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
760
761 if (before_inst)
762 emit_before(before_block, before_inst, pull);
763 else
764 emit(pull);
765
766 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
767 dst,
768 surf_index,
769 header);
770 pull->mlen = 2;
771 pull->header_size = 1;
772 } else if (devinfo->gen >= 7) {
773 dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
774
775 grf_offset.type = offset_reg.type;
776
777 pull = MOV(grf_offset, offset_reg);
778
779 if (before_inst)
780 emit_before(before_block, before_inst, pull);
781 else
782 emit(pull);
783
784 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
785 dst,
786 surf_index,
787 src_reg(grf_offset));
788 pull->mlen = 1;
789 } else {
790 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
791 dst,
792 surf_index,
793 offset_reg);
794 pull->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen) + 1;
795 pull->mlen = 1;
796 }
797
798 if (before_inst)
799 emit_before(before_block, before_inst, pull);
800 else
801 emit(pull);
802 }
803
804 src_reg
805 vec4_visitor::emit_uniformize(const src_reg &src)
806 {
807 const src_reg chan_index(this, glsl_type::uint_type);
808 const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
809 src.type);
810
811 emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
812 ->force_writemask_all = true;
813 emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
814 ->force_writemask_all = true;
815
816 return src_reg(dst);
817 }
818
819 src_reg
820 vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
821 src_reg coordinate, src_reg sampler)
822 {
823 vec4_instruction *inst =
824 new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
825 dst_reg(this, glsl_type::uvec4_type));
826 inst->base_mrf = 2;
827 inst->src[1] = sampler;
828
829 int param_base;
830
831 if (devinfo->gen >= 9) {
832 /* Gen9+ needs a message header in order to use SIMD4x2 mode */
833 vec4_instruction *header_inst = new(mem_ctx)
834 vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
835 dst_reg(MRF, inst->base_mrf));
836
837 emit(header_inst);
838
839 inst->mlen = 2;
840 inst->header_size = 1;
841 param_base = inst->base_mrf + 1;
842 } else {
843 inst->mlen = 1;
844 param_base = inst->base_mrf;
845 }
846
847 /* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */
848 int coord_mask = (1 << coordinate_type->vector_elements) - 1;
849 int zero_mask = 0xf & ~coord_mask;
850
851 emit(MOV(dst_reg(MRF, param_base, coordinate_type, coord_mask),
852 coordinate));
853
854 emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask),
855 src_reg(0)));
856
857 emit(inst);
858 return src_reg(inst->dst);
859 }
860
861 bool
862 vec4_visitor::is_high_sampler(src_reg sampler)
863 {
864 if (devinfo->gen < 8 && !devinfo->is_haswell)
865 return false;
866
867 return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
868 }
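/* Note: a sampler is "high" when its index cannot be encoded directly in the
 * 4-bit sampler field of the sampler message descriptor -- either because it
 * is not an immediate or because it is >= 16 -- in which case the message
 * header is used to address it. Pre-Haswell hardware has no such mechanism,
 * hence the unconditional false above.
 */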
869
870 void
871 vec4_visitor::emit_texture(ir_texture_opcode op,
872 dst_reg dest,
873 const glsl_type *dest_type,
874 src_reg coordinate,
875 int coord_components,
876 src_reg shadow_comparitor,
877 src_reg lod, src_reg lod2,
878 src_reg sample_index,
879 uint32_t constant_offset,
880 src_reg offset_value,
881 src_reg mcs,
882 bool is_cube_array,
883 uint32_t sampler,
884 src_reg sampler_reg)
885 {
886 enum opcode opcode;
887 switch (op) {
888 case ir_tex: opcode = SHADER_OPCODE_TXL; break;
889 case ir_txl: opcode = SHADER_OPCODE_TXL; break;
890 case ir_txd: opcode = SHADER_OPCODE_TXD; break;
891 case ir_txf: opcode = SHADER_OPCODE_TXF; break;
892 case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
893 case ir_txs: opcode = SHADER_OPCODE_TXS; break;
894 case ir_tg4: opcode = offset_value.file != BAD_FILE
895 ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
896 case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
897 case ir_texture_samples: opcode = SHADER_OPCODE_SAMPLEINFO; break;
898 case ir_txb:
899 unreachable("TXB is not valid for vertex shaders.");
900 case ir_lod:
901 unreachable("LOD is not valid for vertex shaders.");
902 default:
903 unreachable("Unrecognized tex op");
904 }
905
906 vec4_instruction *inst = new(mem_ctx) vec4_instruction(
907 opcode, dst_reg(this, dest_type));
908
909 inst->offset = constant_offset;
910
911 /* The message header is necessary for:
912 * - Gen4 (always)
913 * - Gen9+ for selecting SIMD4x2
914 * - Texel offsets
915 * - Gather channel selection
916 * - Sampler indices too large to fit in a 4-bit value.
917 * - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
918 */
919 inst->header_size =
920 (devinfo->gen < 5 || devinfo->gen >= 9 ||
921 inst->offset != 0 || op == ir_tg4 ||
922 op == ir_texture_samples ||
923 is_high_sampler(sampler_reg)) ? 1 : 0;
924 inst->base_mrf = 2;
925 inst->mlen = inst->header_size;
926 inst->dst.writemask = WRITEMASK_XYZW;
927 inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
928
929 inst->src[1] = sampler_reg;
930
931 /* MRF for the first parameter */
932 int param_base = inst->base_mrf + inst->header_size;
933
934 if (op == ir_txs || op == ir_query_levels) {
935 int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
936 emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
937 inst->mlen++;
938 } else if (op == ir_texture_samples) {
939 inst->dst.writemask = WRITEMASK_X;
940 } else {
941 /* Load the coordinate */
942 /* FINISHME: gl_clamp_mask and saturate */
943 int coord_mask = (1 << coord_components) - 1;
944 int zero_mask = 0xf & ~coord_mask;
945
946 emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
947 coordinate));
948 inst->mlen++;
949
950 if (zero_mask != 0) {
951 emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
952 src_reg(0)));
953 }
954 /* Load the shadow comparitor */
955 if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) {
956 emit(MOV(dst_reg(MRF, param_base + 1, shadow_comparitor.type,
957 WRITEMASK_X),
958 shadow_comparitor));
959 inst->mlen++;
960 }
961
962 /* Load the LOD info */
963 if (op == ir_tex || op == ir_txl) {
964 int mrf, writemask;
965 if (devinfo->gen >= 5) {
966 mrf = param_base + 1;
967 if (shadow_comparitor.file != BAD_FILE) {
968 writemask = WRITEMASK_Y;
969 /* mlen already incremented */
970 } else {
971 writemask = WRITEMASK_X;
972 inst->mlen++;
973 }
974 } else /* devinfo->gen == 4 */ {
975 mrf = param_base;
976 writemask = WRITEMASK_W;
977 }
978 emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
979 } else if (op == ir_txf) {
980 emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
981 } else if (op == ir_txf_ms) {
982 emit(MOV(dst_reg(MRF, param_base + 1, sample_index.type, WRITEMASK_X),
983 sample_index));
984 if (devinfo->gen >= 7) {
985 /* MCS data is in the first channel of `mcs`, but we need to get it into
986 * the .y channel of the second vec4 of params, so replicate .x across
987 * the whole vec4 and then mask off everything except .y
988 */
989 mcs.swizzle = BRW_SWIZZLE_XXXX;
990 emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
991 mcs));
992 }
993 inst->mlen++;
994 } else if (op == ir_txd) {
995 const brw_reg_type type = lod.type;
996
997 if (devinfo->gen >= 5) {
998 lod.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
999 lod2.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1000 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), lod));
1001 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), lod2));
1002 inst->mlen++;
1003
1004 if (dest_type->vector_elements == 3 || shadow_comparitor.file != BAD_FILE) {
1005 lod.swizzle = BRW_SWIZZLE_ZZZZ;
1006 lod2.swizzle = BRW_SWIZZLE_ZZZZ;
1007 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), lod));
1008 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), lod2));
1009 inst->mlen++;
1010
1011 if (shadow_comparitor.file != BAD_FILE) {
1012 emit(MOV(dst_reg(MRF, param_base + 2,
1013 shadow_comparitor.type, WRITEMASK_Z),
1014 shadow_comparitor));
1015 }
1016 }
1017 } else /* devinfo->gen == 4 */ {
1018 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), lod));
1019 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), lod2));
1020 inst->mlen += 2;
1021 }
1022 } else if (op == ir_tg4 && offset_value.file != BAD_FILE) {
1023 if (shadow_comparitor.file != BAD_FILE) {
1024 emit(MOV(dst_reg(MRF, param_base, shadow_comparitor.type, WRITEMASK_W),
1025 shadow_comparitor));
1026 }
1027
1028 emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY),
1029 offset_value));
1030 inst->mlen++;
1031 }
1032 }
1033
1034 emit(inst);
1035
1036 /* Fix up the number of layers (z) for cube arrays: the hardware returns
1037 * faces * layers, but the GLSL spec requires just the layer count.
1038 */
1039 if (op == ir_txs && is_cube_array) {
1040 emit_math(SHADER_OPCODE_INT_QUOTIENT,
1041 writemask(inst->dst, WRITEMASK_Z),
1042 src_reg(inst->dst), src_reg(6));
1043 }
1044
1045 if (devinfo->gen == 6 && op == ir_tg4) {
1046 emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst);
1047 }
1048
1049 swizzle_result(op, dest,
1050 src_reg(inst->dst), sampler, dest_type);
1051 }
1052
1053 /**
1054 * Apply workarounds for Gen6 gather with UINT/SINT
1055 */
1056 void
1057 vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
1058 {
1059 if (!wa)
1060 return;
1061
1062 int width = (wa & WA_8BIT) ? 8 : 16;
1063 dst_reg dst_f = dst;
1064 dst_f.type = BRW_REGISTER_TYPE_F;
1065
1066 /* Convert from UNORM to UINT */
1067 emit(MUL(dst_f, src_reg(dst_f), src_reg((float)((1 << width) - 1))));
1068 emit(MOV(dst, src_reg(dst_f)));
1069
1070 if (wa & WA_SIGN) {
1071 /* Reinterpret the UINT value as a signed INT value by
1072 * shifting the sign bit into place, then shifting back
1073 * preserving sign.
1074 */
1075 emit(SHL(dst, src_reg(dst), src_reg(32 - width)));
1076 emit(ASR(dst, src_reg(dst), src_reg(32 - width)));
1077 }
1078 }
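/* Example of the workaround above: for an 8-bit format (wa & WA_8BIT), width
 * is 8, so the UNORM result is rescaled by 255.0 and converted back to an
 * integer; if WA_SIGN is also set, the SHL/ASR pair by 32 - 8 = 24 bits
 * sign-extends that 8-bit value to a full 32-bit signed integer.
 */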
1079
1080 /**
1081 * Set up the gather channel based on the swizzle, for gather4.
1082 */
1083 uint32_t
1084 vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
1085 {
1086 int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
1087 switch (swiz) {
1088 case SWIZZLE_X: return 0;
1089 case SWIZZLE_Y:
1090 /* gather4 sampler is broken for green channel on RG32F --
1091 * we must ask for blue instead.
1092 */
1093 if (key_tex->gather_channel_quirk_mask & (1 << sampler))
1094 return 2;
1095 return 1;
1096 case SWIZZLE_Z: return 2;
1097 case SWIZZLE_W: return 3;
1098 default:
1099 unreachable("Not reached"); /* zero, one swizzles handled already */
1100 }
1101 }
1102
1103 void
1104 vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
1105 src_reg orig_val, uint32_t sampler,
1106 const glsl_type *dest_type)
1107 {
1108 int s = key_tex->swizzles[sampler];
1109
1110 dst_reg swizzled_result = dest;
1111
1112 if (op == ir_query_levels) {
1113 /* # levels is in .w */
1114 orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1115 emit(MOV(swizzled_result, orig_val));
1116 return;
1117 }
1118
1119 if (op == ir_txs || dest_type == glsl_type::float_type
1120 || s == SWIZZLE_NOOP || op == ir_tg4) {
1121 emit(MOV(swizzled_result, orig_val));
1122 return;
1123 }
1124
1125
1126 int zero_mask = 0, one_mask = 0, copy_mask = 0;
1127 int swizzle[4] = {0};
1128
1129 for (int i = 0; i < 4; i++) {
1130 switch (GET_SWZ(s, i)) {
1131 case SWIZZLE_ZERO:
1132 zero_mask |= (1 << i);
1133 break;
1134 case SWIZZLE_ONE:
1135 one_mask |= (1 << i);
1136 break;
1137 default:
1138 copy_mask |= (1 << i);
1139 swizzle[i] = GET_SWZ(s, i);
1140 break;
1141 }
1142 }
1143
1144 if (copy_mask) {
1145 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1146 swizzled_result.writemask = copy_mask;
1147 emit(MOV(swizzled_result, orig_val));
1148 }
1149
1150 if (zero_mask) {
1151 swizzled_result.writemask = zero_mask;
1152 emit(MOV(swizzled_result, src_reg(0.0f)));
1153 }
1154
1155 if (one_mask) {
1156 swizzled_result.writemask = one_mask;
1157 emit(MOV(swizzled_result, src_reg(1.0f)));
1158 }
1159 }
1160
1161 void
1162 vec4_visitor::gs_emit_vertex(int stream_id)
1163 {
1164 unreachable("not reached");
1165 }
1166
1167 void
1168 vec4_visitor::gs_end_primitive()
1169 {
1170 unreachable("not reached");
1171 }
1172
1173 void
1174 vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
1175 dst_reg dst, src_reg offset,
1176 src_reg src0, src_reg src1)
1177 {
1178 unsigned mlen = 0;
1179
1180 /* Set the atomic operation offset. */
1181 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
1182 mlen++;
1183
1184 /* Set the atomic operation arguments. */
1185 if (src0.file != BAD_FILE) {
1186 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
1187 mlen++;
1188 }
1189
1190 if (src1.file != BAD_FILE) {
1191 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
1192 mlen++;
1193 }
1194
1195 /* Emit the instruction. Note that this maps to the normal SIMD8
1196 * untyped atomic message on Ivy Bridge, but that's OK because
1197 * unused channels will be masked out.
1198 */
1199 vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
1200 brw_message_reg(0),
1201 src_reg(surf_index), src_reg(atomic_op));
1202 inst->mlen = mlen;
1203 }
1204
1205 void
1206 vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
1207 src_reg offset)
1208 {
1209 /* Set the surface read offset. */
1210 emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
1211
1212 /* Emit the instruction. Note that this maps to the normal SIMD8
1213 * untyped surface read message, but that's OK because unused
1214 * channels will be masked out.
1215 */
1216 vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
1217 brw_message_reg(0),
1218 src_reg(surf_index), src_reg(1));
1219 inst->mlen = 1;
1220 }
1221
1222 void
1223 vec4_visitor::emit_ndc_computation()
1224 {
1225 /* Get the position */
1226 src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
1227
1228 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1229 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1230 output_reg[BRW_VARYING_SLOT_NDC] = ndc;
1231
1232 current_annotation = "NDC";
1233 dst_reg ndc_w = ndc;
1234 ndc_w.writemask = WRITEMASK_W;
1235 src_reg pos_w = pos;
1236 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1237 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1238
1239 dst_reg ndc_xyz = ndc;
1240 ndc_xyz.writemask = WRITEMASK_XYZ;
1241
1242 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
1243 }
1244
1245 void
1246 vec4_visitor::emit_psiz_and_flags(dst_reg reg)
1247 {
1248 if (devinfo->gen < 6 &&
1249 ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
1250 output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE ||
1251 devinfo->has_negative_rhw_bug)) {
1252 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1253 dst_reg header1_w = header1;
1254 header1_w.writemask = WRITEMASK_W;
1255
1256 emit(MOV(header1, 0u));
1257
1258 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
1259 src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
1260
1261 current_annotation = "Point size";
1262 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
1263 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
1264 }
1265
1266 if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
1267 current_annotation = "Clipping flags";
1268 dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
1269 dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
1270
1271 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L));
1272 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
1273 emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
1274
1275 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L));
1276 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
1277 emit(SHL(flags1, src_reg(flags1), src_reg(4)));
1278 emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
1279 }
1280
1281 /* i965 clipping workaround:
1282 * 1) Test for -ve rhw
1283 * 2) If set,
1284 * set ndc = (0,0,0,0)
1285 * set ucp[6] = 1
1286 *
1287 * Later, clipping will detect ucp[6] and ensure the primitive is
1288 * clipped against all fixed planes.
1289 */
1290 if (devinfo->has_negative_rhw_bug) {
1291 src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
1292 ndc_w.swizzle = BRW_SWIZZLE_WWWW;
1293 emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
1294 vec4_instruction *inst;
1295 inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
1296 inst->predicate = BRW_PREDICATE_NORMAL;
1297 output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F;
1298 inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
1299 inst->predicate = BRW_PREDICATE_NORMAL;
1300 }
1301
1302 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
1303 } else if (devinfo->gen < 6) {
1304 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
1305 } else {
1306 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
1307 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
1308 dst_reg reg_w = reg;
1309 reg_w.writemask = WRITEMASK_W;
1310 src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ]);
1311 reg_as_src.type = reg_w.type;
1312 reg_as_src.swizzle = brw_swizzle_for_size(1);
1313 emit(MOV(reg_w, reg_as_src));
1314 }
1315 if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
1316 dst_reg reg_y = reg;
1317 reg_y.writemask = WRITEMASK_Y;
1318 reg_y.type = BRW_REGISTER_TYPE_D;
1319 output_reg[VARYING_SLOT_LAYER].type = reg_y.type;
1320 emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER])));
1321 }
1322 if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
1323 dst_reg reg_z = reg;
1324 reg_z.writemask = WRITEMASK_Z;
1325 reg_z.type = BRW_REGISTER_TYPE_D;
1326 output_reg[VARYING_SLOT_VIEWPORT].type = reg_z.type;
1327 emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT])));
1328 }
1329 }
1330 }
1331
1332 vec4_instruction *
1333 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
1334 {
1335 assert(varying < VARYING_SLOT_MAX);
1336 assert(output_reg[varying].type == reg.type);
1337 current_annotation = output_reg_annotation[varying];
1338 /* Copy the register, saturating if necessary */
1339 return emit(MOV(reg, src_reg(output_reg[varying])));
1340 }
1341
1342 void
1343 vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
1344 {
1345 reg.type = BRW_REGISTER_TYPE_F;
1346 output_reg[varying].type = reg.type;
1347
1348 switch (varying) {
1349 case VARYING_SLOT_PSIZ:
1350 {
1351 /* PSIZ is always in slot 0, and is coupled with other flags. */
1352 current_annotation = "indices, point width, clip flags";
1353 emit_psiz_and_flags(reg);
1354 break;
1355 }
1356 case BRW_VARYING_SLOT_NDC:
1357 current_annotation = "NDC";
1358 emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
1359 break;
1360 case VARYING_SLOT_POS:
1361 current_annotation = "gl_Position";
1362 emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
1363 break;
1364 case VARYING_SLOT_EDGE:
1365 /* This is present when doing unfilled polygons. We're supposed to copy
1366 * the edge flag from the user-provided vertex array
1367 * (glEdgeFlagPointer), or otherwise we'll copy from the current value
1368 * of that attribute (starts as 1.0f). This is then used in clipping to
1369 * determine which edges should be drawn as wireframe.
1370 */
1371 current_annotation = "edge flag";
1372 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
1373 glsl_type::float_type, WRITEMASK_XYZW))));
1374 break;
1375 case BRW_VARYING_SLOT_PAD:
1376 /* No need to write to this slot */
1377 break;
1378 default:
1379 emit_generic_urb_slot(reg, varying);
1380 break;
1381 }
1382 }
1383
1384 static int
1385 align_interleaved_urb_mlen(const struct brw_device_info *devinfo, int mlen)
1386 {
1387 if (devinfo->gen >= 6) {
1388 /* URB data written (does not include the message header reg) must
1389 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
1390 * section 5.4.3.2.2: URB_INTERLEAVED.
1391 *
1392 * URB entries are allocated on a multiple of 1024 bits, so an
1393 * extra 128 bits written here to make the end align to 256 is
1394 * no problem.
1395 */
1396 if ((mlen % 2) != 1)
1397 mlen++;
1398 }
1399
1400 return mlen;
1401 }
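/* For example, mlen 4 (header + 3 data registers) is padded to 5 so that the
 * data portion becomes 4 registers, i.e. a whole number of interleaved URB
 * rows; mlen 5 already satisfies the rule and is returned unchanged.
 */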
1402
1403
1404 /**
1405 * Generates the VUE payload plus the necessary URB write instructions to
1406 * output it.
1407 *
1408 * The VUE layout is documented in Volume 2a.
1409 */
1410 void
1411 vec4_visitor::emit_vertex()
1412 {
1413 /* MRF 0 is reserved for the debugger, so start with message header
1414 * in MRF 1.
1415 */
1416 int base_mrf = 1;
1417 int mrf = base_mrf;
1418 /* In the process of generating our URB write message contents, we
1419 * may need to unspill a register or load from an array. Those
1420 * reads would use MRFs 14-15.
1421 */
1422 int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
1423
1424 /* The following assertion verifies that max_usable_mrf causes an
1425 * even-numbered amount of URB write data, which will meet gen6's
1426 * requirements for length alignment.
1427 */
1428 assert ((max_usable_mrf - base_mrf) % 2 == 0);
1429
1430 /* First mrf is the g0-based message header containing URB handles and
1431 * such.
1432 */
1433 emit_urb_write_header(mrf++);
1434
1435 if (devinfo->gen < 6) {
1436 emit_ndc_computation();
1437 }
1438
1439 /* We may need to split this up into several URB writes, so do them in a
1440 * loop.
1441 */
1442 int slot = 0;
1443 bool complete = false;
1444 do {
1445 /* URB offset is in URB row increments, and each of our MRFs is half of
1446 * one of those, since we're doing interleaved writes.
1447 */
1448 int offset = slot / 2;
1449
1450 mrf = base_mrf + 1;
1451 for (; slot < prog_data->vue_map.num_slots; ++slot) {
1452 emit_urb_slot(dst_reg(MRF, mrf++),
1453 prog_data->vue_map.slot_to_varying[slot]);
1454
1455 /* If this was max_usable_mrf, we can't fit anything more into this
1456 * URB WRITE. Same thing if we reached the maximum length available.
1457 */
1458 if (mrf > max_usable_mrf ||
1459 align_interleaved_urb_mlen(devinfo, mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
1460 slot++;
1461 break;
1462 }
1463 }
1464
1465 complete = slot >= prog_data->vue_map.num_slots;
1466 current_annotation = "URB write";
1467 vec4_instruction *inst = emit_urb_write_opcode(complete);
1468 inst->base_mrf = base_mrf;
1469 inst->mlen = align_interleaved_urb_mlen(devinfo, mrf - base_mrf);
1470 inst->offset += offset;
1471 } while(!complete);
1472 }
1473
1474
1475 src_reg
1476 vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
1477 src_reg *reladdr, int reg_offset)
1478 {
1479 /* Because we store the values to scratch interleaved like our
1480 * vertex data, we need to scale the vec4 index by 2.
1481 */
1482 int message_header_scale = 2;
1483
1484 /* Pre-gen6, the message header uses byte offsets instead of vec4
1485 * (16-byte) offset units.
1486 */
1487 if (devinfo->gen < 6)
1488 message_header_scale *= 16;
1489
1490 if (reladdr) {
1491 src_reg index = src_reg(this, glsl_type::int_type);
1492
1493 emit_before(block, inst, ADD(dst_reg(index), *reladdr,
1494 src_reg(reg_offset)));
1495 emit_before(block, inst, MUL(dst_reg(index), index,
1496 src_reg(message_header_scale)));
1497
1498 return index;
1499 } else {
1500 return src_reg(reg_offset * message_header_scale);
1501 }
1502 }
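/* For example, with no reladdr and reg_offset == 3 this returns the immediate
 * 6 on Gen6+ (vec4 slots scaled by 2 for the interleaved layout) and
 * 3 * 32 == 96 on earlier generations, where the message header takes a byte
 * offset.
 */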
1503
1504 src_reg
1505 vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
1506 src_reg *reladdr, int reg_offset)
1507 {
1508 if (reladdr) {
1509 src_reg index = src_reg(this, glsl_type::int_type);
1510
1511 emit_before(block, inst, ADD(dst_reg(index), *reladdr,
1512 src_reg(reg_offset)));
1513
1514 /* Pre-gen6, the message header uses byte offsets instead of vec4
1515 * (16-byte) offset units.
1516 */
1517 if (devinfo->gen < 6) {
1518 emit_before(block, inst, MUL(dst_reg(index), index, src_reg(16)));
1519 }
1520
1521 return index;
1522 } else if (devinfo->gen >= 8) {
1523 /* Store the offset in a GRF so we can send-from-GRF. */
1524 src_reg offset = src_reg(this, glsl_type::int_type);
1525 emit_before(block, inst, MOV(dst_reg(offset), src_reg(reg_offset)));
1526 return offset;
1527 } else {
1528 int message_header_scale = devinfo->gen < 6 ? 16 : 1;
1529 return src_reg(reg_offset * message_header_scale);
1530 }
1531 }
1532
1533 /**
1534 * Emits an instruction before @inst to load the value named by @orig_src
1535 * from scratch space at @base_offset to @temp.
1536 *
1537 * @base_offset is measured in 32-byte units (the size of a register).
1538 */
1539 void
1540 vec4_visitor::emit_scratch_read(bblock_t *block, vec4_instruction *inst,
1541 dst_reg temp, src_reg orig_src,
1542 int base_offset)
1543 {
1544 int reg_offset = base_offset + orig_src.reg_offset;
1545 src_reg index = get_scratch_offset(block, inst, orig_src.reladdr,
1546 reg_offset);
1547
1548 emit_before(block, inst, SCRATCH_READ(temp, index));
1549 }
1550
1551 /**
1552 * Emits an instruction after @inst to store the value to be written
1553 * to @orig_dst to scratch space at @base_offset, from @temp.
1554 *
1555 * @base_offset is measured in 32-byte units (the size of a register).
1556 */
1557 void
1558 vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
1559 int base_offset)
1560 {
1561 int reg_offset = base_offset + inst->dst.reg_offset;
1562 src_reg index = get_scratch_offset(block, inst, inst->dst.reladdr,
1563 reg_offset);
1564
1565 /* Create a temporary register to store *inst's result in.
1566 *
1567 * We have to be careful in MOVing from our temporary result register in
1568 * the scratch write. If we swizzle from channels of the temporary that
1569 * weren't initialized, it will confuse live interval analysis, which will
1570 * make spilling fail to make progress.
1571 */
1572 const src_reg temp = swizzle(retype(src_reg(this, glsl_type::vec4_type),
1573 inst->dst.type),
1574 brw_swizzle_for_mask(inst->dst.writemask));
1575 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
1576 inst->dst.writemask));
1577 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
1578 if (inst->opcode != BRW_OPCODE_SEL)
1579 write->predicate = inst->predicate;
1580 write->ir = inst->ir;
1581 write->annotation = inst->annotation;
1582 inst->insert_after(block, write);
1583
1584 inst->dst.file = temp.file;
1585 inst->dst.reg = temp.reg;
1586 inst->dst.reg_offset = temp.reg_offset;
1587 inst->dst.reladdr = NULL;
1588 }
1589
1590 /**
1591 * Checks if \p src and/or \p src.reladdr require a scratch read, and if so,
1592 * adds the scratch read(s) before \p inst. The function also checks for
1593 * recursive reladdr scratch accesses, issuing the corresponding scratch
1594 * loads and rewriting reladdr references accordingly.
1595 *
1596 * \return \p src if it did not require a scratch load, otherwise, the
1597 * register holding the result of the scratch load that the caller should
1598 * use to rewrite src.
1599 */
1600 src_reg
1601 vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
1602 vec4_instruction *inst, src_reg src)
1603 {
1604 /* Resolve recursive reladdr scratch access by calling ourselves
1605 * with src.reladdr
1606 */
1607 if (src.reladdr)
1608 *src.reladdr = emit_resolve_reladdr(scratch_loc, block, inst,
1609 *src.reladdr);
1610
1611 /* Now handle scratch access on src */
1612 if (src.file == GRF && scratch_loc[src.reg] != -1) {
1613 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
1614 emit_scratch_read(block, inst, temp, src, scratch_loc[src.reg]);
1615 src.reg = temp.reg;
1616 src.reg_offset = temp.reg_offset;
1617 src.reladdr = NULL;
1618 }
1619
1620 return src;
1621 }
1622
1623 /**
1624 * We can't generally support array access in GRF space, because a
1625 * single instruction's destination can only span 2 contiguous
1626 * registers. So, we send all GRF arrays that get variable index
1627 * access to scratch space.
1628 */
1629 void
1630 vec4_visitor::move_grf_array_access_to_scratch()
1631 {
1632 int scratch_loc[this->alloc.count];
1633 memset(scratch_loc, -1, sizeof(scratch_loc));
1634
1635 /* First, calculate the set of virtual GRFs that need to be punted
1636 * to scratch due to having any array access on them, and where in
1637 * scratch.
1638 */
1639 foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
1640 if (inst->dst.file == GRF && inst->dst.reladdr) {
1641 if (scratch_loc[inst->dst.reg] == -1) {
1642 scratch_loc[inst->dst.reg] = last_scratch;
1643 last_scratch += this->alloc.sizes[inst->dst.reg];
1644 }
1645
1646 for (src_reg *iter = inst->dst.reladdr;
1647 iter->reladdr;
1648 iter = iter->reladdr) {
1649 if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
1650 scratch_loc[iter->reg] = last_scratch;
1651 last_scratch += this->alloc.sizes[iter->reg];
1652 }
1653 }
1654 }
1655
1656 for (int i = 0 ; i < 3; i++) {
1657 for (src_reg *iter = &inst->src[i];
1658 iter->reladdr;
1659 iter = iter->reladdr) {
1660 if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
1661 scratch_loc[iter->reg] = last_scratch;
1662 last_scratch += this->alloc.sizes[iter->reg];
1663 }
1664 }
1665 }
1666 }
1667
1668 /* Now, for anything that will be accessed through scratch, rewrite
1669 * it to load/store. Note that this is a _safe list walk, because
1670 * we may generate a new scratch_write instruction after the one
1671 * we're processing.
1672 */
1673 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
1674 /* Set up the annotation tracking for new generated instructions. */
1675 base_ir = inst->ir;
1676 current_annotation = inst->annotation;
1677
1678 /* First handle scratch access on the dst. Notice we have to handle
1679 * the case where the dst's reladdr also points to scratch space.
1680 */
1681 if (inst->dst.reladdr)
1682 *inst->dst.reladdr = emit_resolve_reladdr(scratch_loc, block, inst,
1683 *inst->dst.reladdr);
1684
1685 /* Now that we have handled any (possibly recursive) reladdr scratch
1686 * accesses for dst we can safely do the scratch write for dst itself
1687 */
1688 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1)
1689 emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]);
1690
1691 /* Now handle scratch access on any src. In this case, since inst->src[i]
1692 * already is a src_reg, we can just call emit_resolve_reladdr with
1693 * inst->src[i] and it will take care of handling scratch loads for
1694 * both src and src.reladdr (recursively).
1695 */
1696 for (int i = 0 ; i < 3; i++) {
1697 inst->src[i] = emit_resolve_reladdr(scratch_loc, block, inst,
1698 inst->src[i]);
1699 }
1700 }
1701 }
1702
1703 /**
1704 * Emits an instruction before @inst to load the value named by @orig_src
1705 * from the pull constant buffer (surface) at @base_offset to @temp.
1706 */
1707 void
1708 vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
1709 dst_reg temp, src_reg orig_src,
1710 int base_offset)
1711 {
1712 int reg_offset = base_offset + orig_src.reg_offset;
1713 src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start);
1714 src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
1715 reg_offset);
1716
1717 emit_pull_constant_load_reg(temp,
1718 index,
1719 offset,
1720 block, inst);
1721 }
1722
1723 /**
1724 * Implements array access of uniforms by inserting a
1725 * PULL_CONSTANT_LOAD instruction.
1726 *
1727 * Unlike temporary GRF array access (where we don't support it due to
1728 * the difficulty of doing relative addressing on instruction
1729 * destinations), we could potentially do array access of uniforms
1730 * that were loaded in GRF space as push constants. In real-world
1731 * usage we've seen, though, the arrays being used are always larger
1732 * than we could load as push constants, so just always move all
1733 * uniform array access out to a pull constant buffer.
1734 */
1735 void
1736 vec4_visitor::move_uniform_array_access_to_pull_constants()
1737 {
1738 int pull_constant_loc[this->uniforms];
1739 memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
1740 bool nested_reladdr;
1741
1742 /* Walk through and find array access of uniforms. Put a copy of that
1743 * uniform in the pull constant buffer.
1744 *
1745 * Note that we don't move constant-indexed accesses to arrays. No
1746 * testing has been done of the performance impact of this choice.
1747 */
1748 do {
1749 nested_reladdr = false;
1750
1751 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
1752 for (int i = 0 ; i < 3; i++) {
1753 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
1754 continue;
1755
1756 int uniform = inst->src[i].reg;
1757
1758 if (inst->src[i].reladdr->reladdr)
1759 nested_reladdr = true; /* will need another pass */
1760
1761 /* If this array isn't already present in the pull constant buffer,
1762 * add it.
1763 */
1764 if (pull_constant_loc[uniform] == -1) {
1765 const gl_constant_value **values =
1766 &stage_prog_data->param[uniform * 4];
1767
1768 pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4;
1769
1770 assert(uniform < uniform_array_size);
1771 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
1772 stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
1773 = values[j];
1774 }
1775 }
1776
1777 /* Set up the annotation tracking for new generated instructions. */
1778 base_ir = inst->ir;
1779 current_annotation = inst->annotation;
1780
1781 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
1782
1783 emit_pull_constant_load(block, inst, temp, inst->src[i],
1784 pull_constant_loc[uniform]);
1785
1786 inst->src[i].file = temp.file;
1787 inst->src[i].reg = temp.reg;
1788 inst->src[i].reg_offset = temp.reg_offset;
1789 inst->src[i].reladdr = NULL;
1790 }
1791 }
1792 } while (nested_reladdr);
1793
1794 /* Now there are no accesses of the UNIFORM file with a reladdr, so
1795 * no need to track them as larger-than-vec4 objects. This will be
1796 * relied on in cutting out unused uniform vectors from push
1797 * constants.
1798 */
1799 split_uniform_registers();
1800 }
1801
1802 void
1803 vec4_visitor::resolve_ud_negate(src_reg *reg)
1804 {
1805 if (reg->type != BRW_REGISTER_TYPE_UD ||
1806 !reg->negate)
1807 return;
1808
1809 src_reg temp = src_reg(this, glsl_type::uvec4_type);
1810 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
1811 *reg = temp;
1812 }
1813
1814 vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
1815 void *log_data,
1816 const struct brw_sampler_prog_key_data *key_tex,
1817 struct brw_vue_prog_data *prog_data,
1818 const nir_shader *shader,
1819 void *mem_ctx,
1820 bool no_spills,
1821 int shader_time_index)
1822 : backend_shader(compiler, log_data, mem_ctx, shader, &prog_data->base),
1823 key_tex(key_tex),
1824 prog_data(prog_data),
1825 fail_msg(NULL),
1826 first_non_payload_grf(0),
1827 need_all_constants_in_pull_buffer(false),
1828 no_spills(no_spills),
1829 shader_time_index(shader_time_index),
1830 last_scratch(0)
1831 {
1832 this->failed = false;
1833
1834 this->base_ir = NULL;
1835 this->current_annotation = NULL;
1836 memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
1837
1838 this->virtual_grf_start = NULL;
1839 this->virtual_grf_end = NULL;
1840 this->live_intervals = NULL;
1841
1842 this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
1843
1844 this->uniforms = 0;
1845
1846 /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
1847 * at least one. See setup_uniforms() in brw_vec4.cpp.
1848 */
1849 this->uniform_array_size = 1;
1850 if (prog_data) {
1851 this->uniform_array_size =
1852 MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1);
1853 }
1854
1855 this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
1856 }
1857
1858 vec4_visitor::~vec4_visitor()
1859 {
1860 }
1861
1862
1863 void
1864 vec4_visitor::fail(const char *format, ...)
1865 {
1866 va_list va;
1867 char *msg;
1868
1869 if (failed)
1870 return;
1871
1872 failed = true;
1873
1874 va_start(va, format);
1875 msg = ralloc_vasprintf(mem_ctx, format, va);
1876 va_end(va);
1877 msg = ralloc_asprintf(mem_ctx, "%s compile failed: %s\n", stage_abbrev, msg);
1878
1879 this->fail_msg = msg;
1880
1881 if (debug_enabled) {
1882 fprintf(stderr, "%s", msg);
1883 }
1884 }
1885
1886 } /* namespace brw */