[mesa.git] src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 #include "brw_cfg.h"
26 #include "glsl/ir_uniform.h"
27 #include "program/sampler.h"
28
29 namespace brw {
30
31 vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
32 const src_reg &src0, const src_reg &src1,
33 const src_reg &src2)
34 {
35 this->opcode = opcode;
36 this->dst = dst;
37 this->src[0] = src0;
38 this->src[1] = src1;
39 this->src[2] = src2;
40 this->saturate = false;
41 this->force_writemask_all = false;
42 this->no_dd_clear = false;
43 this->no_dd_check = false;
44 this->writes_accumulator = false;
45 this->conditional_mod = BRW_CONDITIONAL_NONE;
46 this->predicate = BRW_PREDICATE_NONE;
47 this->predicate_inverse = false;
48 this->target = 0;
49 this->regs_written = (dst.file == BAD_FILE ? 0 : 1);
50 this->shadow_compare = false;
51 this->ir = NULL;
52 this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
53 this->header_size = 0;
54 this->flag_subreg = 0;
55 this->mlen = 0;
56 this->base_mrf = 0;
57 this->offset = 0;
58 this->annotation = NULL;
59 }
60
61 vec4_instruction *
62 vec4_visitor::emit(vec4_instruction *inst)
63 {
64 inst->ir = this->base_ir;
65 inst->annotation = this->current_annotation;
66
67 this->instructions.push_tail(inst);
68
69 return inst;
70 }
71
72 vec4_instruction *
73 vec4_visitor::emit_before(bblock_t *block, vec4_instruction *inst,
74 vec4_instruction *new_inst)
75 {
76 new_inst->ir = inst->ir;
77 new_inst->annotation = inst->annotation;
78
79 inst->insert_before(block, new_inst);
80
81 return inst;
82 }
83
84 vec4_instruction *
85 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
86 const src_reg &src1, const src_reg &src2)
87 {
88 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1, src2));
89 }
90
91
92 vec4_instruction *
93 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
94 const src_reg &src1)
95 {
96 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1));
97 }
98
99 vec4_instruction *
100 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0)
101 {
102 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0));
103 }
104
105 vec4_instruction *
106 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst)
107 {
108 return emit(new(mem_ctx) vec4_instruction(opcode, dst));
109 }
110
111 vec4_instruction *
112 vec4_visitor::emit(enum opcode opcode)
113 {
114 return emit(new(mem_ctx) vec4_instruction(opcode, dst_reg()));
115 }
116
117 #define ALU1(op) \
118 vec4_instruction * \
119 vec4_visitor::op(const dst_reg &dst, const src_reg &src0) \
120 { \
121 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, src0); \
122 }
123
124 #define ALU2(op) \
125 vec4_instruction * \
126 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
127 const src_reg &src1) \
128 { \
129 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
130 src0, src1); \
131 }
132
133 #define ALU2_ACC(op) \
134 vec4_instruction * \
135 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
136 const src_reg &src1) \
137 { \
138 vec4_instruction *inst = new(mem_ctx) vec4_instruction( \
139 BRW_OPCODE_##op, dst, src0, src1); \
140 inst->writes_accumulator = true; \
141 return inst; \
142 }
143
144 #define ALU3(op) \
145 vec4_instruction * \
146 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
147 const src_reg &src1, const src_reg &src2) \
148 { \
149 assert(devinfo->gen >= 6); \
150 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
151 src0, src1, src2); \
152 }
153
154 ALU1(NOT)
155 ALU1(MOV)
156 ALU1(FRC)
157 ALU1(RNDD)
158 ALU1(RNDE)
159 ALU1(RNDZ)
160 ALU1(F32TO16)
161 ALU1(F16TO32)
162 ALU2(ADD)
163 ALU2(MUL)
164 ALU2_ACC(MACH)
165 ALU2(AND)
166 ALU2(OR)
167 ALU2(XOR)
168 ALU2(DP3)
169 ALU2(DP4)
170 ALU2(DPH)
171 ALU2(SHL)
172 ALU2(SHR)
173 ALU2(ASR)
174 ALU3(LRP)
175 ALU1(BFREV)
176 ALU3(BFE)
177 ALU2(BFI1)
178 ALU3(BFI2)
179 ALU1(FBH)
180 ALU1(FBL)
181 ALU1(CBIT)
182 ALU3(MAD)
183 ALU2_ACC(ADDC)
184 ALU2_ACC(SUBB)
185 ALU2(MAC)
186
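/* For reference, a sketch of what one of these macros expands to: ALU2(ADD)
 * defines a builder that allocates the instruction on mem_ctx without adding
 * it to the instruction stream, so callers can tweak fields (saturate,
 * conditional_mod, ...) before handing it to emit():
 *
 *    vec4_instruction *
 *    vec4_visitor::ADD(const dst_reg &dst, const src_reg &src0,
 *                      const src_reg &src1)
 *    {
 *       return new(mem_ctx) vec4_instruction(BRW_OPCODE_ADD, dst, src0, src1);
 *    }
 */
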
187 /** Gen4 predicated IF. */
188 vec4_instruction *
189 vec4_visitor::IF(enum brw_predicate predicate)
190 {
191 vec4_instruction *inst;
192
193 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF);
194 inst->predicate = predicate;
195
196 return inst;
197 }
198
199 /** Gen6 IF with embedded comparison. */
200 vec4_instruction *
201 vec4_visitor::IF(src_reg src0, src_reg src1,
202 enum brw_conditional_mod condition)
203 {
204 assert(devinfo->gen == 6);
205
206 vec4_instruction *inst;
207
208 resolve_ud_negate(&src0);
209 resolve_ud_negate(&src1);
210
211 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF, dst_null_d(),
212 src0, src1);
213 inst->conditional_mod = condition;
214
215 return inst;
216 }
217
218 /**
219 * CMP: Sets the low bit of the destination channels with the result
220 * of the comparison, while the upper bits are undefined, and updates
221 * the flag register with the packed 16 bits of the result.
222 */
223 vec4_instruction *
224 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1,
225 enum brw_conditional_mod condition)
226 {
227 vec4_instruction *inst;
228
229 /* Take the instruction:
230 *
231 * CMP null<d> src0<f> src1<f>
232 *
233 * Original gen4 does type conversion to the destination type before
234 * comparison, producing garbage results for floating point comparisons.
235 *
236 * The destination type doesn't matter on newer generations, so we set the
237 * type to match src0 so we can compact the instruction.
238 */
239 dst.type = src0.type;
240 if (dst.file == HW_REG)
241 dst.fixed_hw_reg.type = dst.type;
242
243 resolve_ud_negate(&src0);
244 resolve_ud_negate(&src1);
245
246 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, dst, src0, src1);
247 inst->conditional_mod = condition;
248
249 return inst;
250 }
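
/* A typical use of CMP in this file is to set the flag register and then
 * predicate a following SEL on it; emit_minmax() below does exactly this on
 * pre-gen6 hardware:
 *
 *    emit(CMP(dst, src0, src1, conditionalmod));
 *    inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
 *    inst->predicate = BRW_PREDICATE_NORMAL;
 */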
251
252 vec4_instruction *
253 vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
254 {
255 vec4_instruction *inst;
256
257 inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
258 dst, index);
259 inst->base_mrf = 14;
260 inst->mlen = 2;
261
262 return inst;
263 }
264
265 vec4_instruction *
266 vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
267 const src_reg &index)
268 {
269 vec4_instruction *inst;
270
271 inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
272 dst, src, index);
273 inst->base_mrf = 13;
274 inst->mlen = 3;
275
276 return inst;
277 }
278
279 void
280 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
281 {
282 static enum opcode dot_opcodes[] = {
283 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
284 };
285
286 emit(dot_opcodes[elements - 2], dst, src0, src1);
287 }
288
289 src_reg
290 vec4_visitor::fix_3src_operand(const src_reg &src)
291 {
292 /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
293 * able to use vertical stride of zero to replicate the vec4 uniform, like
294 *
295 * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
296 *
297 * But you can't, since vertical stride is always four in three-source
298 * instructions. Instead, insert a MOV instruction to do the replication so
299 * that the three-source instruction can consume it.
300 */
301
302 /* The MOV is only needed if the source is a uniform or immediate. */
303 if (src.file != UNIFORM && src.file != IMM)
304 return src;
305
306 if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
307 return src;
308
309 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
310 expanded.type = src.type;
311 emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
312 return src_reg(expanded);
313 }
314
315 src_reg
316 vec4_visitor::resolve_source_modifiers(const src_reg &src)
317 {
318 if (!src.abs && !src.negate)
319 return src;
320
321 dst_reg resolved = dst_reg(this, glsl_type::ivec4_type);
322 resolved.type = src.type;
323 emit(MOV(resolved, src));
324
325 return src_reg(resolved);
326 }
327
328 src_reg
329 vec4_visitor::fix_math_operand(const src_reg &src)
330 {
331 if (devinfo->gen < 6 || devinfo->gen >= 8 || src.file == BAD_FILE)
332 return src;
333
334 /* The gen6 math instruction ignores the source modifiers --
335 * swizzle, abs, negate, and at least some parts of the register
336 * region description.
337 *
338 * Rather than trying to enumerate all these cases, *always* expand the
339 * operand to a temp GRF for gen6.
340 *
341 * For gen7, keep the operand as-is, except if immediate, which gen7 still
342 * can't use.
343 */
344
345 if (devinfo->gen == 7 && src.file != IMM)
346 return src;
347
348 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
349 expanded.type = src.type;
350 emit(MOV(expanded, src));
351 return src_reg(expanded);
352 }
353
354 vec4_instruction *
355 vec4_visitor::emit_math(enum opcode opcode,
356 const dst_reg &dst,
357 const src_reg &src0, const src_reg &src1)
358 {
359 vec4_instruction *math =
360 emit(opcode, dst, fix_math_operand(src0), fix_math_operand(src1));
361
362 if (devinfo->gen == 6 && dst.writemask != WRITEMASK_XYZW) {
363 /* MATH on Gen6 must be align1, so we can't do writemasks. */
364 math->dst = dst_reg(this, glsl_type::vec4_type);
365 math->dst.type = dst.type;
366 math = emit(MOV(dst, src_reg(math->dst)));
367 } else if (devinfo->gen < 6) {
368 math->base_mrf = 1;
369 math->mlen = src1.file == BAD_FILE ? 1 : 2;
370 }
371
372 return math;
373 }
374
375 void
376 vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
377 {
378 if (devinfo->gen < 7) {
379 unreachable("ir_unop_pack_half_2x16 should be lowered");
380 }
381
382 assert(dst.type == BRW_REGISTER_TYPE_UD);
383 assert(src0.type == BRW_REGISTER_TYPE_F);
384
385 /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
386 *
387 * Because this instruction does not have a 16-bit floating-point type,
388 * the destination data type must be Word (W).
389 *
390 * The destination must be DWord-aligned and specify a horizontal stride
391 * (HorzStride) of 2. The 16-bit result is stored in the lower word of
392 * each destination channel and the upper word is not modified.
393 *
394 * The above restriction implies that the f32to16 instruction must use
395 * align1 mode, because only in align1 mode is it possible to specify
396 * horizontal stride. We choose here to defy the hardware docs and emit
397 * align16 instructions.
398 *
399 * (I [chadv] did attempt to emit align1 instructions for VS f32to16
400 * instructions. I was partially successful in that the code passed all
401 * tests. However, the code was dubiously correct and fragile, and the
402 * tests were not harsh enough to probe that frailty. Not trusting the
403 * code, I chose instead to remain in align16 mode in defiance of the hw
404 * docs).
405 *
406 * I've [chadv] experimentally confirmed that, on gen7 hardware and the
407 * simulator, emitting a f32to16 in align16 mode with UD as destination
408 * data type is safe. The behavior differs from that specified in the PRM
409 * in that the upper word of each destination channel is cleared to 0.
410 */
411
412 dst_reg tmp_dst(this, glsl_type::uvec2_type);
413 src_reg tmp_src(tmp_dst);
414
415 #if 0
416 /* Verify the undocumented behavior on which the following instructions
417 * rely. If f32to16 fails to clear the upper word of the X and Y channels,
418 * then the result of the bit-or instruction below will be incorrect.
419 *
420 * You should inspect the disasm output in order to verify that the MOV is
421 * not optimized away.
422 */
423 emit(MOV(tmp_dst, src_reg(0x12345678u)));
424 #endif
425
426 /* Give tmp the form below, where "." means untouched.
427 *
428 * w z y x w z y x
429 * |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
430 *
431 * That the upper word of each write-channel be 0 is required for the
432 * following bit-shift and bit-or instructions to work. Note that this
433 * relies on the undocumented hardware behavior mentioned above.
434 */
435 tmp_dst.writemask = WRITEMASK_XY;
436 emit(F32TO16(tmp_dst, src0));
437
438 /* Give the write-channels of dst the form:
439 * 0xhhhh0000
440 */
441 tmp_src.swizzle = BRW_SWIZZLE_YYYY;
442 emit(SHL(dst, tmp_src, src_reg(16u)));
443
444 /* Finally, give the write-channels of dst the form of packHalf2x16's
445 * output:
446 * 0xhhhhllll
447 */
448 tmp_src.swizzle = BRW_SWIZZLE_XXXX;
449 emit(OR(dst, src_reg(dst), tmp_src));
450 }
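
/* A worked example of the sequence above, assuming the usual IEEE half-float
 * encodings: for packHalf2x16(vec2(1.0, -2.0)), F32TO16 leaves 0x3c00 in the
 * low word of tmp.x and 0xc000 in the low word of tmp.y.  The SHL of tmp.yyyy
 * by 16 writes 0xc0000000 to dst, and the final OR with tmp.xxxx yields
 * 0xc0003c00 -- y in the high 16 bits, x in the low 16 bits.
 */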
451
452 void
453 vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
454 {
455 if (devinfo->gen < 7) {
456 unreachable("ir_unop_unpack_half_2x16 should be lowered");
457 }
458
459 assert(dst.type == BRW_REGISTER_TYPE_F);
460 assert(src0.type == BRW_REGISTER_TYPE_UD);
461
462 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
463 *
464 * Because this instruction does not have a 16-bit floating-point type,
465 * the source data type must be Word (W). The destination type must be
466 * F (Float).
467 *
468 * To use W as the source data type, we must adjust horizontal strides,
469 * which is only possible in align1 mode. All my [chadv] attempts at
470 * emitting align1 instructions for unpackHalf2x16 failed to pass the
471 * Piglit tests, so I gave up.
472 *
473 * I've verified that, on gen7 hardware and the simulator, it is safe to
474 * emit f16to32 in align16 mode with UD as source data type.
475 */
476
477 dst_reg tmp_dst(this, glsl_type::uvec2_type);
478 src_reg tmp_src(tmp_dst);
479
480 tmp_dst.writemask = WRITEMASK_X;
481 emit(AND(tmp_dst, src0, src_reg(0xffffu)));
482
483 tmp_dst.writemask = WRITEMASK_Y;
484 emit(SHR(tmp_dst, src0, src_reg(16u)));
485
486 dst.writemask = WRITEMASK_XY;
487 emit(F16TO32(dst, tmp_src));
488 }
489
490 void
491 vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
492 {
493 /* Instead of splitting the 32-bit integer, shifting, and ORing it back
494 * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
495 * is not suitable to generate the shift values, but we can use the packed
496 * vector float and a type-converting MOV.
497 */
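   /* A note on the magic constant used below: src_reg(0x00, 0x60, 0x70, 0x78)
    * builds a packed vector-float (VF) immediate.  Each byte is an 8-bit
    * restricted float (1 sign, 3 exponent, 4 mantissa bits, exponent bias 3),
    * so the four bytes decode to 0.0, 8.0, 16.0 and 24.0 -- the per-channel
    * shift counts -- which the type-converting MOV then materializes in a UD
    * register.  emit_unpack_snorm_4x8() below uses the same trick.
    */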
498 dst_reg shift(this, glsl_type::uvec4_type);
499 emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
500
501 dst_reg shifted(this, glsl_type::uvec4_type);
502 src0.swizzle = BRW_SWIZZLE_XXXX;
503 emit(SHR(shifted, src0, src_reg(shift)));
504
505 shifted.type = BRW_REGISTER_TYPE_UB;
506 dst_reg f(this, glsl_type::vec4_type);
507 emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
508
509 emit(MUL(dst, src_reg(f), src_reg(1.0f / 255.0f)));
510 }
511
512 void
513 vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
514 {
515 /* Instead of splitting the 32-bit integer, shifting, and ORing it back
516 * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
517 * is not suitable to generate the shift values, but we can use the packed
518 * vector float and a type-converting MOV.
519 */
520 dst_reg shift(this, glsl_type::uvec4_type);
521 emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
522
523 dst_reg shifted(this, glsl_type::uvec4_type);
524 src0.swizzle = BRW_SWIZZLE_XXXX;
525 emit(SHR(shifted, src0, src_reg(shift)));
526
527 shifted.type = BRW_REGISTER_TYPE_B;
528 dst_reg f(this, glsl_type::vec4_type);
529 emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
530
531 dst_reg scaled(this, glsl_type::vec4_type);
532 emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f)));
533
534 dst_reg max(this, glsl_type::vec4_type);
535 emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), src_reg(-1.0f));
536 emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), src_reg(1.0f));
537 }
538
539 void
540 vec4_visitor::emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0)
541 {
542 dst_reg saturated(this, glsl_type::vec4_type);
543 vec4_instruction *inst = emit(MOV(saturated, src0));
544 inst->saturate = true;
545
546 dst_reg scaled(this, glsl_type::vec4_type);
547 emit(MUL(scaled, src_reg(saturated), src_reg(255.0f)));
548
549 dst_reg rounded(this, glsl_type::vec4_type);
550 emit(RNDE(rounded, src_reg(scaled)));
551
552 dst_reg u(this, glsl_type::uvec4_type);
553 emit(MOV(u, src_reg(rounded)));
554
555 src_reg bytes(u);
556 emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
557 }
558
559 void
560 vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
561 {
562 dst_reg max(this, glsl_type::vec4_type);
563 emit_minmax(BRW_CONDITIONAL_GE, max, src0, src_reg(-1.0f));
564
565 dst_reg min(this, glsl_type::vec4_type);
566 emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), src_reg(1.0f));
567
568 dst_reg scaled(this, glsl_type::vec4_type);
569 emit(MUL(scaled, src_reg(min), src_reg(127.0f)));
570
571 dst_reg rounded(this, glsl_type::vec4_type);
572 emit(RNDE(rounded, src_reg(scaled)));
573
574 dst_reg i(this, glsl_type::ivec4_type);
575 emit(MOV(i, src_reg(rounded)));
576
577 src_reg bytes(i);
578 emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
579 }
580
581 void
582 vec4_visitor::visit_instructions(const exec_list *list)
583 {
584 foreach_in_list(ir_instruction, ir, list) {
585 base_ir = ir;
586 ir->accept(this);
587 }
588 }
589
590 /**
591 * Returns the minimum number of vec4 elements needed to pack a type.
592 *
593  * For simple types, it will return 1 (a single vec4); for matrices, the
594  * number of columns; for arrays and structs, the sum of the vec4 sizes of
595  * their elements; and for samplers and atomics, zero.
596 *
597 * This method is useful to calculate how much register space is needed to
598 * store a particular type.
599 */
600 extern "C" int
601 type_size_vec4(const struct glsl_type *type)
602 {
603 unsigned int i;
604 int size;
605
606 switch (type->base_type) {
607 case GLSL_TYPE_UINT:
608 case GLSL_TYPE_INT:
609 case GLSL_TYPE_FLOAT:
610 case GLSL_TYPE_BOOL:
611 if (type->is_matrix()) {
612 return type->matrix_columns;
613 } else {
614             /* Regardless of the size of the vector, it gets a vec4. This is bad
615 * packing for things like floats, but otherwise arrays become a
616 * mess. Hopefully a later pass over the code can pack scalars
617 * down if appropriate.
618 */
619 return 1;
620 }
621 case GLSL_TYPE_ARRAY:
622 assert(type->length > 0);
623 return type_size_vec4(type->fields.array) * type->length;
624 case GLSL_TYPE_STRUCT:
625 size = 0;
626 for (i = 0; i < type->length; i++) {
627 size += type_size_vec4(type->fields.structure[i].type);
628 }
629 return size;
630 case GLSL_TYPE_SUBROUTINE:
631 return 1;
632
633 case GLSL_TYPE_SAMPLER:
634 /* Samplers take up no register space, since they're baked in at
635 * link time.
636 */
637 return 0;
638 case GLSL_TYPE_ATOMIC_UINT:
639 return 0;
640 case GLSL_TYPE_IMAGE:
641 return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
642 case GLSL_TYPE_VOID:
643 case GLSL_TYPE_DOUBLE:
644 case GLSL_TYPE_ERROR:
645 case GLSL_TYPE_INTERFACE:
646 case GLSL_TYPE_FUNCTION:
647 unreachable("not reached");
648 }
649
650 return 0;
651 }
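
/* A few examples of the sizing rules above: a float and a vec4 each take one
 * vec4 slot; a mat3 takes 3 (one per column); float[4] takes 4, since every
 * array element is padded out to a full vec4; a struct { vec3 a; float b; }
 * takes 2; and a sampler2D takes 0.
 */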
652
653 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
654 {
655 init();
656
657 this->file = GRF;
658 this->reg = v->alloc.allocate(type_size_vec4(type));
659
660 if (type->is_array() || type->is_record()) {
661 this->swizzle = BRW_SWIZZLE_NOOP;
662 } else {
663 this->swizzle = brw_swizzle_for_size(type->vector_elements);
664 }
665
666 this->type = brw_type_for_base_type(type);
667 }
668
669 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
670 {
671 assert(size > 0);
672
673 init();
674
675 this->file = GRF;
676 this->reg = v->alloc.allocate(type_size_vec4(type) * size);
677
678 this->swizzle = BRW_SWIZZLE_NOOP;
679
680 this->type = brw_type_for_base_type(type);
681 }
682
683 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
684 {
685 init();
686
687 this->file = GRF;
688 this->reg = v->alloc.allocate(type_size_vec4(type));
689
690 if (type->is_array() || type->is_record()) {
691 this->writemask = WRITEMASK_XYZW;
692 } else {
693 this->writemask = (1 << type->vector_elements) - 1;
694 }
695
696 this->type = brw_type_for_base_type(type);
697 }
698
699 void
700 vec4_visitor::setup_vec4_uniform_value(unsigned param_offset,
701 const gl_constant_value *values,
702 unsigned n)
703 {
704 static const gl_constant_value zero = { 0 };
705
706 assert(param_offset % 4 == 0);
707
708 for (unsigned i = 0; i < n; ++i)
709 stage_prog_data->param[param_offset + i] = &values[i];
710
711 for (unsigned i = n; i < 4; ++i)
712 stage_prog_data->param[param_offset + i] = &zero;
713
714 uniform_vector_size[param_offset / 4] = n;
715 }
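
/* For example, uploading a vec3 uniform with n == 3 points
 * param[param_offset..param_offset + 2] at the three components, points
 * param[param_offset + 3] at the shared zero constant, and records a vector
 * size of 3 for this uniform slot.
 */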
716
717 /* Our support for uniforms is piggy-backed on the struct
718 * gl_fragment_program, because that's where the values actually
719 * get stored, rather than in some global gl_shader_program uniform
720 * store.
721 */
722 void
723 vec4_visitor::setup_uniform_values(ir_variable *ir)
724 {
725 int namelen = strlen(ir->name);
726
727 /* The data for our (non-builtin) uniforms is stored in a series of
728 * gl_uniform_driver_storage structs for each subcomponent that
729 * glGetUniformLocation() could name. We know it's been set up in the same
730 * order we'd walk the type, so walk the list of storage and find anything
731 * with our name, or the prefix of a component that starts with our name.
732 */
733 for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
734 struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
735
736 if (storage->builtin)
737 continue;
738
739 if (strncmp(ir->name, storage->name, namelen) != 0 ||
740 (storage->name[namelen] != 0 &&
741 storage->name[namelen] != '.' &&
742 storage->name[namelen] != '[')) {
743 continue;
744 }
745
746 const unsigned vector_count = (MAX2(storage->array_elements, 1) *
747 storage->type->matrix_columns);
748 const unsigned vector_size = storage->type->vector_elements;
749
750 for (unsigned s = 0; s < vector_count; s++) {
751 setup_vec4_uniform_value(uniforms * 4,
752 &storage->storage[s * vector_size],
753 vector_size);
754 uniforms++;
755 }
756 }
757 }
758
759 void
760 vec4_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
761 {
762 for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
763 assert(this->uniforms < uniform_array_size);
764 this->uniform_vector_size[this->uniforms] = 4;
765 this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
766 this->userplane[i].type = BRW_REGISTER_TYPE_F;
767 for (int j = 0; j < 4; ++j) {
768 stage_prog_data->param[this->uniforms * 4 + j] =
769 (gl_constant_value *) &clip_planes[i][j];
770 }
771 ++this->uniforms;
772 }
773 }
774
775 /* Our support for builtin uniforms is even scarier than non-builtin.
776 * It sits on top of the PROG_STATE_VAR parameters that are
777 * automatically updated from GL context state.
778 */
779 void
780 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
781 {
782 const ir_state_slot *const slots = ir->get_state_slots();
783 assert(slots != NULL);
784
785 for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
786       /* This state reference has already been set up by ir_to_mesa,
787 * but we'll get the same index back here. We can reference
788 * ParameterValues directly, since unlike brw_fs.cpp, we never
789 * add new state references during compile.
790 */
791 int index = _mesa_add_state_reference(this->prog->Parameters,
792 (gl_state_index *)slots[i].tokens);
793 gl_constant_value *values =
794 &this->prog->Parameters->ParameterValues[index][0];
795
796 assert(this->uniforms < uniform_array_size);
797
798 for (unsigned j = 0; j < 4; j++)
799 stage_prog_data->param[this->uniforms * 4 + j] =
800 &values[GET_SWZ(slots[i].swizzle, j)];
801
802 this->uniform_vector_size[this->uniforms] =
803 (ir->type->is_scalar() || ir->type->is_vector() ||
804 ir->type->is_matrix() ? ir->type->vector_elements : 4);
805
806 this->uniforms++;
807 }
808 }
809
810 dst_reg *
811 vec4_visitor::variable_storage(ir_variable *var)
812 {
813 return (dst_reg *)hash_table_find(this->variable_ht, var);
814 }
815
816 void
817 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir,
818 enum brw_predicate *predicate)
819 {
820 ir_expression *expr = ir->as_expression();
821
822 *predicate = BRW_PREDICATE_NORMAL;
823
824 if (expr && expr->operation != ir_binop_ubo_load) {
825 src_reg op[3];
826 vec4_instruction *inst;
827
828 assert(expr->get_num_operands() <= 3);
829 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
830 expr->operands[i]->accept(this);
831 op[i] = this->result;
832
833 resolve_ud_negate(&op[i]);
834 }
835
836 switch (expr->operation) {
837 case ir_unop_logic_not:
838 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
839 inst->conditional_mod = BRW_CONDITIONAL_Z;
840 break;
841
842 case ir_binop_logic_xor:
843 if (devinfo->gen <= 5) {
844 src_reg temp = src_reg(this, ir->type);
845 emit(XOR(dst_reg(temp), op[0], op[1]));
846 inst = emit(AND(dst_null_d(), temp, src_reg(1)));
847 } else {
848 inst = emit(XOR(dst_null_d(), op[0], op[1]));
849 }
850 inst->conditional_mod = BRW_CONDITIONAL_NZ;
851 break;
852
853 case ir_binop_logic_or:
854 if (devinfo->gen <= 5) {
855 src_reg temp = src_reg(this, ir->type);
856 emit(OR(dst_reg(temp), op[0], op[1]));
857 inst = emit(AND(dst_null_d(), temp, src_reg(1)));
858 } else {
859 inst = emit(OR(dst_null_d(), op[0], op[1]));
860 }
861 inst->conditional_mod = BRW_CONDITIONAL_NZ;
862 break;
863
864 case ir_binop_logic_and:
865 if (devinfo->gen <= 5) {
866 src_reg temp = src_reg(this, ir->type);
867 emit(AND(dst_reg(temp), op[0], op[1]));
868 inst = emit(AND(dst_null_d(), temp, src_reg(1)));
869 } else {
870 inst = emit(AND(dst_null_d(), op[0], op[1]));
871 }
872 inst->conditional_mod = BRW_CONDITIONAL_NZ;
873 break;
874
875 case ir_unop_f2b:
876 if (devinfo->gen >= 6) {
877 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
878 } else {
879 inst = emit(MOV(dst_null_f(), op[0]));
880 inst->conditional_mod = BRW_CONDITIONAL_NZ;
881 }
882 break;
883
884 case ir_unop_i2b:
885 if (devinfo->gen >= 6) {
886 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
887 } else {
888 inst = emit(MOV(dst_null_d(), op[0]));
889 inst->conditional_mod = BRW_CONDITIONAL_NZ;
890 }
891 break;
892
893 case ir_binop_all_equal:
894 if (devinfo->gen <= 5) {
895 resolve_bool_comparison(expr->operands[0], &op[0]);
896 resolve_bool_comparison(expr->operands[1], &op[1]);
897 }
898 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
899 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
900 break;
901
902 case ir_binop_any_nequal:
903 if (devinfo->gen <= 5) {
904 resolve_bool_comparison(expr->operands[0], &op[0]);
905 resolve_bool_comparison(expr->operands[1], &op[1]);
906 }
907 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
908 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
909 break;
910
911 case ir_unop_any:
912 if (devinfo->gen <= 5) {
913 resolve_bool_comparison(expr->operands[0], &op[0]);
914 }
915 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
916 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
917 break;
918
919 case ir_binop_greater:
920 case ir_binop_gequal:
921 case ir_binop_less:
922 case ir_binop_lequal:
923 case ir_binop_equal:
924 case ir_binop_nequal:
925 if (devinfo->gen <= 5) {
926 resolve_bool_comparison(expr->operands[0], &op[0]);
927 resolve_bool_comparison(expr->operands[1], &op[1]);
928 }
929 emit(CMP(dst_null_d(), op[0], op[1],
930 brw_conditional_for_comparison(expr->operation)));
931 break;
932
933 case ir_triop_csel: {
934 /* Expand the boolean condition into the flag register. */
935 inst = emit(MOV(dst_null_d(), op[0]));
936 inst->conditional_mod = BRW_CONDITIONAL_NZ;
937
938 /* Select which boolean to return. */
939 dst_reg temp(this, expr->operands[1]->type);
940 inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]);
941 inst->predicate = BRW_PREDICATE_NORMAL;
942
943 /* Expand the result to a condition code. */
944 inst = emit(MOV(dst_null_d(), src_reg(temp)));
945 inst->conditional_mod = BRW_CONDITIONAL_NZ;
946 break;
947 }
948
949 default:
950 unreachable("not reached");
951 }
952 return;
953 }
954
955 ir->accept(this);
956
957 resolve_ud_negate(&this->result);
958
959 vec4_instruction *inst = emit(AND(dst_null_d(), this->result, src_reg(1)));
960 inst->conditional_mod = BRW_CONDITIONAL_NZ;
961 }
962
963 /**
964 * Emit a gen6 IF statement with the comparison folded into the IF
965 * instruction.
966 */
967 void
968 vec4_visitor::emit_if_gen6(ir_if *ir)
969 {
970 ir_expression *expr = ir->condition->as_expression();
971
972 if (expr && expr->operation != ir_binop_ubo_load) {
973 src_reg op[3];
974 dst_reg temp;
975
976 assert(expr->get_num_operands() <= 3);
977 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
978 expr->operands[i]->accept(this);
979 op[i] = this->result;
980 }
981
982 switch (expr->operation) {
983 case ir_unop_logic_not:
984 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
985 return;
986
987 case ir_binop_logic_xor:
988 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
989 return;
990
991 case ir_binop_logic_or:
992 temp = dst_reg(this, glsl_type::bool_type);
993 emit(OR(temp, op[0], op[1]));
994 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
995 return;
996
997 case ir_binop_logic_and:
998 temp = dst_reg(this, glsl_type::bool_type);
999 emit(AND(temp, op[0], op[1]));
1000 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
1001 return;
1002
1003 case ir_unop_f2b:
1004 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1005 return;
1006
1007 case ir_unop_i2b:
1008 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1009 return;
1010
1011 case ir_binop_greater:
1012 case ir_binop_gequal:
1013 case ir_binop_less:
1014 case ir_binop_lequal:
1015 case ir_binop_equal:
1016 case ir_binop_nequal:
1017 emit(IF(op[0], op[1],
1018 brw_conditional_for_comparison(expr->operation)));
1019 return;
1020
1021 case ir_binop_all_equal:
1022 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1023 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
1024 return;
1025
1026 case ir_binop_any_nequal:
1027 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1028 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
1029 return;
1030
1031 case ir_unop_any:
1032 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1033 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
1034 return;
1035
1036 case ir_triop_csel: {
1037 /* Expand the boolean condition into the flag register. */
1038 vec4_instruction *inst = emit(MOV(dst_null_d(), op[0]));
1039 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1040
1041 /* Select which boolean to return. */
1042 dst_reg temp(this, expr->operands[1]->type);
1043 inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]);
1044 inst->predicate = BRW_PREDICATE_NORMAL;
1045
1046 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
1047 return;
1048 }
1049
1050 default:
1051 unreachable("not reached");
1052 }
1053 return;
1054 }
1055
1056 ir->condition->accept(this);
1057
1058 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
1059 }
1060
1061 void
1062 vec4_visitor::visit(ir_variable *ir)
1063 {
1064 dst_reg *reg = NULL;
1065
1066 if (variable_storage(ir))
1067 return;
1068
1069 switch (ir->data.mode) {
1070 case ir_var_shader_in:
1071 assert(ir->data.location != -1);
1072 reg = new(mem_ctx) dst_reg(ATTR, ir->data.location);
1073 break;
1074
1075 case ir_var_shader_out:
1076 assert(ir->data.location != -1);
1077 reg = new(mem_ctx) dst_reg(this, ir->type);
1078
1079 for (int i = 0; i < type_size_vec4(ir->type); i++) {
1080 output_reg[ir->data.location + i] = *reg;
1081 output_reg[ir->data.location + i].reg_offset = i;
1082 output_reg_annotation[ir->data.location + i] = ir->name;
1083 }
1084 break;
1085
1086 case ir_var_auto:
1087 case ir_var_temporary:
1088 reg = new(mem_ctx) dst_reg(this, ir->type);
1089 break;
1090
1091 case ir_var_uniform:
1092 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
1093
1094 /* Thanks to the lower_ubo_reference pass, we will see only
1095 * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
1096 * variables, so no need for them to be in variable_ht.
1097 *
1098 * Some uniforms, such as samplers and atomic counters, have no actual
1099 * storage, so we should ignore them.
1100 */
1101 if (ir->is_in_buffer_block() || type_size_vec4(ir->type) == 0)
1102 return;
1103
1104 /* Track how big the whole uniform variable is, in case we need to put a
1105 * copy of its data into pull constants for array access.
1106 */
1107 assert(this->uniforms < uniform_array_size);
1108 this->uniform_size[this->uniforms] = type_size_vec4(ir->type);
1109
1110 if (!strncmp(ir->name, "gl_", 3)) {
1111 setup_builtin_uniform_values(ir);
1112 } else {
1113 setup_uniform_values(ir);
1114 }
1115 break;
1116
1117 case ir_var_system_value:
1118 reg = make_reg_for_system_value(ir->data.location, ir->type);
1119 break;
1120
1121 default:
1122 unreachable("not reached");
1123 }
1124
1125 reg->type = brw_type_for_base_type(ir->type);
1126 hash_table_insert(this->variable_ht, reg, ir);
1127 }
1128
1129 void
1130 vec4_visitor::visit(ir_loop *ir)
1131 {
1132 /* We don't want debugging output to print the whole body of the
1133 * loop as the annotation.
1134 */
1135 this->base_ir = NULL;
1136
1137 emit(BRW_OPCODE_DO);
1138
1139 visit_instructions(&ir->body_instructions);
1140
1141 emit(BRW_OPCODE_WHILE);
1142 }
1143
1144 void
1145 vec4_visitor::visit(ir_loop_jump *ir)
1146 {
1147 switch (ir->mode) {
1148 case ir_loop_jump::jump_break:
1149 emit(BRW_OPCODE_BREAK);
1150 break;
1151 case ir_loop_jump::jump_continue:
1152 emit(BRW_OPCODE_CONTINUE);
1153 break;
1154 }
1155 }
1156
1157
1158 void
1159 vec4_visitor::visit(ir_function_signature *)
1160 {
1161 unreachable("not reached");
1162 }
1163
1164 void
1165 vec4_visitor::visit(ir_function *ir)
1166 {
1167 /* Ignore function bodies other than main() -- we shouldn't see calls to
1168 * them since they should all be inlined.
1169 */
1170 if (strcmp(ir->name, "main") == 0) {
1171 const ir_function_signature *sig;
1172 exec_list empty;
1173
1174 sig = ir->matching_signature(NULL, &empty, false);
1175
1176 assert(sig);
1177
1178 visit_instructions(&sig->body);
1179 }
1180 }
1181
1182 bool
1183 vec4_visitor::try_emit_mad(ir_expression *ir)
1184 {
1185 /* 3-src instructions were introduced in gen6. */
1186 if (devinfo->gen < 6)
1187 return false;
1188
1189 /* MAD can only handle floating-point data. */
1190 if (ir->type->base_type != GLSL_TYPE_FLOAT)
1191 return false;
1192
1193 ir_rvalue *nonmul;
1194 ir_expression *mul;
1195 bool mul_negate, mul_abs;
1196
1197 for (int i = 0; i < 2; i++) {
1198 mul_negate = false;
1199 mul_abs = false;
1200
1201 mul = ir->operands[i]->as_expression();
1202 nonmul = ir->operands[1 - i];
1203
1204 if (mul && mul->operation == ir_unop_abs) {
1205 mul = mul->operands[0]->as_expression();
1206 mul_abs = true;
1207 } else if (mul && mul->operation == ir_unop_neg) {
1208 mul = mul->operands[0]->as_expression();
1209 mul_negate = true;
1210 }
1211
1212 if (mul && mul->operation == ir_binop_mul)
1213 break;
1214 }
1215
1216 if (!mul || mul->operation != ir_binop_mul)
1217 return false;
1218
1219 nonmul->accept(this);
1220 src_reg src0 = fix_3src_operand(this->result);
1221
1222 mul->operands[0]->accept(this);
1223 src_reg src1 = fix_3src_operand(this->result);
1224 src1.negate ^= mul_negate;
1225 src1.abs = mul_abs;
1226 if (mul_abs)
1227 src1.negate = false;
1228
1229 mul->operands[1]->accept(this);
1230 src_reg src2 = fix_3src_operand(this->result);
1231 src2.abs = mul_abs;
1232 if (mul_abs)
1233 src2.negate = false;
1234
1235 this->result = src_reg(this, ir->type);
1236 emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2);
1237
1238 return true;
1239 }
1240
1241 bool
1242 vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir)
1243 {
1244 /* This optimization relies on CMP setting the destination to 0 when
1245 * false. Early hardware only sets the least significant bit, and
1246 * leaves the other bits undefined. So we can't use it.
1247 */
1248 if (devinfo->gen < 6)
1249 return false;
1250
1251 ir_expression *const cmp = ir->operands[0]->as_expression();
1252
1253 if (cmp == NULL)
1254 return false;
1255
1256 switch (cmp->operation) {
1257 case ir_binop_less:
1258 case ir_binop_greater:
1259 case ir_binop_lequal:
1260 case ir_binop_gequal:
1261 case ir_binop_equal:
1262 case ir_binop_nequal:
1263 break;
1264
1265 default:
1266 return false;
1267 }
1268
1269 cmp->operands[0]->accept(this);
1270 const src_reg cmp_src0 = this->result;
1271
1272 cmp->operands[1]->accept(this);
1273 const src_reg cmp_src1 = this->result;
1274
1275 this->result = src_reg(this, ir->type);
1276
1277 emit(CMP(dst_reg(this->result), cmp_src0, cmp_src1,
1278 brw_conditional_for_comparison(cmp->operation)));
1279
1280 /* If the comparison is false, this->result will just happen to be zero.
1281 */
1282 vec4_instruction *const inst = emit(BRW_OPCODE_SEL, dst_reg(this->result),
1283 this->result, src_reg(1.0f));
1284 inst->predicate = BRW_PREDICATE_NORMAL;
1285 inst->predicate_inverse = true;
1286
1287 return true;
1288 }
1289
1290 vec4_instruction *
1291 vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
1292 src_reg src0, src_reg src1)
1293 {
1294 vec4_instruction *inst;
1295
1296 if (devinfo->gen >= 6) {
1297 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1298 inst->conditional_mod = conditionalmod;
1299 } else {
1300 emit(CMP(dst, src0, src1, conditionalmod));
1301
1302 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1303 inst->predicate = BRW_PREDICATE_NORMAL;
1304 }
1305
1306 return inst;
1307 }
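
/* Both min() and max() funnel through emit_minmax(): BRW_CONDITIONAL_L gives
 * min(src0, src1) and BRW_CONDITIONAL_GE gives max(src0, src1).  On gen6+
 * this is a single conditional-mod SEL; earlier hardware needs the explicit
 * CMP followed by a predicated SEL as above.
 */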
1308
1309 vec4_instruction *
1310 vec4_visitor::emit_lrp(const dst_reg &dst,
1311 const src_reg &x, const src_reg &y, const src_reg &a)
1312 {
1313 if (devinfo->gen >= 6) {
1314 /* Note that the instruction's argument order is reversed from GLSL
1315 * and the IR.
1316 */
1317 return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y),
1318 fix_3src_operand(x)));
1319 } else {
1320 /* Earlier generations don't support three source operations, so we
1321 * need to emit x*(1-a) + y*a.
1322 */
1323 dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type);
1324 dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
1325 dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
1326 y_times_a.writemask = dst.writemask;
1327 one_minus_a.writemask = dst.writemask;
1328 x_times_one_minus_a.writemask = dst.writemask;
1329
1330 emit(MUL(y_times_a, y, a));
1331 emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
1332 emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
1333 return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
1334 }
1335 }
1336
1337 /**
1338 * Emits the instructions needed to perform a pull constant load. before_block
1339  * and before_inst can be NULL, in which case the instructions will be appended
1340 * to the end of the instruction list.
1341 */
1342 void
1343 vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
1344 src_reg surf_index,
1345 src_reg offset_reg,
1346 bblock_t *before_block,
1347 vec4_instruction *before_inst)
1348 {
1349 assert((before_inst == NULL && before_block == NULL) ||
1350 (before_inst && before_block));
1351
1352 vec4_instruction *pull;
1353
1354 if (devinfo->gen >= 9) {
1355 /* Gen9+ needs a message header in order to use SIMD4x2 mode */
1356 src_reg header(this, glsl_type::uvec4_type, 2);
1357
1358 pull = new(mem_ctx)
1359 vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
1360 dst_reg(header));
1361
1362 if (before_inst)
1363 emit_before(before_block, before_inst, pull);
1364 else
1365 emit(pull);
1366
1367 dst_reg index_reg = retype(offset(dst_reg(header), 1),
1368 offset_reg.type);
1369 pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
1370
1371 if (before_inst)
1372 emit_before(before_block, before_inst, pull);
1373 else
1374 emit(pull);
1375
1376 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
1377 dst,
1378 surf_index,
1379 header);
1380 pull->mlen = 2;
1381 pull->header_size = 1;
1382 } else if (devinfo->gen >= 7) {
1383 dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
1384
1385 grf_offset.type = offset_reg.type;
1386
1387 pull = MOV(grf_offset, offset_reg);
1388
1389 if (before_inst)
1390 emit_before(before_block, before_inst, pull);
1391 else
1392 emit(pull);
1393
1394 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
1395 dst,
1396 surf_index,
1397 src_reg(grf_offset));
1398 pull->mlen = 1;
1399 } else {
1400 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
1401 dst,
1402 surf_index,
1403 offset_reg);
1404 pull->base_mrf = 14;
1405 pull->mlen = 1;
1406 }
1407
1408 if (before_inst)
1409 emit_before(before_block, before_inst, pull);
1410 else
1411 emit(pull);
1412 }
1413
1414 src_reg
1415 vec4_visitor::emit_uniformize(const src_reg &src)
1416 {
1417 const src_reg chan_index(this, glsl_type::uint_type);
1418 const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
1419 src.type);
1420
1421 emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
1422 ->force_writemask_all = true;
1423 emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
1424 ->force_writemask_all = true;
1425
1426 return src_reg(dst);
1427 }
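
/* The two-instruction idiom above -- FIND_LIVE_CHANNEL to pick the index of
 * some enabled channel, then BROADCAST to splat that channel's value of src
 * into every channel of dst -- turns a possibly non-uniform value into one
 * that is uniform across the execution mask.  The ir_binop_ubo_load handling
 * below uses it to make a dynamically computed surface index usable in a
 * message descriptor.
 */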
1428
1429 void
1430 vec4_visitor::visit(ir_expression *ir)
1431 {
1432 unsigned int operand;
1433 src_reg op[ARRAY_SIZE(ir->operands)];
1434 vec4_instruction *inst;
1435
1436 if (ir->operation == ir_binop_add) {
1437 if (try_emit_mad(ir))
1438 return;
1439 }
1440
1441 if (ir->operation == ir_unop_b2f) {
1442 if (try_emit_b2f_of_compare(ir))
1443 return;
1444 }
1445
1446 /* Storage for our result. Ideally for an assignment we'd be using
1447 * the actual storage for the result here, instead.
1448 */
1449 dst_reg result_dst(this, ir->type);
1450 src_reg result_src(result_dst);
1451
1452 if (ir->operation == ir_triop_csel) {
1453 ir->operands[1]->accept(this);
1454 op[1] = this->result;
1455 ir->operands[2]->accept(this);
1456 op[2] = this->result;
1457
1458 enum brw_predicate predicate;
1459 emit_bool_to_cond_code(ir->operands[0], &predicate);
1460 inst = emit(BRW_OPCODE_SEL, result_dst, op[1], op[2]);
1461 inst->predicate = predicate;
1462 this->result = result_src;
1463 return;
1464 }
1465
1466 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1467 this->result.file = BAD_FILE;
1468 ir->operands[operand]->accept(this);
1469 if (this->result.file == BAD_FILE) {
1470 fprintf(stderr, "Failed to get tree for expression operand:\n");
1471 ir->operands[operand]->fprint(stderr);
1472 exit(1);
1473 }
1474 op[operand] = this->result;
1475
1476 /* Matrix expression operands should have been broken down to vector
1477 * operations already.
1478 */
1479 assert(!ir->operands[operand]->type->is_matrix());
1480 }
1481
1482 /* If nothing special happens, this is the result. */
1483 this->result = result_src;
1484
1485 switch (ir->operation) {
1486 case ir_unop_logic_not:
1487 emit(NOT(result_dst, op[0]));
1488 break;
1489 case ir_unop_neg:
1490 op[0].negate = !op[0].negate;
1491 emit(MOV(result_dst, op[0]));
1492 break;
1493 case ir_unop_abs:
1494 op[0].abs = true;
1495 op[0].negate = false;
1496 emit(MOV(result_dst, op[0]));
1497 break;
1498
1499 case ir_unop_sign:
1500 if (ir->type->is_float()) {
1501 /* AND(val, 0x80000000) gives the sign bit.
1502 *
1503 * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
1504 * zero.
1505 */
1506 emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1507
1508 op[0].type = BRW_REGISTER_TYPE_UD;
1509 result_dst.type = BRW_REGISTER_TYPE_UD;
1510 emit(AND(result_dst, op[0], src_reg(0x80000000u)));
1511
1512 inst = emit(OR(result_dst, src_reg(result_dst), src_reg(0x3f800000u)));
1513 inst->predicate = BRW_PREDICATE_NORMAL;
1514
1515 this->result.type = BRW_REGISTER_TYPE_F;
1516 } else {
1517 /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
1518 * -> non-negative val generates 0x00000000.
1519 * Predicated OR sets 1 if val is positive.
1520 */
1521 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
1522
1523 emit(ASR(result_dst, op[0], src_reg(31)));
1524
1525 inst = emit(OR(result_dst, src_reg(result_dst), src_reg(1)));
1526 inst->predicate = BRW_PREDICATE_NORMAL;
1527 }
1528 break;
1529
1530 case ir_unop_rcp:
1531 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1532 break;
1533
1534 case ir_unop_exp2:
1535 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1536 break;
1537 case ir_unop_log2:
1538 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1539 break;
1540 case ir_unop_exp:
1541 case ir_unop_log:
1542 unreachable("not reached: should be handled by ir_explog_to_explog2");
1543 case ir_unop_sin:
1544 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1545 break;
1546 case ir_unop_cos:
1547 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1548 break;
1549
1550 case ir_unop_dFdx:
1551 case ir_unop_dFdx_coarse:
1552 case ir_unop_dFdx_fine:
1553 case ir_unop_dFdy:
1554 case ir_unop_dFdy_coarse:
1555 case ir_unop_dFdy_fine:
1556 unreachable("derivatives not valid in vertex shader");
1557
1558 case ir_unop_bitfield_reverse:
1559 emit(BFREV(result_dst, op[0]));
1560 break;
1561 case ir_unop_bit_count:
1562 emit(CBIT(result_dst, op[0]));
1563 break;
1564 case ir_unop_find_msb: {
1565 src_reg temp = src_reg(this, glsl_type::uint_type);
1566
1567 inst = emit(FBH(dst_reg(temp), op[0]));
1568 inst->dst.writemask = WRITEMASK_XYZW;
1569
1570 /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
1571 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
1572 * subtract the result from 31 to convert the MSB count into an LSB count.
1573 */
1574
1575 /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
1576 temp.swizzle = BRW_SWIZZLE_NOOP;
1577 emit(MOV(result_dst, temp));
1578
1579 src_reg src_tmp = src_reg(result_dst);
1580 emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
1581
1582 src_tmp.negate = true;
1583 inst = emit(ADD(result_dst, src_tmp, src_reg(31)));
1584 inst->predicate = BRW_PREDICATE_NORMAL;
1585 break;
1586 }
1587 case ir_unop_find_lsb:
1588 emit(FBL(result_dst, op[0]));
1589 break;
1590 case ir_unop_saturate:
1591 inst = emit(MOV(result_dst, op[0]));
1592 inst->saturate = true;
1593 break;
1594
1595 case ir_unop_noise:
1596 unreachable("not reached: should be handled by lower_noise");
1597
1598 case ir_unop_subroutine_to_int:
1599 emit(MOV(result_dst, op[0]));
1600 break;
1601
1602 case ir_binop_add:
1603 emit(ADD(result_dst, op[0], op[1]));
1604 break;
1605 case ir_binop_sub:
1606 unreachable("not reached: should be handled by ir_sub_to_add_neg");
1607
1608 case ir_binop_mul:
1609 if (devinfo->gen < 8 && ir->type->is_integer()) {
1610 /* For integer multiplication, the MUL uses the low 16 bits of one of
1611 * the operands (src0 through SNB, src1 on IVB and later). The MACH
1612           * accumulates the contribution of the upper 16 bits of that
1613 * operand. If we can determine that one of the args is in the low
1614 * 16 bits, though, we can just emit a single MUL.
1615 */
1616 if (ir->operands[0]->is_uint16_constant()) {
1617 if (devinfo->gen < 7)
1618 emit(MUL(result_dst, op[0], op[1]));
1619 else
1620 emit(MUL(result_dst, op[1], op[0]));
1621 } else if (ir->operands[1]->is_uint16_constant()) {
1622 if (devinfo->gen < 7)
1623 emit(MUL(result_dst, op[1], op[0]));
1624 else
1625 emit(MUL(result_dst, op[0], op[1]));
1626 } else {
1627 struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type);
1628
1629 emit(MUL(acc, op[0], op[1]));
1630 emit(MACH(dst_null_d(), op[0], op[1]));
1631 emit(MOV(result_dst, src_reg(acc)));
1632 }
1633 } else {
1634 emit(MUL(result_dst, op[0], op[1]));
1635 }
1636 break;
1637 case ir_binop_imul_high: {
1638 struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type);
1639
1640 emit(MUL(acc, op[0], op[1]));
1641 emit(MACH(result_dst, op[0], op[1]));
1642 break;
1643 }
1644 case ir_binop_div:
1645 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1646 assert(ir->type->is_integer());
1647 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1648 break;
1649
1650 case ir_binop_carry:
1651 unreachable("Should have been lowered by carry_to_arith().");
1652
1653 case ir_binop_borrow:
1654 unreachable("Should have been lowered by borrow_to_arith().");
1655
1656 case ir_binop_mod:
1657 /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
1658 assert(ir->type->is_integer());
1659 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1660 break;
1661
1662 case ir_binop_less:
1663 case ir_binop_greater:
1664 case ir_binop_lequal:
1665 case ir_binop_gequal:
1666 case ir_binop_equal:
1667 case ir_binop_nequal: {
1668 if (devinfo->gen <= 5) {
1669 resolve_bool_comparison(ir->operands[0], &op[0]);
1670 resolve_bool_comparison(ir->operands[1], &op[1]);
1671 }
1672 emit(CMP(result_dst, op[0], op[1],
1673 brw_conditional_for_comparison(ir->operation)));
1674 break;
1675 }
1676
1677 case ir_binop_all_equal:
1678 if (devinfo->gen <= 5) {
1679 resolve_bool_comparison(ir->operands[0], &op[0]);
1680 resolve_bool_comparison(ir->operands[1], &op[1]);
1681 }
1682
1683 /* "==" operator producing a scalar boolean. */
1684 if (ir->operands[0]->type->is_vector() ||
1685 ir->operands[1]->type->is_vector()) {
1686 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1687 emit(MOV(result_dst, src_reg(0)));
1688 inst = emit(MOV(result_dst, src_reg(~0)));
1689 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1690 } else {
1691 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1692 }
1693 break;
1694 case ir_binop_any_nequal:
1695 if (devinfo->gen <= 5) {
1696 resolve_bool_comparison(ir->operands[0], &op[0]);
1697 resolve_bool_comparison(ir->operands[1], &op[1]);
1698 }
1699
1700 /* "!=" operator producing a scalar boolean. */
1701 if (ir->operands[0]->type->is_vector() ||
1702 ir->operands[1]->type->is_vector()) {
1703 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1704
1705 emit(MOV(result_dst, src_reg(0)));
1706 inst = emit(MOV(result_dst, src_reg(~0)));
1707 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1708 } else {
1709 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1710 }
1711 break;
1712
1713 case ir_unop_any:
1714 if (devinfo->gen <= 5) {
1715 resolve_bool_comparison(ir->operands[0], &op[0]);
1716 }
1717 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1718 emit(MOV(result_dst, src_reg(0)));
1719
1720 inst = emit(MOV(result_dst, src_reg(~0)));
1721 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1722 break;
1723
1724 case ir_binop_logic_xor:
1725 emit(XOR(result_dst, op[0], op[1]));
1726 break;
1727
1728 case ir_binop_logic_or:
1729 emit(OR(result_dst, op[0], op[1]));
1730 break;
1731
1732 case ir_binop_logic_and:
1733 emit(AND(result_dst, op[0], op[1]));
1734 break;
1735
1736 case ir_binop_dot:
1737 assert(ir->operands[0]->type->is_vector());
1738 assert(ir->operands[0]->type == ir->operands[1]->type);
1739 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1740 break;
1741
1742 case ir_unop_sqrt:
1743 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1744 break;
1745 case ir_unop_rsq:
1746 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1747 break;
1748
1749 case ir_unop_bitcast_i2f:
1750 case ir_unop_bitcast_u2f:
1751 this->result = op[0];
1752 this->result.type = BRW_REGISTER_TYPE_F;
1753 break;
1754
1755 case ir_unop_bitcast_f2i:
1756 this->result = op[0];
1757 this->result.type = BRW_REGISTER_TYPE_D;
1758 break;
1759
1760 case ir_unop_bitcast_f2u:
1761 this->result = op[0];
1762 this->result.type = BRW_REGISTER_TYPE_UD;
1763 break;
1764
1765 case ir_unop_i2f:
1766 case ir_unop_i2u:
1767 case ir_unop_u2i:
1768 case ir_unop_u2f:
1769 case ir_unop_f2i:
1770 case ir_unop_f2u:
1771 emit(MOV(result_dst, op[0]));
1772 break;
1773 case ir_unop_b2i:
1774 case ir_unop_b2f:
1775 if (devinfo->gen <= 5) {
1776 resolve_bool_comparison(ir->operands[0], &op[0]);
1777 }
1778 emit(MOV(result_dst, negate(op[0])));
1779 break;
1780 case ir_unop_f2b:
1781 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1782 break;
1783 case ir_unop_i2b:
1784 emit(CMP(result_dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1785 break;
1786
1787 case ir_unop_trunc:
1788 emit(RNDZ(result_dst, op[0]));
1789 break;
1790 case ir_unop_ceil: {
1791 src_reg tmp = src_reg(this, ir->type);
1792 op[0].negate = !op[0].negate;
1793 emit(RNDD(dst_reg(tmp), op[0]));
1794 tmp.negate = true;
1795 emit(MOV(result_dst, tmp));
1796 }
1797 break;
1798 case ir_unop_floor:
1799 inst = emit(RNDD(result_dst, op[0]));
1800 break;
1801 case ir_unop_fract:
1802 inst = emit(FRC(result_dst, op[0]));
1803 break;
1804 case ir_unop_round_even:
1805 emit(RNDE(result_dst, op[0]));
1806 break;
1807
1808 case ir_binop_min:
1809 emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
1810 break;
1811 case ir_binop_max:
1812 emit_minmax(BRW_CONDITIONAL_GE, result_dst, op[0], op[1]);
1813 break;
1814
1815 case ir_binop_pow:
1816 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1817 break;
1818
1819 case ir_unop_bit_not:
1820 inst = emit(NOT(result_dst, op[0]));
1821 break;
1822 case ir_binop_bit_and:
1823 inst = emit(AND(result_dst, op[0], op[1]));
1824 break;
1825 case ir_binop_bit_xor:
1826 inst = emit(XOR(result_dst, op[0], op[1]));
1827 break;
1828 case ir_binop_bit_or:
1829 inst = emit(OR(result_dst, op[0], op[1]));
1830 break;
1831
1832 case ir_binop_lshift:
1833 inst = emit(SHL(result_dst, op[0], op[1]));
1834 break;
1835
1836 case ir_binop_rshift:
1837 if (ir->type->base_type == GLSL_TYPE_INT)
1838 inst = emit(ASR(result_dst, op[0], op[1]));
1839 else
1840 inst = emit(SHR(result_dst, op[0], op[1]));
1841 break;
1842
1843 case ir_binop_bfm:
1844 emit(BFI1(result_dst, op[0], op[1]));
1845 break;
1846
1847 case ir_binop_ubo_load: {
1848 ir_constant *const_uniform_block = ir->operands[0]->as_constant();
1849 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1850 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
1851 src_reg offset;
1852
1853 /* Now, load the vector from that offset. */
1854 assert(ir->type->is_vector() || ir->type->is_scalar());
1855
1856 src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
1857 packed_consts.type = result.type;
1858 src_reg surf_index;
1859
1860 if (const_uniform_block) {
1861 /* The block index is a constant, so just emit the binding table entry
1862 * as an immediate.
1863 */
1864 surf_index = src_reg(prog_data->base.binding_table.ubo_start +
1865 const_uniform_block->value.u[0]);
1866 } else {
1867 /* The block index is not a constant. Evaluate the index expression
1868 * per-channel and add the base UBO index; we have to select a value
1869 * from any live channel.
1870 */
1871 surf_index = src_reg(this, glsl_type::uint_type);
1872 emit(ADD(dst_reg(surf_index), op[0],
1873 src_reg(prog_data->base.binding_table.ubo_start)));
1874 surf_index = emit_uniformize(surf_index);
1875
1876 /* Assume this may touch any UBO. It would be nice to provide
1877 * a tighter bound, but the array information is already lowered away.
1878 */
1879 brw_mark_surface_used(&prog_data->base,
1880 prog_data->base.binding_table.ubo_start +
1881 shader_prog->NumUniformBlocks - 1);
1882 }
1883
1884 if (const_offset_ir) {
1885 if (devinfo->gen >= 8) {
1886 /* Store the offset in a GRF so we can send-from-GRF. */
1887 offset = src_reg(this, glsl_type::int_type);
1888 emit(MOV(dst_reg(offset), src_reg(const_offset / 16)));
1889 } else {
1890 /* Immediates are fine on older generations since they'll be moved
1891 * to a (potentially fake) MRF at the generator level.
1892 */
1893 offset = src_reg(const_offset / 16);
1894 }
1895 } else {
1896 offset = src_reg(this, glsl_type::uint_type);
1897 emit(SHR(dst_reg(offset), op[1], src_reg(4u)));
1898 }
1899
1900 emit_pull_constant_load_reg(dst_reg(packed_consts),
1901 surf_index,
1902 offset,
1903 NULL, NULL /* before_block/inst */);
1904
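/* The pull constant load fetched an aligned 16-byte block; adjust the
 * swizzle so the read starts at the requested dword within that block.
 */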
1905 packed_consts.swizzle = brw_swizzle_for_size(ir->type->vector_elements);
1906 packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
1907 const_offset % 16 / 4,
1908 const_offset % 16 / 4,
1909 const_offset % 16 / 4);
1910
1911 /* UBO bools are any nonzero int. We need to convert them to 0/~0. */
1912 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1913 emit(CMP(result_dst, packed_consts, src_reg(0u),
1914 BRW_CONDITIONAL_NZ));
1915 } else {
1916 emit(MOV(result_dst, packed_consts));
1917 }
1918 break;
1919 }
1920
1921 case ir_binop_vector_extract:
1922 unreachable("should have been lowered by vec_index_to_cond_assign");
1923
1924 case ir_triop_fma:
1925 op[0] = fix_3src_operand(op[0]);
1926 op[1] = fix_3src_operand(op[1]);
1927 op[2] = fix_3src_operand(op[2]);
1928 /* Note that the instruction's argument order is reversed from GLSL
1929 * and the IR.
1930 */
1931 emit(MAD(result_dst, op[2], op[1], op[0]));
1932 break;
1933
1934 case ir_triop_lrp:
1935 emit_lrp(result_dst, op[0], op[1], op[2]);
1936 break;
1937
1938 case ir_triop_csel:
1939 unreachable("already handled above");
1940 break;
1941
1942 case ir_triop_bfi:
1943 op[0] = fix_3src_operand(op[0]);
1944 op[1] = fix_3src_operand(op[1]);
1945 op[2] = fix_3src_operand(op[2]);
1946 emit(BFI2(result_dst, op[0], op[1], op[2]));
1947 break;
1948
1949 case ir_triop_bitfield_extract:
1950 op[0] = fix_3src_operand(op[0]);
1951 op[1] = fix_3src_operand(op[1]);
1952 op[2] = fix_3src_operand(op[2]);
1953 /* Note that the instruction's argument order is reversed from GLSL
1954 * and the IR.
1955 */
1956 emit(BFE(result_dst, op[2], op[1], op[0]));
1957 break;
1958
1959 case ir_triop_vector_insert:
1960 unreachable("should have been lowered by lower_vector_insert");
1961
1962 case ir_quadop_bitfield_insert:
1963 unreachable("not reached: should be handled by "
1964 "bitfield_insert_to_bfm_bfi\n");
1965
1966 case ir_quadop_vector:
1967 unreachable("not reached: should be handled by lower_quadop_vector");
1968
1969 case ir_unop_pack_half_2x16:
1970 emit_pack_half_2x16(result_dst, op[0]);
1971 break;
1972 case ir_unop_unpack_half_2x16:
1973 emit_unpack_half_2x16(result_dst, op[0]);
1974 break;
1975 case ir_unop_unpack_unorm_4x8:
1976 emit_unpack_unorm_4x8(result_dst, op[0]);
1977 break;
1978 case ir_unop_unpack_snorm_4x8:
1979 emit_unpack_snorm_4x8(result_dst, op[0]);
1980 break;
1981 case ir_unop_pack_unorm_4x8:
1982 emit_pack_unorm_4x8(result_dst, op[0]);
1983 break;
1984 case ir_unop_pack_snorm_4x8:
1985 emit_pack_snorm_4x8(result_dst, op[0]);
1986 break;
1987 case ir_unop_pack_snorm_2x16:
1988 case ir_unop_pack_unorm_2x16:
1989 case ir_unop_unpack_snorm_2x16:
1990 case ir_unop_unpack_unorm_2x16:
1991 unreachable("not reached: should be handled by lower_packing_builtins");
1992 case ir_unop_unpack_half_2x16_split_x:
1993 case ir_unop_unpack_half_2x16_split_y:
1994 case ir_binop_pack_half_2x16_split:
1995 case ir_unop_interpolate_at_centroid:
1996 case ir_binop_interpolate_at_sample:
1997 case ir_binop_interpolate_at_offset:
1998 unreachable("not reached: should not occur in vertex shader");
1999 case ir_binop_ldexp:
2000 unreachable("not reached: should be handled by ldexp_to_arith()");
2001 case ir_unop_d2f:
2002 case ir_unop_f2d:
2003 case ir_unop_d2i:
2004 case ir_unop_i2d:
2005 case ir_unop_d2u:
2006 case ir_unop_u2d:
2007 case ir_unop_d2b:
2008 case ir_unop_pack_double_2x32:
2009 case ir_unop_unpack_double_2x32:
2010 case ir_unop_frexp_sig:
2011 case ir_unop_frexp_exp:
2012 unreachable("fp64 todo");
2013 }
2014 }
2015
2016
2017 void
2018 vec4_visitor::visit(ir_swizzle *ir)
2019 {
2020 /* Note that this is only swizzles in expressions, not those on the left
2021 * hand side of an assignment, which do write masking. See ir_assignment
2022 * for that.
2023 */
2024 const unsigned swz = brw_compose_swizzle(
2025 brw_swizzle_for_size(ir->type->vector_elements),
2026 BRW_SWIZZLE4(ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w));
2027
2028 ir->val->accept(this);
2029 this->result = swizzle(this->result, swz);
2030 }
2031
2032 void
2033 vec4_visitor::visit(ir_dereference_variable *ir)
2034 {
2035 const struct glsl_type *type = ir->type;
2036 dst_reg *reg = variable_storage(ir->var);
2037
2038 if (!reg) {
2039 fail("Failed to find variable storage for %s\n", ir->var->name);
2040 this->result = src_reg(brw_null_reg());
2041 return;
2042 }
2043
2044 this->result = src_reg(*reg);
2045
2046 /* System values get their swizzle from the dst_reg writemask */
2047 if (ir->var->data.mode == ir_var_system_value)
2048 return;
2049
2050 if (type->is_scalar() || type->is_vector() || type->is_matrix())
2051 this->result.swizzle = brw_swizzle_for_size(type->vector_elements);
2052 }
2053
2054
2055 int
2056 vec4_visitor::compute_array_stride(ir_dereference_array *ir)
2057 {
2058 /* Under normal circumstances array elements are stored consecutively, so
2059 * the stride is equal to the size of the array element.
2060 */
2061 return type_size_vec4(ir->type);
2062 }
2063
2064
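/**
 * Handle array dereferences. Constant indices simply advance reg_offset;
 * variable indices are multiplied by the array stride and accumulated
 * into the register's reladdr for relative addressing.
 */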
2065 void
2066 vec4_visitor::visit(ir_dereference_array *ir)
2067 {
2068 ir_constant *constant_index;
2069 src_reg src;
2070 int array_stride = compute_array_stride(ir);
2071
2072 constant_index = ir->array_index->constant_expression_value();
2073
2074 ir->array->accept(this);
2075 src = this->result;
2076
2077 if (constant_index) {
2078 src.reg_offset += constant_index->value.i[0] * array_stride;
2079 } else {
2080 /* Variable index array dereference. It eats the "vec4" of the
2081 * base of the array and an index that offsets the Mesa register
2082 * index.
2083 */
2084 ir->array_index->accept(this);
2085
2086 src_reg index_reg;
2087
2088 if (array_stride == 1) {
2089 index_reg = this->result;
2090 } else {
2091 index_reg = src_reg(this, glsl_type::int_type);
2092
2093 emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride)));
2094 }
2095
2096 if (src.reladdr) {
2097 src_reg temp = src_reg(this, glsl_type::int_type);
2098
2099 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
2100
2101 index_reg = temp;
2102 }
2103
2104 src.reladdr = ralloc(mem_ctx, src_reg);
2105 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
2106 }
2107
2108 /* If the type is smaller than a vec4, replicate the last channel out. */
2109 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
2110 src.swizzle = brw_swizzle_for_size(ir->type->vector_elements);
2111 else
2112 src.swizzle = BRW_SWIZZLE_NOOP;
2113 src.type = brw_type_for_base_type(ir->type);
2114
2115 this->result = src;
2116 }
2117
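/**
 * Handle record (struct) member dereferences by summing the sizes of the
 * fields preceding the named member and adding that to reg_offset.
 */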
2118 void
2119 vec4_visitor::visit(ir_dereference_record *ir)
2120 {
2121 unsigned int i;
2122 const glsl_type *struct_type = ir->record->type;
2123 int offset = 0;
2124
2125 ir->record->accept(this);
2126
2127 for (i = 0; i < struct_type->length; i++) {
2128 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
2129 break;
2130 offset += type_size_vec4(struct_type->fields.structure[i].type);
2131 }
2132
2133 /* If the type is smaller than a vec4, replicate the last channel out. */
2134 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
2135 this->result.swizzle = brw_swizzle_for_size(ir->type->vector_elements);
2136 else
2137 this->result.swizzle = BRW_SWIZZLE_NOOP;
2138 this->result.type = brw_type_for_base_type(ir->type);
2139
2140 this->result.reg_offset += offset;
2141 }
2142
2143 /**
2144 * We want to be careful in assignment setup to hit the actual storage
2145 * instead of potentially using a temporary like we might with the
2146 * ir_dereference handler.
2147 */
2148 static dst_reg
2149 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
2150 {
2151 /* The LHS must be a dereference. If the LHS is a variable indexed array
2152 * access of a vector, it must be separated into a series of conditional moves
2153 * before reaching this point (see ir_vec_index_to_cond_assign).
2154 */
2155 assert(ir->as_dereference());
2156 ir_dereference_array *deref_array = ir->as_dereference_array();
2157 if (deref_array) {
2158 assert(!deref_array->array->type->is_vector());
2159 }
2160
2161 /* Use the rvalue deref handler for the most part. We'll ignore
2162 * swizzles in it and write swizzles using writemask, though.
2163 */
2164 ir->accept(v);
2165 return dst_reg(v->result);
2166 }
2167
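/**
 * Copy a potentially aggregate value (struct, array, or matrix) by
 * recursing down to scalar/vector components and emitting one
 * (possibly predicated) MOV per vec4, advancing the source and
 * destination reg_offsets as it goes.
 */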
2168 void
2169 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
2170 const struct glsl_type *type,
2171 enum brw_predicate predicate)
2172 {
2173 if (type->base_type == GLSL_TYPE_STRUCT) {
2174 for (unsigned int i = 0; i < type->length; i++) {
2175 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
2176 }
2177 return;
2178 }
2179
2180 if (type->is_array()) {
2181 for (unsigned int i = 0; i < type->length; i++) {
2182 emit_block_move(dst, src, type->fields.array, predicate);
2183 }
2184 return;
2185 }
2186
2187 if (type->is_matrix()) {
2188 const struct glsl_type *vec_type;
2189
2190 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
2191 type->vector_elements, 1);
2192
2193 for (int i = 0; i < type->matrix_columns; i++) {
2194 emit_block_move(dst, src, vec_type, predicate);
2195 }
2196 return;
2197 }
2198
2199 assert(type->is_scalar() || type->is_vector());
2200
2201 dst->type = brw_type_for_base_type(type);
2202 src->type = dst->type;
2203
2204 dst->writemask = (1 << type->vector_elements) - 1;
2205
2206 src->swizzle = brw_swizzle_for_size(type->vector_elements);
2207
2208 vec4_instruction *inst = emit(MOV(*dst, *src));
2209 inst->predicate = predicate;
2210
2211 dst->reg_offset++;
2212 src->reg_offset++;
2213 }
2214
2215
2216 /* If the RHS processing resulted in an instruction generating a
2217 * temporary value, and it would be easy to rewrite the instruction to
2218 * generate its result right into the LHS instead, do so. This ends
2219 * up reliably removing instructions where it can be tricky to do so
2220 * later without real UD chain information.
2221 */
2222 bool
2223 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
2224 dst_reg dst,
2225 src_reg src,
2226 vec4_instruction *pre_rhs_inst,
2227 vec4_instruction *last_rhs_inst)
2228 {
2229 /* This could be supported, but it would take more smarts. */
2230 if (ir->condition)
2231 return false;
2232
2233 if (pre_rhs_inst == last_rhs_inst)
2234 return false; /* No instructions generated to work with. */
2235
2236 /* Make sure the last instruction generated our source reg. */
2237 if (src.file != GRF ||
2238 src.file != last_rhs_inst->dst.file ||
2239 src.reg != last_rhs_inst->dst.reg ||
2240 src.reg_offset != last_rhs_inst->dst.reg_offset ||
2241 src.reladdr ||
2242 src.abs ||
2243 src.negate ||
2244 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
2245 return false;
2246
2247 /* Check that the last instruction fully initialized the channels
2248 * we want to use, in the order we want to use them. We could
2249 * potentially reswizzle the operands of many instructions so that
2250 * we could handle out of order channels, but don't yet.
2251 */
2252
2253 for (unsigned i = 0; i < 4; i++) {
2254 if (dst.writemask & (1 << i)) {
2255 if (!(last_rhs_inst->dst.writemask & (1 << i)))
2256 return false;
2257
2258 if (BRW_GET_SWZ(src.swizzle, i) != i)
2259 return false;
2260 }
2261 }
2262
2263 /* Success! Rewrite the instruction. */
2264 last_rhs_inst->dst.file = dst.file;
2265 last_rhs_inst->dst.reg = dst.reg;
2266 last_rhs_inst->dst.reg_offset = dst.reg_offset;
2267 last_rhs_inst->dst.reladdr = dst.reladdr;
2268 last_rhs_inst->dst.writemask &= dst.writemask;
2269
2270 return true;
2271 }
2272
2273 void
2274 vec4_visitor::visit(ir_assignment *ir)
2275 {
2276 dst_reg dst = get_assignment_lhs(ir->lhs, this);
2277 enum brw_predicate predicate = BRW_PREDICATE_NONE;
2278
2279 if (!ir->lhs->type->is_scalar() &&
2280 !ir->lhs->type->is_vector()) {
2281 ir->rhs->accept(this);
2282 src_reg src = this->result;
2283
2284 if (ir->condition) {
2285 emit_bool_to_cond_code(ir->condition, &predicate);
2286 }
2287
2288 /* emit_block_move doesn't account for swizzles in the source register.
2289 * This should be ok, since the source register is a structure or an
2290 * array, and those can't be swizzled. But double-check to be sure.
2291 */
2292 assert(src.swizzle ==
2293 (ir->rhs->type->is_matrix()
2294 ? brw_swizzle_for_size(ir->rhs->type->vector_elements)
2295 : BRW_SWIZZLE_NOOP));
2296
2297 emit_block_move(&dst, &src, ir->rhs->type, predicate);
2298 return;
2299 }
2300
2301 /* Now we're down to just a scalar/vector with writemasks. */
2302 int i;
2303
2304 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
2305 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2306
2307 ir->rhs->accept(this);
2308
2309 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2310
2311 int swizzles[4];
2312 int src_chan = 0;
2313
2314 assert(ir->lhs->type->is_vector() ||
2315 ir->lhs->type->is_scalar());
2316 dst.writemask = ir->write_mask;
2317
2318 /* Swizzle a small RHS vector into the channels being written.
2319 *
2320 * GLSL IR treats write_mask as dictating how many channels are
2321 * present on the RHS, while our instructions need those channels
2322 * to appear in the slots of the vec4 they're written to.
2323 */
2324 for (int i = 0; i < 4; i++)
2325 swizzles[i] = (ir->write_mask & (1 << i) ? src_chan++ : 0);
2326
2327 src_reg src = swizzle(this->result,
2328 BRW_SWIZZLE4(swizzles[0], swizzles[1],
2329 swizzles[2], swizzles[3]));
2330
2331 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
2332 return;
2333 }
2334
2335 if (ir->condition) {
2336 emit_bool_to_cond_code(ir->condition, &predicate);
2337 }
2338
2339 for (i = 0; i < type_size_vec4(ir->lhs->type); i++) {
2340 vec4_instruction *inst = emit(MOV(dst, src));
2341 inst->predicate = predicate;
2342
2343 dst.reg_offset++;
2344 src.reg_offset++;
2345 }
2346 }
2347
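/**
 * Write an ir_constant into consecutive registers, one vec4 at a time.
 * Components with identical values are coalesced into a single
 * writemasked MOV to save instructions.
 */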
2348 void
2349 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
2350 {
2351 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2352 foreach_in_list(ir_constant, field_value, &ir->components) {
2353 emit_constant_values(dst, field_value);
2354 }
2355 return;
2356 }
2357
2358 if (ir->type->is_array()) {
2359 for (unsigned int i = 0; i < ir->type->length; i++) {
2360 emit_constant_values(dst, ir->array_elements[i]);
2361 }
2362 return;
2363 }
2364
2365 if (ir->type->is_matrix()) {
2366 for (int i = 0; i < ir->type->matrix_columns; i++) {
2367 float *vec = &ir->value.f[i * ir->type->vector_elements];
2368
2369 for (int j = 0; j < ir->type->vector_elements; j++) {
2370 dst->writemask = 1 << j;
2371 dst->type = BRW_REGISTER_TYPE_F;
2372
2373 emit(MOV(*dst, src_reg(vec[j])));
2374 }
2375 dst->reg_offset++;
2376 }
2377 return;
2378 }
2379
2380 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
2381
2382 for (int i = 0; i < ir->type->vector_elements; i++) {
2383 if (!(remaining_writemask & (1 << i)))
2384 continue;
2385
2386 dst->writemask = 1 << i;
2387 dst->type = brw_type_for_base_type(ir->type);
2388
2389 /* Find other components that match the one we're about to
2390 * write. Emits fewer instructions for things like vec4(0.5,
2391 * 1.5, 1.5, 1.5).
2392 */
2393 for (int j = i + 1; j < ir->type->vector_elements; j++) {
2394 if (ir->type->base_type == GLSL_TYPE_BOOL) {
2395 if (ir->value.b[i] == ir->value.b[j])
2396 dst->writemask |= (1 << j);
2397 } else {
2398 /* u, i, and f storage all line up, so no need for a
2399 * switch case for comparing each type.
2400 */
2401 if (ir->value.u[i] == ir->value.u[j])
2402 dst->writemask |= (1 << j);
2403 }
2404 }
2405
2406 switch (ir->type->base_type) {
2407 case GLSL_TYPE_FLOAT:
2408 emit(MOV(*dst, src_reg(ir->value.f[i])));
2409 break;
2410 case GLSL_TYPE_INT:
2411 emit(MOV(*dst, src_reg(ir->value.i[i])));
2412 break;
2413 case GLSL_TYPE_UINT:
2414 emit(MOV(*dst, src_reg(ir->value.u[i])));
2415 break;
2416 case GLSL_TYPE_BOOL:
2417 emit(MOV(*dst, src_reg(ir->value.b[i] != 0 ? ~0 : 0)));
2418 break;
2419 default:
2420 unreachable("Non-float/uint/int/bool constant");
2421 }
2422
2423 remaining_writemask &= ~dst->writemask;
2424 }
2425 dst->reg_offset++;
2426 }
2427
2428 void
2429 vec4_visitor::visit(ir_constant *ir)
2430 {
2431 dst_reg dst = dst_reg(this, ir->type);
2432 this->result = src_reg(dst);
2433
2434 emit_constant_values(&dst, ir);
2435 }
2436
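/**
 * Lower the atomic counter intrinsics (__intrinsic_atomic_read,
 * _increment, _predecrement) to untyped surface read/atomic messages
 * against the counter's atomic buffer binding table entry.
 */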
2437 void
2438 vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
2439 {
2440 ir_dereference *deref = static_cast<ir_dereference *>(
2441 ir->actual_parameters.get_head());
2442 ir_variable *location = deref->variable_referenced();
2443 unsigned surf_index = (prog_data->base.binding_table.abo_start +
2444 location->data.binding);
2445
2446 /* Calculate the surface offset */
2447 src_reg offset(this, glsl_type::uint_type);
2448 ir_dereference_array *deref_array = deref->as_dereference_array();
2449 if (deref_array) {
2450 deref_array->array_index->accept(this);
2451
2452 src_reg tmp(this, glsl_type::uint_type);
2453 emit(MUL(dst_reg(tmp), this->result, ATOMIC_COUNTER_SIZE));
2454 emit(ADD(dst_reg(offset), tmp, location->data.atomic.offset));
2455 } else {
2456 offset = location->data.atomic.offset;
2457 }
2458
2459 /* Emit the appropriate machine instruction */
2460 const char *callee = ir->callee->function_name();
2461 dst_reg dst = get_assignment_lhs(ir->return_deref, this);
2462
2463 if (!strcmp("__intrinsic_atomic_read", callee)) {
2464 emit_untyped_surface_read(surf_index, dst, offset);
2465
2466 } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
2467 emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
2468 src_reg(), src_reg());
2469
2470 } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
2471 emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
2472 src_reg(), src_reg());
2473 }
2474
2475 brw_mark_surface_used(stage_prog_data, surf_index);
2476 }
2477
2478 void
2479 vec4_visitor::visit(ir_call *ir)
2480 {
2481 const char *callee = ir->callee->function_name();
2482
2483 if (!strcmp("__intrinsic_atomic_read", callee) ||
2484 !strcmp("__intrinsic_atomic_increment", callee) ||
2485 !strcmp("__intrinsic_atomic_predecrement", callee)) {
2486 visit_atomic_counter_intrinsic(ir);
2487 } else {
2488 unreachable("Unsupported intrinsic.");
2489 }
2490 }
2491
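/**
 * Fetch the MCS (multisample control surface) word for a texel; the
 * ir_txf_ms path passes the result into the TXF_CMS message alongside
 * the sample index.
 */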
2492 src_reg
2493 vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
2494 src_reg coordinate, src_reg sampler)
2495 {
2496 vec4_instruction *inst =
2497 new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
2498 dst_reg(this, glsl_type::uvec4_type));
2499 inst->base_mrf = 2;
2500 inst->src[1] = sampler;
2501
2502 int param_base;
2503
2504 if (devinfo->gen >= 9) {
2505 /* Gen9+ needs a message header in order to use SIMD4x2 mode */
2506 vec4_instruction *header_inst = new(mem_ctx)
2507 vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
2508 dst_reg(MRF, inst->base_mrf));
2509
2510 emit(header_inst);
2511
2512 inst->mlen = 2;
2513 inst->header_size = 1;
2514 param_base = inst->base_mrf + 1;
2515 } else {
2516 inst->mlen = 1;
2517 param_base = inst->base_mrf;
2518 }
2519
2520 /* Parameters are: u, v, r, lod; lod will always be zero due to API restrictions. */
2521 int coord_mask = (1 << coordinate_type->vector_elements) - 1;
2522 int zero_mask = 0xf & ~coord_mask;
2523
2524 emit(MOV(dst_reg(MRF, param_base, coordinate_type, coord_mask),
2525 coordinate));
2526
2527 emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask),
2528 src_reg(0)));
2529
2530 emit(inst);
2531 return src_reg(inst->dst);
2532 }
2533
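/**
 * Returns true if the sampler index may not fit in the message
 * descriptor's 4-bit sampler field (not an immediate, or >= 16), in
 * which case a message header is required. Only Haswell and newer
 * support this.
 */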
2534 bool
2535 vec4_visitor::is_high_sampler(src_reg sampler)
2536 {
2537 if (devinfo->gen < 8 && !devinfo->is_haswell)
2538 return false;
2539
2540 return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
2541 }
2542
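/**
 * Emit a vec4 sampler message: select the hardware opcode for the IR
 * texture op, build the MRF payload (coordinates, shadow comparitor,
 * LOD/gradients, sample index, MCS, offsets), then apply the cube-array
 * TXS fixup, the Gen6 gather workaround, and the result swizzle.
 */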
2543 void
2544 vec4_visitor::emit_texture(ir_texture_opcode op,
2545 dst_reg dest,
2546 const glsl_type *dest_type,
2547 src_reg coordinate,
2548 int coord_components,
2549 src_reg shadow_comparitor,
2550 src_reg lod, src_reg lod2,
2551 src_reg sample_index,
2552 uint32_t constant_offset,
2553 src_reg offset_value,
2554 src_reg mcs,
2555 bool is_cube_array,
2556 uint32_t sampler,
2557 src_reg sampler_reg)
2558 {
2559 enum opcode opcode;
2560 switch (op) {
2561 case ir_tex: opcode = SHADER_OPCODE_TXL; break;
2562 case ir_txl: opcode = SHADER_OPCODE_TXL; break;
2563 case ir_txd: opcode = SHADER_OPCODE_TXD; break;
2564 case ir_txf: opcode = SHADER_OPCODE_TXF; break;
2565 case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
2566 case ir_txs: opcode = SHADER_OPCODE_TXS; break;
2567 case ir_tg4: opcode = offset_value.file != BAD_FILE
2568 ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
2569 case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
2570 case ir_txb:
2571 unreachable("TXB is not valid for vertex shaders.");
2572 case ir_lod:
2573 unreachable("LOD is not valid for vertex shaders.");
2574 default:
2575 unreachable("Unrecognized tex op");
2576 }
2577
2578 vec4_instruction *inst = new(mem_ctx) vec4_instruction(
2579 opcode, dst_reg(this, dest_type));
2580
2581 inst->offset = constant_offset;
2582
2583 /* The message header is necessary for:
2584 * - Gen4 (always)
2585 * - Gen9+ for selecting SIMD4x2
2586 * - Texel offsets
2587 * - Gather channel selection
2588 * - Sampler indices too large to fit in a 4-bit value.
2589 */
2590 inst->header_size =
2591 (devinfo->gen < 5 || devinfo->gen >= 9 ||
2592 inst->offset != 0 || op == ir_tg4 ||
2593 is_high_sampler(sampler_reg)) ? 1 : 0;
2594 inst->base_mrf = 2;
2595 inst->mlen = inst->header_size + 1; /* always at least one */
2596 inst->dst.writemask = WRITEMASK_XYZW;
2597 inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
2598
2599 inst->src[1] = sampler_reg;
2600
2601 /* MRF for the first parameter */
2602 int param_base = inst->base_mrf + inst->header_size;
2603
2604 if (op == ir_txs || op == ir_query_levels) {
2605 int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
2606 emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
2607 } else {
2608 /* Load the coordinate */
2609 /* FINISHME: gl_clamp_mask and saturate */
2610 int coord_mask = (1 << coord_components) - 1;
2611 int zero_mask = 0xf & ~coord_mask;
2612
2613 emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
2614 coordinate));
2615
2616 if (zero_mask != 0) {
2617 emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
2618 src_reg(0)));
2619 }
2620 /* Load the shadow comparitor */
2621 if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) {
2622 emit(MOV(dst_reg(MRF, param_base + 1, shadow_comparitor.type,
2623 WRITEMASK_X),
2624 shadow_comparitor));
2625 inst->mlen++;
2626 }
2627
2628 /* Load the LOD info */
2629 if (op == ir_tex || op == ir_txl) {
2630 int mrf, writemask;
2631 if (devinfo->gen >= 5) {
2632 mrf = param_base + 1;
2633 if (shadow_comparitor.file != BAD_FILE) {
2634 writemask = WRITEMASK_Y;
2635 /* mlen already incremented */
2636 } else {
2637 writemask = WRITEMASK_X;
2638 inst->mlen++;
2639 }
2640 } else /* devinfo->gen == 4 */ {
2641 mrf = param_base;
2642 writemask = WRITEMASK_W;
2643 }
2644 lod.swizzle = BRW_SWIZZLE_XXXX;
2645 emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
2646 } else if (op == ir_txf) {
2647 emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
2648 } else if (op == ir_txf_ms) {
2649 emit(MOV(dst_reg(MRF, param_base + 1, sample_index.type, WRITEMASK_X),
2650 sample_index));
2651 if (devinfo->gen >= 7) {
2652 /* MCS data is in the first channel of `mcs`, but we need to get it into
2653 * the .y channel of the second vec4 of params, so replicate .x across
2654 * the whole vec4 and then mask off everything except .y
2655 */
2656 mcs.swizzle = BRW_SWIZZLE_XXXX;
2657 emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
2658 mcs));
2659 }
2660 inst->mlen++;
2661 } else if (op == ir_txd) {
2662 const brw_reg_type type = lod.type;
2663
2664 if (devinfo->gen >= 5) {
2665 lod.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2666 lod2.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2667 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), lod));
2668 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), lod2));
2669 inst->mlen++;
2670
2671 if (dest_type->vector_elements == 3 || shadow_comparitor.file != BAD_FILE) {
2672 lod.swizzle = BRW_SWIZZLE_ZZZZ;
2673 lod2.swizzle = BRW_SWIZZLE_ZZZZ;
2674 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), lod));
2675 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), lod2));
2676 inst->mlen++;
2677
2678 if (shadow_comparitor.file != BAD_FILE) {
2679 emit(MOV(dst_reg(MRF, param_base + 2,
2680 shadow_comparitor.type, WRITEMASK_Z),
2681 shadow_comparitor));
2682 }
2683 }
2684 } else /* devinfo->gen == 4 */ {
2685 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), lod));
2686 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), lod2));
2687 inst->mlen += 2;
2688 }
2689 } else if (op == ir_tg4 && offset_value.file != BAD_FILE) {
2690 if (shadow_comparitor.file != BAD_FILE) {
2691 emit(MOV(dst_reg(MRF, param_base, shadow_comparitor.type, WRITEMASK_W),
2692 shadow_comparitor));
2693 }
2694
2695 emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY),
2696 offset_value));
2697 inst->mlen++;
2698 }
2699 }
2700
2701 emit(inst);
2702
2703 /* Fix up the number of layers (z) for cube arrays: the hardware returns
2704 * faces * layers, but the spec requires just layers.
2705 */
2706 if (op == ir_txs && is_cube_array) {
2707 emit_math(SHADER_OPCODE_INT_QUOTIENT,
2708 writemask(inst->dst, WRITEMASK_Z),
2709 src_reg(inst->dst), src_reg(6));
2710 }
2711
2712 if (devinfo->gen == 6 && op == ir_tg4) {
2713 emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst);
2714 }
2715
2716 swizzle_result(op, dest,
2717 src_reg(inst->dst), sampler, dest_type);
2718 }
2719
2720 void
2721 vec4_visitor::visit(ir_texture *ir)
2722 {
2723 uint32_t sampler =
2724 _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
2725
2726 ir_rvalue *nonconst_sampler_index =
2727 _mesa_get_sampler_array_nonconst_index(ir->sampler);
2728
2729 /* Handle non-constant sampler array indexing */
2730 src_reg sampler_reg;
2731 if (nonconst_sampler_index) {
2732 /* The highest sampler which may be used by this operation is
2733 * the last element of the array. Mark it here, because the generator
2734 * doesn't have enough information to determine the bound.
2735 */
2736 uint32_t array_size = ir->sampler->as_dereference_array()
2737 ->array->type->array_size();
2738
2739 uint32_t max_used = sampler + array_size - 1;
2740 if (ir->op == ir_tg4 && devinfo->gen < 8) {
2741 max_used += prog_data->base.binding_table.gather_texture_start;
2742 } else {
2743 max_used += prog_data->base.binding_table.texture_start;
2744 }
2745
2746 brw_mark_surface_used(&prog_data->base, max_used);
2747
2748 /* Emit code to evaluate the actual indexing expression */
2749 nonconst_sampler_index->accept(this);
2750 src_reg temp(this, glsl_type::uint_type);
2751 emit(ADD(dst_reg(temp), this->result, src_reg(sampler)));
2752 sampler_reg = emit_uniformize(temp);
2753 } else {
2754 /* Single sampler, or constant array index; the indexing expression
2755 * is just an immediate.
2756 */
2757 sampler_reg = src_reg(sampler);
2758 }
2759
2760 /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
2761 * emitting anything other than setting up the constant result.
2762 */
2763 if (ir->op == ir_tg4) {
2764 ir_constant *chan = ir->lod_info.component->as_constant();
2765 int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
2766 if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
2767 dst_reg result(this, ir->type);
2768 this->result = src_reg(result);
2769 emit(MOV(result, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
2770 return;
2771 }
2772 }
2773
2774 /* Should be lowered by do_lower_texture_projection */
2775 assert(!ir->projector);
2776
2777 /* Should be lowered */
2778 assert(!ir->offset || !ir->offset->type->is_array());
2779
2780 /* Generate code to compute all the subexpression trees. This has to be
2781 * done before loading any values into MRFs for the sampler message since
2782 * generating these values may involve SEND messages that need the MRFs.
2783 */
2784 src_reg coordinate;
2785 int coord_components = 0;
2786 if (ir->coordinate) {
2787 coord_components = ir->coordinate->type->vector_elements;
2788 ir->coordinate->accept(this);
2789 coordinate = this->result;
2790 }
2791
2792 src_reg shadow_comparitor;
2793 if (ir->shadow_comparitor) {
2794 ir->shadow_comparitor->accept(this);
2795 shadow_comparitor = this->result;
2796 }
2797
2798 bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant();
2799 src_reg offset_value;
2800 if (has_nonconstant_offset) {
2801 ir->offset->accept(this);
2802 offset_value = src_reg(this->result);
2803 }
2804
2805 src_reg lod, lod2, sample_index, mcs;
2806 switch (ir->op) {
2807 case ir_tex:
2808 lod = src_reg(0.0f);
2809 break;
2810 case ir_txf:
2811 case ir_txl:
2812 case ir_txs:
2813 ir->lod_info.lod->accept(this);
2814 lod = this->result;
2815 break;
2816 case ir_query_levels:
2817 lod = src_reg(0);
2818 break;
2819 case ir_txf_ms:
2820 ir->lod_info.sample_index->accept(this);
2821 sample_index = this->result;
2822
2823 if (devinfo->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
2824 mcs = emit_mcs_fetch(ir->coordinate->type, coordinate, sampler_reg);
2825 else
2826 mcs = src_reg(0u);
2827 break;
2828 case ir_txd:
2829 ir->lod_info.grad.dPdx->accept(this);
2830 lod = this->result;
2831
2832 ir->lod_info.grad.dPdy->accept(this);
2833 lod2 = this->result;
2834 break;
2835 case ir_txb:
2836 case ir_lod:
2837 case ir_tg4:
2838 break;
2839 }
2840
2841 uint32_t constant_offset = 0;
2842 if (ir->offset != NULL && !has_nonconstant_offset) {
2843 constant_offset =
2844 brw_texture_offset(ir->offset->as_constant()->value.i,
2845 ir->offset->type->vector_elements);
2846 }
2847
2848 /* Stuff the channel select bits in the top of the texture offset */
2849 if (ir->op == ir_tg4)
2850 constant_offset |=
2851 gather_channel( ir->lod_info.component->as_constant()->value.i[0],
2852 sampler) << 16;
2853
2854 glsl_type const *type = ir->sampler->type;
2855 bool is_cube_array = type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
2856 type->sampler_array;
2857
2858 this->result = src_reg(this, ir->type);
2859 dst_reg dest = dst_reg(this->result);
2860
2861 emit_texture(ir->op, dest, ir->type, coordinate, coord_components,
2862 shadow_comparitor,
2863 lod, lod2, sample_index,
2864 constant_offset, offset_value,
2865 mcs, is_cube_array, sampler, sampler_reg);
2866 }
2867
2868 /**
2869 * Apply workarounds for Gen6 gather with UINT/SINT
2870 */
2871 void
2872 vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
2873 {
2874 if (!wa)
2875 return;
2876
2877 int width = (wa & WA_8BIT) ? 8 : 16;
2878 dst_reg dst_f = dst;
2879 dst_f.type = BRW_REGISTER_TYPE_F;
2880
2881 /* Convert from UNORM to UINT */
2882 emit(MUL(dst_f, src_reg(dst_f), src_reg((float)((1 << width) - 1))));
2883 emit(MOV(dst, src_reg(dst_f)));
2884
2885 if (wa & WA_SIGN) {
2886 /* Reinterpret the UINT value as a signed INT value by
2887 * shifting the sign bit into place, then shifting back
2888 * preserving sign.
2889 */
2890 emit(SHL(dst, src_reg(dst), src_reg(32 - width)));
2891 emit(ASR(dst, src_reg(dst), src_reg(32 - width)));
2892 }
2893 }
2894
2895 /**
2896 * Set up the gather channel based on the swizzle, for gather4.
2897 */
2898 uint32_t
2899 vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
2900 {
2901 int swiz = GET_SWZ(key->tex.swizzles[sampler], gather_component);
2902 switch (swiz) {
2903 case SWIZZLE_X: return 0;
2904 case SWIZZLE_Y:
2905 /* gather4 sampler is broken for green channel on RG32F --
2906 * we must ask for blue instead.
2907 */
2908 if (key->tex.gather_channel_quirk_mask & (1<<sampler))
2909 return 2;
2910 return 1;
2911 case SWIZZLE_Z: return 2;
2912 case SWIZZLE_W: return 3;
2913 default:
2914 unreachable("Not reached"); /* zero, one swizzles handled already */
2915 }
2916 }
2917
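/**
 * Apply the GL texture swizzle (key->tex.swizzles) to the raw sampler
 * result, using separate writemasked MOVs for components that swizzle
 * to constant ZERO or ONE.
 */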
2918 void
2919 vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
2920 src_reg orig_val, uint32_t sampler,
2921 const glsl_type *dest_type)
2922 {
2923 int s = key->tex.swizzles[sampler];
2924
2925 dst_reg swizzled_result = dest;
2926
2927 if (op == ir_query_levels) {
2928 /* # levels is in .w */
2929 orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2930 emit(MOV(swizzled_result, orig_val));
2931 return;
2932 }
2933
2934 if (op == ir_txs || dest_type == glsl_type::float_type
2935 || s == SWIZZLE_NOOP || op == ir_tg4) {
2936 emit(MOV(swizzled_result, orig_val));
2937 return;
2938 }
2939
2940
2941 int zero_mask = 0, one_mask = 0, copy_mask = 0;
2942 int swizzle[4] = {0};
2943
2944 for (int i = 0; i < 4; i++) {
2945 switch (GET_SWZ(s, i)) {
2946 case SWIZZLE_ZERO:
2947 zero_mask |= (1 << i);
2948 break;
2949 case SWIZZLE_ONE:
2950 one_mask |= (1 << i);
2951 break;
2952 default:
2953 copy_mask |= (1 << i);
2954 swizzle[i] = GET_SWZ(s, i);
2955 break;
2956 }
2957 }
2958
2959 if (copy_mask) {
2960 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2961 swizzled_result.writemask = copy_mask;
2962 emit(MOV(swizzled_result, orig_val));
2963 }
2964
2965 if (zero_mask) {
2966 swizzled_result.writemask = zero_mask;
2967 emit(MOV(swizzled_result, src_reg(0.0f)));
2968 }
2969
2970 if (one_mask) {
2971 swizzled_result.writemask = one_mask;
2972 emit(MOV(swizzled_result, src_reg(1.0f)));
2973 }
2974 }
2975
2976 void
2977 vec4_visitor::visit(ir_return *)
2978 {
2979 unreachable("not reached");
2980 }
2981
2982 void
2983 vec4_visitor::visit(ir_discard *)
2984 {
2985 unreachable("not reached");
2986 }
2987
2988 void
2989 vec4_visitor::visit(ir_if *ir)
2990 {
2991 /* Don't point the annotation at the if statement, because then it plus
2992 * the then and else blocks get printed.
2993 */
2994 this->base_ir = ir->condition;
2995
2996 if (devinfo->gen == 6) {
2997 emit_if_gen6(ir);
2998 } else {
2999 enum brw_predicate predicate;
3000 emit_bool_to_cond_code(ir->condition, &predicate);
3001 emit(IF(predicate));
3002 }
3003
3004 visit_instructions(&ir->then_instructions);
3005
3006 if (!ir->else_instructions.is_empty()) {
3007 this->base_ir = ir->condition;
3008 emit(BRW_OPCODE_ELSE);
3009
3010 visit_instructions(&ir->else_instructions);
3011 }
3012
3013 this->base_ir = ir->condition;
3014 emit(BRW_OPCODE_ENDIF);
3015 }
3016
3017 void
3018 vec4_visitor::gs_emit_vertex(int stream_id)
3019 {
3020 unreachable("not reached");
3021 }
3022
3023 void
3024 vec4_visitor::visit(ir_emit_vertex *)
3025 {
3026 unreachable("not reached");
3027 }
3028
3029 void
3030 vec4_visitor::gs_end_primitive()
3031 {
3032 unreachable("not reached");
3033 }
3034
3035
3036 void
3037 vec4_visitor::visit(ir_end_primitive *)
3038 {
3039 unreachable("not reached");
3040 }
3041
3042 void
3043 vec4_visitor::visit(ir_barrier *)
3044 {
3045 unreachable("not reached");
3046 }
3047
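/**
 * Emit an untyped atomic message: the offset and any operands are placed
 * in the .x channel of successive message registers, and the surface
 * index and atomic opcode are passed as immediate sources.
 */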
3048 void
3049 vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
3050 dst_reg dst, src_reg offset,
3051 src_reg src0, src_reg src1)
3052 {
3053 unsigned mlen = 0;
3054
3055 /* Set the atomic operation offset. */
3056 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
3057 mlen++;
3058
3059 /* Set the atomic operation arguments. */
3060 if (src0.file != BAD_FILE) {
3061 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
3062 mlen++;
3063 }
3064
3065 if (src1.file != BAD_FILE) {
3066 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
3067 mlen++;
3068 }
3069
3070 /* Emit the instruction. Note that this maps to the normal SIMD8
3071 * untyped atomic message on Ivy Bridge, but that's OK because
3072 * unused channels will be masked out.
3073 */
3074 vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
3075 brw_message_reg(0),
3076 src_reg(surf_index), src_reg(atomic_op));
3077 inst->mlen = mlen;
3078 }
3079
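/**
 * Emit an untyped surface read: the offset goes in the .x channel of the
 * message register and the surface index is passed as an immediate
 * source.
 */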
3080 void
3081 vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
3082 src_reg offset)
3083 {
3084 /* Set the surface read offset. */
3085 emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
3086
3087 /* Emit the instruction. Note that this maps to the normal SIMD8
3088 * untyped surface read message, but that's OK because unused
3089 * channels will be masked out.
3090 */
3091 vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
3092 brw_message_reg(0),
3093 src_reg(surf_index), src_reg(1));
3094 inst->mlen = 1;
3095 }
3096
3097 void
3098 vec4_visitor::emit_ndc_computation()
3099 {
3100 /* Get the position */
3101 src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
3102
3103 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
3104 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
3105 output_reg[BRW_VARYING_SLOT_NDC] = ndc;
3106
3107 current_annotation = "NDC";
3108 dst_reg ndc_w = ndc;
3109 ndc_w.writemask = WRITEMASK_W;
3110 src_reg pos_w = pos;
3111 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
3112 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
3113
3114 dst_reg ndc_xyz = ndc;
3115 ndc_xyz.writemask = WRITEMASK_XYZ;
3116
3117 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
3118 }
3119
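/**
 * Write the VUE header dword that holds point size and related flags.
 * On Gen4-5 this packs point size, user clip flags, and the
 * negative-rhw workaround bit into one UD; on Gen6+ it writes point
 * size, layer, and viewport index into their header channels.
 */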
3120 void
3121 vec4_visitor::emit_psiz_and_flags(dst_reg reg)
3122 {
3123 if (devinfo->gen < 6 &&
3124 ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
3125 key->userclip_active || devinfo->has_negative_rhw_bug)) {
3126 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
3127 dst_reg header1_w = header1;
3128 header1_w.writemask = WRITEMASK_W;
3129
3130 emit(MOV(header1, 0u));
3131
3132 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
3133 src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
3134
3135 current_annotation = "Point size";
3136 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
3137 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
3138 }
3139
3140 if (key->userclip_active) {
3141 current_annotation = "Clipping flags";
3142 dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
3143 dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
3144
3145 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L));
3146 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
3147 emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
3148
3149 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L));
3150 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
3151 emit(SHL(flags1, src_reg(flags1), src_reg(4)));
3152 emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
3153 }
3154
3155 /* i965 clipping workaround:
3156 * 1) Test for -ve rhw
3157 * 2) If set,
3158 * set ndc = (0,0,0,0)
3159 * set ucp[6] = 1
3160 *
3161 * Later, clipping will detect ucp[6] and ensure the primitive is
3162 * clipped against all fixed planes.
3163 */
3164 if (devinfo->has_negative_rhw_bug) {
3165 src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
3166 ndc_w.swizzle = BRW_SWIZZLE_WWWW;
3167 emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
3168 vec4_instruction *inst;
3169 inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
3170 inst->predicate = BRW_PREDICATE_NORMAL;
3171 output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F;
3172 inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
3173 inst->predicate = BRW_PREDICATE_NORMAL;
3174 }
3175
3176 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
3177 } else if (devinfo->gen < 6) {
3178 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
3179 } else {
3180 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
3181 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
3182 dst_reg reg_w = reg;
3183 reg_w.writemask = WRITEMASK_W;
3184 src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ]);
3185 reg_as_src.type = reg_w.type;
3186 reg_as_src.swizzle = brw_swizzle_for_size(1);
3187 emit(MOV(reg_w, reg_as_src));
3188 }
3189 if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
3190 dst_reg reg_y = reg;
3191 reg_y.writemask = WRITEMASK_Y;
3192 reg_y.type = BRW_REGISTER_TYPE_D;
3193 output_reg[VARYING_SLOT_LAYER].type = reg_y.type;
3194 emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER])));
3195 }
3196 if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
3197 dst_reg reg_z = reg;
3198 reg_z.writemask = WRITEMASK_Z;
3199 reg_z.type = BRW_REGISTER_TYPE_D;
3200 output_reg[VARYING_SLOT_VIEWPORT].type = reg_z.type;
3201 emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT])));
3202 }
3203 }
3204 }
3205
3206 void
3207 vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
3208 {
3209 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
3210 *
3211 * "If a linked set of shaders forming the vertex stage contains no
3212 * static write to gl_ClipVertex or gl_ClipDistance, but the
3213 * application has requested clipping against user clip planes through
3214 * the API, then the coordinate written to gl_Position is used for
3215 * comparison against the user clip planes."
3216 *
3217 * This function is only called if the shader didn't write to
3218 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
3219 * if the user wrote to it; otherwise we use gl_Position.
3220 */
3221 gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
3222 if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
3223 clip_vertex = VARYING_SLOT_POS;
3224 }
3225
3226 for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
3227 ++i) {
3228 reg.writemask = 1 << i;
3229 emit(DP4(reg,
3230 src_reg(output_reg[clip_vertex]),
3231 src_reg(this->userplane[i + offset])));
3232 }
3233 }
3234
3235 vec4_instruction *
3236 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
3237 {
3238 assert(varying < VARYING_SLOT_MAX);
3239 assert(output_reg[varying].type == reg.type);
3240 current_annotation = output_reg_annotation[varying];
3241 /* Copy the register, saturating if necessary */
3242 return emit(MOV(reg, src_reg(output_reg[varying])));
3243 }
3244
3245 void
3246 vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
3247 {
3248 reg.type = BRW_REGISTER_TYPE_F;
3249 output_reg[varying].type = reg.type;
3250
3251 switch (varying) {
3252 case VARYING_SLOT_PSIZ:
3253 {
3254 /* PSIZ is always in slot 0, and is coupled with other flags. */
3255 current_annotation = "indices, point width, clip flags";
3256 emit_psiz_and_flags(reg);
3257 break;
3258 }
3259 case BRW_VARYING_SLOT_NDC:
3260 current_annotation = "NDC";
3261 emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
3262 break;
3263 case VARYING_SLOT_POS:
3264 current_annotation = "gl_Position";
3265 emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
3266 break;
3267 case VARYING_SLOT_EDGE:
3268 /* This is present when doing unfilled polygons. We're supposed to copy
3269 * the edge flag from the user-provided vertex array
3270 * (glEdgeFlagPointer), or otherwise we'll copy from the current value
3271 * of that attribute (starts as 1.0f). This is then used in clipping to
3272 * determine which edges should be drawn as wireframe.
3273 */
3274 current_annotation = "edge flag";
3275 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
3276 glsl_type::float_type, WRITEMASK_XYZW))));
3277 break;
3278 case BRW_VARYING_SLOT_PAD:
3279 /* No need to write to this slot */
3280 break;
3281 case VARYING_SLOT_COL0:
3282 case VARYING_SLOT_COL1:
3283 case VARYING_SLOT_BFC0:
3284 case VARYING_SLOT_BFC1: {
3285 /* These built-in varyings are only supported in compatibility mode,
3286 * and we only support GS in core profile. So, this must be a vertex
3287 * shader.
3288 */
3289 assert(stage == MESA_SHADER_VERTEX);
3290 vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
3291 if (((struct brw_vs_prog_key *) key)->clamp_vertex_color)
3292 inst->saturate = true;
3293 break;
3294 }
3295
3296 default:
3297 emit_generic_urb_slot(reg, varying);
3298 break;
3299 }
3300 }
3301
3302 static int
3303 align_interleaved_urb_mlen(const struct brw_device_info *devinfo, int mlen)
3304 {
3305 if (devinfo->gen >= 6) {
3306 /* URB data written (does not include the message header reg) must
3307 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
3308 * section 5.4.3.2.2: URB_INTERLEAVED.
3309 *
3310 * URB entries are allocated on a multiple of 1024 bits, so an
3311 * extra 128 bits written here to make the end align to 256 is
3312 * no problem.
3313 */
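/* mlen includes the header register, so an even mlen means an odd
 * number of data registers; e.g. mlen 4 (header + 3 data regs) is
 * padded to 5.
 */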
3314 if ((mlen % 2) != 1)
3315 mlen++;
3316 }
3317
3318 return mlen;
3319 }
3320
3321
3322 /**
3323 * Generates the VUE payload plus the necessary URB write instructions to
3324 * output it.
3325 *
3326 * The VUE layout is documented in Volume 2a.
3327 */
3328 void
3329 vec4_visitor::emit_vertex()
3330 {
3331 /* MRF 0 is reserved for the debugger, so start with message header
3332 * in MRF 1.
3333 */
3334 int base_mrf = 1;
3335 int mrf = base_mrf;
3336 /* In the process of generating our URB write message contents, we
3337 * may need to unspill a register or load from an array. Those
3338 * reads would use MRFs 14-15.
3339 */
3340 int max_usable_mrf = 13;
3341
3342 /* The following assertion verifies that max_usable_mrf causes an
3343 * even-numbered amount of URB write data, which will meet gen6's
3344 * requirements for length alignment.
3345 */
3346 assert ((max_usable_mrf - base_mrf) % 2 == 0);
3347
3348 /* First mrf is the g0-based message header containing URB handles and
3349 * such.
3350 */
3351 emit_urb_write_header(mrf++);
3352
3353 if (devinfo->gen < 6) {
3354 emit_ndc_computation();
3355 }
3356
3357 /* Lower legacy fixed-function and gl_ClipVertex clipping to clip distances */
3358 if (key->userclip_active && !prog->UsesClipDistanceOut) {
3359 current_annotation = "user clip distances";
3360
3361 output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
3362 output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
3363
3364 emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
3365 emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
3366 }
3367
3368 /* We may need to split this up into several URB writes, so do them in a
3369 * loop.
3370 */
3371 int slot = 0;
3372 bool complete = false;
3373 do {
3374 /* URB offset is in URB row increments, and each of our MRFs is half of
3375 * one of those, since we're doing interleaved writes.
3376 */
3377 int offset = slot / 2;
3378
3379 mrf = base_mrf + 1;
3380 for (; slot < prog_data->vue_map.num_slots; ++slot) {
3381 emit_urb_slot(dst_reg(MRF, mrf++),
3382 prog_data->vue_map.slot_to_varying[slot]);
3383
3384 /* If this was max_usable_mrf, we can't fit anything more into this
3385 * URB WRITE.
3386 */
3387 if (mrf > max_usable_mrf) {
3388 slot++;
3389 break;
3390 }
3391 }
3392
3393 complete = slot >= prog_data->vue_map.num_slots;
3394 current_annotation = "URB write";
3395 vec4_instruction *inst = emit_urb_write_opcode(complete);
3396 inst->base_mrf = base_mrf;
3397 inst->mlen = align_interleaved_urb_mlen(devinfo, mrf - base_mrf);
3398 inst->offset += offset;
3399 } while (!complete);
3400 }
3401
3402
3403 src_reg
3404 vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
3405 src_reg *reladdr, int reg_offset)
3406 {
3407 /* Because we store the values to scratch interleaved like our
3408 * vertex data, we need to scale the vec4 index by 2.
3409 */
3410 int message_header_scale = 2;
3411
3412 /* Pre-gen6, the message header uses byte offsets instead of vec4
3413 * (16-byte) offset units.
3414 */
3415 if (devinfo->gen < 6)
3416 message_header_scale *= 16;
3417
3418 if (reladdr) {
3419 src_reg index = src_reg(this, glsl_type::int_type);
3420
3421 emit_before(block, inst, ADD(dst_reg(index), *reladdr,
3422 src_reg(reg_offset)));
3423 emit_before(block, inst, MUL(dst_reg(index), index,
3424 src_reg(message_header_scale)));
3425
3426 return index;
3427 } else {
3428 return src_reg(reg_offset * message_header_scale);
3429 }
3430 }
3431
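/**
 * Compute the offset source for a pull constant load: an immediate for
 * constant offsets (scaled to bytes pre-Gen6), or a GRF when the access
 * uses reladdr or when Gen8+ wants to send from GRF.
 */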
3432 src_reg
3433 vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
3434 src_reg *reladdr, int reg_offset)
3435 {
3436 if (reladdr) {
3437 src_reg index = src_reg(this, glsl_type::int_type);
3438
3439 emit_before(block, inst, ADD(dst_reg(index), *reladdr,
3440 src_reg(reg_offset)));
3441
3442 /* Pre-gen6, the message header uses byte offsets instead of vec4
3443 * (16-byte) offset units.
3444 */
3445 if (devinfo->gen < 6) {
3446 emit_before(block, inst, MUL(dst_reg(index), index, src_reg(16)));
3447 }
3448
3449 return index;
3450 } else if (devinfo->gen >= 8) {
3451 /* Store the offset in a GRF so we can send-from-GRF. */
3452 src_reg offset = src_reg(this, glsl_type::int_type);
3453 emit_before(block, inst, MOV(dst_reg(offset), src_reg(reg_offset)));
3454 return offset;
3455 } else {
3456 int message_header_scale = devinfo->gen < 6 ? 16 : 1;
3457 return src_reg(reg_offset * message_header_scale);
3458 }
3459 }
3460
3461 /**
3462 * Emits an instruction before @inst to load the value named by @orig_src
3463 * from scratch space at @base_offset to @temp.
3464 *
3465 * @base_offset is measured in 32-byte units (the size of a register).
3466 */
3467 void
3468 vec4_visitor::emit_scratch_read(bblock_t *block, vec4_instruction *inst,
3469 dst_reg temp, src_reg orig_src,
3470 int base_offset)
3471 {
3472 int reg_offset = base_offset + orig_src.reg_offset;
3473 src_reg index = get_scratch_offset(block, inst, orig_src.reladdr,
3474 reg_offset);
3475
3476 emit_before(block, inst, SCRATCH_READ(temp, index));
3477 }
3478
3479 /**
3480 * Emits an instruction after @inst to store the value to be written
3481 * to @orig_dst to scratch space at @base_offset, from @temp.
3482 *
3483 * @base_offset is measured in 32-byte units (the size of a register).
3484 */
3485 void
3486 vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
3487 int base_offset)
3488 {
3489 int reg_offset = base_offset + inst->dst.reg_offset;
3490 src_reg index = get_scratch_offset(block, inst, inst->dst.reladdr,
3491 reg_offset);
3492
3493 /* Create a temporary register to store *inst's result in.
3494 *
3495 * We have to be careful in MOVing from our temporary result register in
3496 * the scratch write. If we swizzle from channels of the temporary that
3497 * weren't initialized, it will confuse live interval analysis, which will
3498 * make spilling fail to make progress.
3499 */
3500 const src_reg temp = swizzle(retype(src_reg(this, glsl_type::vec4_type),
3501 inst->dst.type),
3502 brw_swizzle_for_mask(inst->dst.writemask));
3503 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
3504 inst->dst.writemask));
3505 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
3506 if (inst->opcode != BRW_OPCODE_SEL)
3507 write->predicate = inst->predicate;
3508 write->ir = inst->ir;
3509 write->annotation = inst->annotation;
3510 inst->insert_after(block, write);
3511
3512 inst->dst.file = temp.file;
3513 inst->dst.reg = temp.reg;
3514 inst->dst.reg_offset = temp.reg_offset;
3515 inst->dst.reladdr = NULL;
3516 }
3517
3518 /**
3519 * Checks if \p src and/or \p src.reladdr require a scratch read, and if so,
3520 * adds the scratch read(s) before \p inst. The function also checks for
3521 * recursive reladdr scratch accesses, issuing the corresponding scratch
3522 * loads and rewriting reladdr references accordingly.
3523 *
3524 * \return \p src if it did not require a scratch load, otherwise, the
3525 * register holding the result of the scratch load that the caller should
3526 * use to rewrite src.
3527 */
3528 src_reg
3529 vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
3530 vec4_instruction *inst, src_reg src)
3531 {
3532 /* Resolve recursive reladdr scratch access by calling ourselves
3533 * with src.reladdr
3534 */
3535 if (src.reladdr)
3536 *src.reladdr = emit_resolve_reladdr(scratch_loc, block, inst,
3537 *src.reladdr);
3538
3539 /* Now handle scratch access on src */
3540 if (src.file == GRF && scratch_loc[src.reg] != -1) {
3541 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3542 emit_scratch_read(block, inst, temp, src, scratch_loc[src.reg]);
3543 src.reg = temp.reg;
3544 src.reg_offset = temp.reg_offset;
3545 src.reladdr = NULL;
3546 }
3547
3548 return src;
3549 }
3550
3551 /**
3552 * We can't generally support array access in GRF space, because a
3553 * single instruction's destination can only span 2 contiguous
3554 * registers. So, we send all GRF arrays that get variable index
3555 * access to scratch space.
3556 */
3557 void
3558 vec4_visitor::move_grf_array_access_to_scratch()
3559 {
3560 int scratch_loc[this->alloc.count];
3561 memset(scratch_loc, -1, sizeof(scratch_loc));
3562
3563 /* First, calculate the set of virtual GRFs that need to be punted
3564 * to scratch due to having any array access on them, and where in
3565 * scratch.
3566 */
3567 foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
3568 if (inst->dst.file == GRF && inst->dst.reladdr) {
3569 if (scratch_loc[inst->dst.reg] == -1) {
3570 scratch_loc[inst->dst.reg] = last_scratch;
3571 last_scratch += this->alloc.sizes[inst->dst.reg];
3572 }
3573
3574 for (src_reg *iter = inst->dst.reladdr;
3575 iter->reladdr;
3576 iter = iter->reladdr) {
3577 if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
3578 scratch_loc[iter->reg] = last_scratch;
3579 last_scratch += this->alloc.sizes[iter->reg];
3580 }
3581 }
3582 }
3583
3584 for (int i = 0 ; i < 3; i++) {
3585 for (src_reg *iter = &inst->src[i];
3586 iter->reladdr;
3587 iter = iter->reladdr) {
3588 if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
3589 scratch_loc[iter->reg] = last_scratch;
3590 last_scratch += this->alloc.sizes[iter->reg];
3591 }
3592 }
3593 }
3594 }
3595
3596 /* Now, for anything that will be accessed through scratch, rewrite
3597 * it to load/store. Note that this is a _safe list walk, because
3598 * we may generate a new scratch_write instruction after the one
3599 * we're processing.
3600 */
3601 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
3602 /* Set up the annotation tracking for new generated instructions. */
3603 base_ir = inst->ir;
3604 current_annotation = inst->annotation;
3605
3606 /* First handle scratch access on the dst. Notice we have to handle
3607 * the case where the dst's reladdr also points to scratch space.
3608 */
3609 if (inst->dst.reladdr)
3610 *inst->dst.reladdr = emit_resolve_reladdr(scratch_loc, block, inst,
3611 *inst->dst.reladdr);
3612
3613 /* Now that we have handled any (possibly recursive) reladdr scratch
3614 * accesses for dst we can safely do the scratch write for dst itself
3615 */
3616 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1)
3617 emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]);
3618
3619 /* Now handle scratch access on any src. In this case, since inst->src[i]
3620 * already is a src_reg, we can just call emit_resolve_reladdr with
3621 * inst->src[i] and it will take care of handling scratch loads for
3622 * both src and src.reladdr (recursively).
3623 */
3624 for (int i = 0 ; i < 3; i++) {
3625 inst->src[i] = emit_resolve_reladdr(scratch_loc, block, inst,
3626 inst->src[i]);
3627 }
3628 }
3629 }
3630
3631 /**
3632 * Emits an instruction before @inst to load the value named by @orig_src
3633 * from the pull constant buffer (surface) at @base_offset to @temp.
3634 */
3635 void
3636 vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
3637 dst_reg temp, src_reg orig_src,
3638 int base_offset)
3639 {
3640 int reg_offset = base_offset + orig_src.reg_offset;
3641 src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start);
3642 src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
3643 reg_offset);
3644
3645 emit_pull_constant_load_reg(temp,
3646 index,
3647 offset,
3648 block, inst);
3649 }
3650
3651 /**
3652 * Implements array access of uniforms by inserting a
3653 * PULL_CONSTANT_LOAD instruction.
3654 *
3655 * Unlike temporary GRF array access (where we don't support it due to
3656 * the difficulty of doing relative addressing on instruction
3657 * destinations), we could potentially do array access of uniforms
3658 * that were loaded in GRF space as push constants. In real-world
3659 * usage we've seen, though, the arrays being used are always larger
3660 * than we could load as push constants, so just always move all
3661 * uniform array access out to a pull constant buffer.
3662 */
3663 void
3664 vec4_visitor::move_uniform_array_access_to_pull_constants()
3665 {
3666 int pull_constant_loc[this->uniforms];
3667 memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
3668 bool nested_reladdr;
3669
3670 /* Walk through and find array access of uniforms. Put a copy of that
3671 * uniform in the pull constant buffer.
3672 *
3673 * Note that we don't move constant-indexed accesses to arrays. No
3674 * testing has been done of the performance impact of this choice.
3675 */
3676 do {
3677 nested_reladdr = false;
3678
3679 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
3680 for (int i = 0; i < 3; i++) {
3681 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
3682 continue;
3683
3684 int uniform = inst->src[i].reg;
3685
3686 if (inst->src[i].reladdr->reladdr)
3687 nested_reladdr = true; /* will need another pass */
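/* A nested reladdr arises when the index expression itself reads a
 * dynamically indexed uniform (e.g. something like u[v[i]]); the
 * instructions emitted to compute the outer access's offset still read the
 * inner uniform with a reladdr, so another walk over the IR is needed to
 * convert that access too.
 */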
3688
3689 /* If this array isn't already present in the pull constant buffer,
3690 * add it.
3691 */
3692 if (pull_constant_loc[uniform] == -1) {
3693 const gl_constant_value **values =
3694 &stage_prog_data->param[uniform * 4];
3695
3696 pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4;
3697
3698 assert(uniform < uniform_array_size);
3699 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
3700 stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
3701 = values[j];
3702 }
3703 }
3704
3705 /* Set up the annotation tracking for newly generated instructions. */
3706 base_ir = inst->ir;
3707 current_annotation = inst->annotation;
3708
3709 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3710
3711 emit_pull_constant_load(block, inst, temp, inst->src[i],
3712 pull_constant_loc[uniform]);
3713
3714 inst->src[i].file = temp.file;
3715 inst->src[i].reg = temp.reg;
3716 inst->src[i].reg_offset = temp.reg_offset;
3717 inst->src[i].reladdr = NULL;
3718 }
3719 }
3720 } while (nested_reladdr);
3721
3722 /* Now there are no accesses of the UNIFORM file with a reladdr, so
3723 * no need to track them as larger-than-vec4 objects. This will be
3724 * relied on in cutting out unused uniform vectors from push
3725 * constants.
3726 */
3727 split_uniform_registers();
3728 }
3729
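/**
 * If @reg is an unsigned (UD) register with its negate modifier set, emit a
 * MOV that applies the negation into a temporary and rewrite @reg to read
 * that temporary instead, so later consumers never see a negate modifier on
 * an unsigned operand.
 */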
3730 void
3731 vec4_visitor::resolve_ud_negate(src_reg *reg)
3732 {
3733 if (reg->type != BRW_REGISTER_TYPE_UD ||
3734 !reg->negate)
3735 return;
3736
3737 src_reg temp = src_reg(this, glsl_type::uvec4_type);
3738 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
3739 *reg = temp;
3740 }
3741
3742 /**
3743 * Resolve the result of a Gen4-5 CMP instruction to a proper boolean.
3744 *
3745 * CMP on Gen4-5 only sets the LSB of the result; the rest are undefined.
3746 * If we need a proper boolean value, we have to fix it up to be 0 or ~0.
3747 */
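/* Concretely: after the AND below, and_result holds 0 or 1; negating that
 * integer gives 0 or -1 (0xffffffff, i.e. ~0), which matches the 0 / ~0
 * encoding described above.
 */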
3748 void
3749 vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg)
3750 {
3751 assert(devinfo->gen <= 5);
3752
3753 if (!rvalue->type->is_boolean())
3754 return;
3755
3756 src_reg and_result = src_reg(this, rvalue->type);
3757 src_reg neg_result = src_reg(this, rvalue->type);
3758 emit(AND(dst_reg(and_result), *reg, src_reg(1)));
3759 emit(MOV(dst_reg(neg_result), negate(and_result)));
3760 *reg = neg_result;
3761 }
3762
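/**
 * Per-compile setup for the vec4 visitor: most state is zeroed or
 * NULL-initialized here and filled in as the shader IR is visited, and the
 * uniform bookkeeping arrays are sized from the prog_data parameter counts
 * below.
 */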
3763 vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
3764 void *log_data,
3765 struct gl_program *prog,
3766 const struct brw_vue_prog_key *key,
3767 struct brw_vue_prog_data *prog_data,
3768 struct gl_shader_program *shader_prog,
3769 gl_shader_stage stage,
3770 void *mem_ctx,
3771 bool no_spills,
3772 int shader_time_index)
3773 : backend_shader(compiler, log_data, mem_ctx,
3774 shader_prog, prog, &prog_data->base, stage),
3775 key(key),
3776 prog_data(prog_data),
3777 sanity_param_count(0),
3778 fail_msg(NULL),
3779 first_non_payload_grf(0),
3780 need_all_constants_in_pull_buffer(false),
3781 no_spills(no_spills),
3782 shader_time_index(shader_time_index),
3783 last_scratch(0)
3784 {
3785 this->failed = false;
3786
3787 this->base_ir = NULL;
3788 this->current_annotation = NULL;
3789 memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
3790
3791 this->variable_ht = hash_table_ctor(0,
3792 hash_table_pointer_hash,
3793 hash_table_pointer_compare);
3794
3795 this->virtual_grf_start = NULL;
3796 this->virtual_grf_end = NULL;
3797 this->live_intervals = NULL;
3798
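/* Gen7+ has no separate MRF file; the top of the GRF space is used in its
 * place, so cap the allocatable GRF range at GEN7_MRF_HACK_START there.
 */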
3799 this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
3800
3801 this->uniforms = 0;
3802
3803 /* Initialize uniform_array_size to at least 1 because the pre-gen6 VS
3804 * requires at least one uniform slot. See setup_uniforms() in brw_vec4.cpp.
3805 */
3806 this->uniform_array_size = 1;
3807 if (prog_data) {
3808 this->uniform_array_size =
3809 MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1);
3810 }
3811
3812 this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
3813 this->uniform_vector_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
3814 }
3815
3816 vec4_visitor::~vec4_visitor()
3817 {
3818 hash_table_dtor(this->variable_ht);
3819 }
3820
3821
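/**
 * Mark the compile as failed. Only the first failure is recorded: the
 * message is formatted, prefixed with the stage abbreviation, stored in
 * fail_msg for the caller to report, and also printed to stderr when debug
 * output is enabled.
 */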
3822 void
3823 vec4_visitor::fail(const char *format, ...)
3824 {
3825 va_list va;
3826 char *msg;
3827
3828 if (failed)
3829 return;
3830
3831 failed = true;
3832
3833 va_start(va, format);
3834 msg = ralloc_vasprintf(mem_ctx, format, va);
3835 va_end(va);
3836 msg = ralloc_asprintf(mem_ctx, "%s compile failed: %s\n", stage_abbrev, msg);
3837
3838 this->fail_msg = msg;
3839
3840 if (debug_enabled) {
3841 fprintf(stderr, "%s", msg);
3842 }
3843 }
3844
3845 } /* namespace brw */