i965: Delete the brw_vue_program_key::userclip_active flag.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 #include "brw_cfg.h"
26 #include "glsl/ir_uniform.h"
27 #include "program/sampler.h"
28
29 namespace brw {
30
31 vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
32 const src_reg &src0, const src_reg &src1,
33 const src_reg &src2)
34 {
35 this->opcode = opcode;
36 this->dst = dst;
37 this->src[0] = src0;
38 this->src[1] = src1;
39 this->src[2] = src2;
40 this->saturate = false;
41 this->force_writemask_all = false;
42 this->no_dd_clear = false;
43 this->no_dd_check = false;
44 this->writes_accumulator = false;
45 this->conditional_mod = BRW_CONDITIONAL_NONE;
46 this->predicate = BRW_PREDICATE_NONE;
47 this->predicate_inverse = false;
48 this->target = 0;
49 this->regs_written = (dst.file == BAD_FILE ? 0 : 1);
50 this->shadow_compare = false;
51 this->ir = NULL;
52 this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
53 this->header_size = 0;
54 this->flag_subreg = 0;
55 this->mlen = 0;
56 this->base_mrf = 0;
57 this->offset = 0;
58 this->annotation = NULL;
59 }
60
61 vec4_instruction *
62 vec4_visitor::emit(vec4_instruction *inst)
63 {
64 inst->ir = this->base_ir;
65 inst->annotation = this->current_annotation;
66
67 this->instructions.push_tail(inst);
68
69 return inst;
70 }
71
72 vec4_instruction *
73 vec4_visitor::emit_before(bblock_t *block, vec4_instruction *inst,
74 vec4_instruction *new_inst)
75 {
76 new_inst->ir = inst->ir;
77 new_inst->annotation = inst->annotation;
78
79 inst->insert_before(block, new_inst);
80
81 return inst;
82 }
83
84 vec4_instruction *
85 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
86 const src_reg &src1, const src_reg &src2)
87 {
88 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1, src2));
89 }
90
91
92 vec4_instruction *
93 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
94 const src_reg &src1)
95 {
96 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1));
97 }
98
99 vec4_instruction *
100 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0)
101 {
102 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0));
103 }
104
105 vec4_instruction *
106 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst)
107 {
108 return emit(new(mem_ctx) vec4_instruction(opcode, dst));
109 }
110
111 vec4_instruction *
112 vec4_visitor::emit(enum opcode opcode)
113 {
114 return emit(new(mem_ctx) vec4_instruction(opcode, dst_reg()));
115 }
116
117 #define ALU1(op) \
118 vec4_instruction * \
119 vec4_visitor::op(const dst_reg &dst, const src_reg &src0) \
120 { \
121 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, src0); \
122 }
123
124 #define ALU2(op) \
125 vec4_instruction * \
126 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
127 const src_reg &src1) \
128 { \
129 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
130 src0, src1); \
131 }
132
133 #define ALU2_ACC(op) \
134 vec4_instruction * \
135 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
136 const src_reg &src1) \
137 { \
138 vec4_instruction *inst = new(mem_ctx) vec4_instruction( \
139 BRW_OPCODE_##op, dst, src0, src1); \
140 inst->writes_accumulator = true; \
141 return inst; \
142 }
143
144 #define ALU3(op) \
145 vec4_instruction * \
146 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
147 const src_reg &src1, const src_reg &src2) \
148 { \
149 assert(devinfo->gen >= 6); \
150 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
151 src0, src1, src2); \
152 }
153
154 ALU1(NOT)
155 ALU1(MOV)
156 ALU1(FRC)
157 ALU1(RNDD)
158 ALU1(RNDE)
159 ALU1(RNDZ)
160 ALU1(F32TO16)
161 ALU1(F16TO32)
162 ALU2(ADD)
163 ALU2(MUL)
164 ALU2_ACC(MACH)
165 ALU2(AND)
166 ALU2(OR)
167 ALU2(XOR)
168 ALU2(DP3)
169 ALU2(DP4)
170 ALU2(DPH)
171 ALU2(SHL)
172 ALU2(SHR)
173 ALU2(ASR)
174 ALU3(LRP)
175 ALU1(BFREV)
176 ALU3(BFE)
177 ALU2(BFI1)
178 ALU3(BFI2)
179 ALU1(FBH)
180 ALU1(FBL)
181 ALU1(CBIT)
182 ALU3(MAD)
183 ALU2_ACC(ADDC)
184 ALU2_ACC(SUBB)
185 ALU2(MAC)
186
187 /** Gen4 predicated IF. */
188 vec4_instruction *
189 vec4_visitor::IF(enum brw_predicate predicate)
190 {
191 vec4_instruction *inst;
192
193 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF);
194 inst->predicate = predicate;
195
196 return inst;
197 }
198
199 /** Gen6 IF with embedded comparison. */
200 vec4_instruction *
201 vec4_visitor::IF(src_reg src0, src_reg src1,
202 enum brw_conditional_mod condition)
203 {
204 assert(devinfo->gen == 6);
205
206 vec4_instruction *inst;
207
208 resolve_ud_negate(&src0);
209 resolve_ud_negate(&src1);
210
211 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF, dst_null_d(),
212 src0, src1);
213 inst->conditional_mod = condition;
214
215 return inst;
216 }
217
218 /**
219 * CMP: Sets the low bit of the destination channels with the result
220 * of the comparison, while the upper bits are undefined, and updates
221 * the flag register with the packed 16 bits of the result.
222 */
223 vec4_instruction *
224 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1,
225 enum brw_conditional_mod condition)
226 {
227 vec4_instruction *inst;
228
229 /* Take the instruction:
230 *
231 * CMP null<d> src0<f> src1<f>
232 *
233 * Original gen4 does type conversion to the destination type before
234 * comparison, producing garbage results for floating point comparisons.
235 *
236 * The destination type doesn't matter on newer generations, so we set the
237 * type to match src0 so we can compact the instruction.
238 */
239 dst.type = src0.type;
240 if (dst.file == HW_REG)
241 dst.fixed_hw_reg.type = dst.type;
242
243 resolve_ud_negate(&src0);
244 resolve_ud_negate(&src1);
245
246 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, dst, src0, src1);
247 inst->conditional_mod = condition;
248
249 return inst;
250 }
251
252 vec4_instruction *
253 vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
254 {
255 vec4_instruction *inst;
256
257 inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
258 dst, index);
259 inst->base_mrf = 14;
260 inst->mlen = 2;
261
262 return inst;
263 }
264
265 vec4_instruction *
266 vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
267 const src_reg &index)
268 {
269 vec4_instruction *inst;
270
271 inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
272 dst, src, index);
273 inst->base_mrf = 13;
274 inst->mlen = 3;
275
276 return inst;
277 }
278
279 void
280 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
281 {
282 static enum opcode dot_opcodes[] = {
283 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
284 };
285
286 emit(dot_opcodes[elements - 2], dst, src0, src1);
287 }
288
289 src_reg
290 vec4_visitor::fix_3src_operand(const src_reg &src)
291 {
292 /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
293 * able to use vertical stride of zero to replicate the vec4 uniform, like
294 *
295 * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
296 *
297 * But you can't, since vertical stride is always four in three-source
298 * instructions. Instead, insert a MOV instruction to do the replication so
299 * that the three-source instruction can consume it.
300 */
301
302 /* The MOV is only needed if the source is a uniform or immediate. */
303 if (src.file != UNIFORM && src.file != IMM)
304 return src;
305
306 if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
307 return src;
308
309 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
310 expanded.type = src.type;
311 emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
312 return src_reg(expanded);
313 }
314
315 src_reg
316 vec4_visitor::resolve_source_modifiers(const src_reg &src)
317 {
318 if (!src.abs && !src.negate)
319 return src;
320
321 dst_reg resolved = dst_reg(this, glsl_type::ivec4_type);
322 resolved.type = src.type;
323 emit(MOV(resolved, src));
324
325 return src_reg(resolved);
326 }
327
328 src_reg
329 vec4_visitor::fix_math_operand(const src_reg &src)
330 {
331 if (devinfo->gen < 6 || devinfo->gen >= 8 || src.file == BAD_FILE)
332 return src;
333
334 /* The gen6 math instruction ignores the source modifiers --
335 * swizzle, abs, negate, and at least some parts of the register
336 * region description.
337 *
338 * Rather than trying to enumerate all these cases, *always* expand the
339 * operand to a temp GRF for gen6.
340 *
341 * For gen7, keep the operand as-is, except if immediate, which gen7 still
342 * can't use.
343 */
344
345 if (devinfo->gen == 7 && src.file != IMM)
346 return src;
347
348 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
349 expanded.type = src.type;
350 emit(MOV(expanded, src));
351 return src_reg(expanded);
352 }
353
354 vec4_instruction *
355 vec4_visitor::emit_math(enum opcode opcode,
356 const dst_reg &dst,
357 const src_reg &src0, const src_reg &src1)
358 {
359 vec4_instruction *math =
360 emit(opcode, dst, fix_math_operand(src0), fix_math_operand(src1));
361
362 if (devinfo->gen == 6 && dst.writemask != WRITEMASK_XYZW) {
363 /* MATH on Gen6 must be align1, so we can't do writemasks. */
364 math->dst = dst_reg(this, glsl_type::vec4_type);
365 math->dst.type = dst.type;
366 math = emit(MOV(dst, src_reg(math->dst)));
367 } else if (devinfo->gen < 6) {
368 math->base_mrf = 1;
369 math->mlen = src1.file == BAD_FILE ? 1 : 2;
370 }
371
372 return math;
373 }
374
375 void
376 vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
377 {
378 if (devinfo->gen < 7) {
379 unreachable("ir_unop_pack_half_2x16 should be lowered");
380 }
381
382 assert(dst.type == BRW_REGISTER_TYPE_UD);
383 assert(src0.type == BRW_REGISTER_TYPE_F);
384
385 /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
386 *
387 * Because this instruction does not have a 16-bit floating-point type,
388 * the destination data type must be Word (W).
389 *
390 * The destination must be DWord-aligned and specify a horizontal stride
391 * (HorzStride) of 2. The 16-bit result is stored in the lower word of
392 * each destination channel and the upper word is not modified.
393 *
394 * The above restriction implies that the f32to16 instruction must use
395 * align1 mode, because only in align1 mode is it possible to specify
396 * horizontal stride. We choose here to defy the hardware docs and emit
397 * align16 instructions.
398 *
399 * (I [chadv] did attempt to emit align1 instructions for VS f32to16
400 * instructions. I was partially successful in that the code passed all
401 * tests. However, the code was dubiously correct and fragile, and the
402 * tests were not harsh enough to probe that frailty. Not trusting the
403 * code, I chose instead to remain in align16 mode in defiance of the hw
404 * docs).
405 *
406 * I've [chadv] experimentally confirmed that, on gen7 hardware and the
407 * simulator, emitting a f32to16 in align16 mode with UD as destination
408 * data type is safe. The behavior differs from that specified in the PRM
409 * in that the upper word of each destination channel is cleared to 0.
410 */
411
412 dst_reg tmp_dst(this, glsl_type::uvec2_type);
413 src_reg tmp_src(tmp_dst);
414
415 #if 0
416 /* Verify the undocumented behavior on which the following instructions
417 * rely. If f32to16 fails to clear the upper word of the X and Y channels,
418 * then the result of the bit-or instruction below will be incorrect.
419 *
420 * You should inspect the disasm output in order to verify that the MOV is
421 * not optimized away.
422 */
423 emit(MOV(tmp_dst, src_reg(0x12345678u)));
424 #endif
425
426 /* Give tmp the form below, where "." means untouched.
427 *
428 * w z y x w z y x
429 * |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
430 *
431 * That the upper word of each write-channel be 0 is required for the
432 * following bit-shift and bit-or instructions to work. Note that this
433 * relies on the undocumented hardware behavior mentioned above.
434 */
435 tmp_dst.writemask = WRITEMASK_XY;
436 emit(F32TO16(tmp_dst, src0));
437
438 /* Give the write-channels of dst the form:
439 * 0xhhhh0000
440 */
441 tmp_src.swizzle = BRW_SWIZZLE_YYYY;
442 emit(SHL(dst, tmp_src, src_reg(16u)));
443
444 /* Finally, give the write-channels of dst the form of packHalf2x16's
445 * output:
446 * 0xhhhhllll
447 */
448 tmp_src.swizzle = BRW_SWIZZLE_XXXX;
449 emit(OR(dst, src_reg(dst), tmp_src));
450 }
451
452 void
453 vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
454 {
455 if (devinfo->gen < 7) {
456 unreachable("ir_unop_unpack_half_2x16 should be lowered");
457 }
458
459 assert(dst.type == BRW_REGISTER_TYPE_F);
460 assert(src0.type == BRW_REGISTER_TYPE_UD);
461
462 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
463 *
464 * Because this instruction does not have a 16-bit floating-point type,
465 * the source data type must be Word (W). The destination type must be
466 * F (Float).
467 *
468 * To use W as the source data type, we must adjust horizontal strides,
469 * which is only possible in align1 mode. All my [chadv] attempts at
470 * emitting align1 instructions for unpackHalf2x16 failed to pass the
471 * Piglit tests, so I gave up.
472 *
473 * I've verified that, on gen7 hardware and the simulator, it is safe to
474 * emit f16to32 in align16 mode with UD as source data type.
475 */
476
477 dst_reg tmp_dst(this, glsl_type::uvec2_type);
478 src_reg tmp_src(tmp_dst);
479
480 tmp_dst.writemask = WRITEMASK_X;
481 emit(AND(tmp_dst, src0, src_reg(0xffffu)));
482
483 tmp_dst.writemask = WRITEMASK_Y;
484 emit(SHR(tmp_dst, src0, src_reg(16u)));
485
486 dst.writemask = WRITEMASK_XY;
487 emit(F16TO32(dst, tmp_src));
488 }
489
490 void
491 vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
492 {
493 /* Instead of splitting the 32-bit integer, shifting, and ORing it back
494 * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
495 * is not suitable to generate the shift values, but we can use the packed
496 * vector float and a type-converting MOV.
497 */
498 dst_reg shift(this, glsl_type::uvec4_type);
499 emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
500
501 dst_reg shifted(this, glsl_type::uvec4_type);
502 src0.swizzle = BRW_SWIZZLE_XXXX;
503 emit(SHR(shifted, src0, src_reg(shift)));
504
505 shifted.type = BRW_REGISTER_TYPE_UB;
506 dst_reg f(this, glsl_type::vec4_type);
507 emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
508
509 emit(MUL(dst, src_reg(f), src_reg(1.0f / 255.0f)));
510 }
511
512 void
513 vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
514 {
515 /* Instead of splitting the 32-bit integer, shifting, and ORing it back
516 * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
517 * is not suitable to generate the shift values, but we can use the packed
518 * vector float and a type-converting MOV.
519 */
520 dst_reg shift(this, glsl_type::uvec4_type);
521 emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
522
523 dst_reg shifted(this, glsl_type::uvec4_type);
524 src0.swizzle = BRW_SWIZZLE_XXXX;
525 emit(SHR(shifted, src0, src_reg(shift)));
526
527 shifted.type = BRW_REGISTER_TYPE_B;
528 dst_reg f(this, glsl_type::vec4_type);
529 emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
530
531 dst_reg scaled(this, glsl_type::vec4_type);
532 emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f)));
533
534 dst_reg max(this, glsl_type::vec4_type);
535 emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), src_reg(-1.0f));
536 emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), src_reg(1.0f));
537 }
538
539 void
540 vec4_visitor::emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0)
541 {
542 dst_reg saturated(this, glsl_type::vec4_type);
543 vec4_instruction *inst = emit(MOV(saturated, src0));
544 inst->saturate = true;
545
546 dst_reg scaled(this, glsl_type::vec4_type);
547 emit(MUL(scaled, src_reg(saturated), src_reg(255.0f)));
548
549 dst_reg rounded(this, glsl_type::vec4_type);
550 emit(RNDE(rounded, src_reg(scaled)));
551
552 dst_reg u(this, glsl_type::uvec4_type);
553 emit(MOV(u, src_reg(rounded)));
554
555 src_reg bytes(u);
556 emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
557 }
558
559 void
560 vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
561 {
562 dst_reg max(this, glsl_type::vec4_type);
563 emit_minmax(BRW_CONDITIONAL_GE, max, src0, src_reg(-1.0f));
564
565 dst_reg min(this, glsl_type::vec4_type);
566 emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), src_reg(1.0f));
567
568 dst_reg scaled(this, glsl_type::vec4_type);
569 emit(MUL(scaled, src_reg(min), src_reg(127.0f)));
570
571 dst_reg rounded(this, glsl_type::vec4_type);
572 emit(RNDE(rounded, src_reg(scaled)));
573
574 dst_reg i(this, glsl_type::ivec4_type);
575 emit(MOV(i, src_reg(rounded)));
576
577 src_reg bytes(i);
578 emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
579 }
580
581 void
582 vec4_visitor::visit_instructions(const exec_list *list)
583 {
584 foreach_in_list(ir_instruction, ir, list) {
585 base_ir = ir;
586 ir->accept(this);
587 }
588 }
589
590 /**
591 * Returns the minimum number of vec4 elements needed to pack a type.
592 *
593 * For simple types, it will return 1 (a single vec4); for matrices, the
594 * number of columns; for array and struct, the sum of the vec4_size of
595 * each of its elements; and for sampler and atomic, zero.
596 *
597 * This method is useful to calculate how much register space is needed to
598 * store a particular type.
599 */
600 extern "C" int
601 type_size_vec4(const struct glsl_type *type)
602 {
603 unsigned int i;
604 int size;
605
606 switch (type->base_type) {
607 case GLSL_TYPE_UINT:
608 case GLSL_TYPE_INT:
609 case GLSL_TYPE_FLOAT:
610 case GLSL_TYPE_BOOL:
611 if (type->is_matrix()) {
612 return type->matrix_columns;
613 } else {
614 /* Regardless of size of vector, it gets a vec4. This is bad
615 * packing for things like floats, but otherwise arrays become a
616 * mess. Hopefully a later pass over the code can pack scalars
617 * down if appropriate.
618 */
619 return 1;
620 }
621 case GLSL_TYPE_ARRAY:
622 assert(type->length > 0);
623 return type_size_vec4(type->fields.array) * type->length;
624 case GLSL_TYPE_STRUCT:
625 size = 0;
626 for (i = 0; i < type->length; i++) {
627 size += type_size_vec4(type->fields.structure[i].type);
628 }
629 return size;
630 case GLSL_TYPE_SUBROUTINE:
631 return 1;
632
633 case GLSL_TYPE_SAMPLER:
634 /* Samplers take up no register space, since they're baked in at
635 * link time.
636 */
637 return 0;
638 case GLSL_TYPE_ATOMIC_UINT:
639 return 0;
640 case GLSL_TYPE_IMAGE:
641 return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
642 case GLSL_TYPE_VOID:
643 case GLSL_TYPE_DOUBLE:
644 case GLSL_TYPE_ERROR:
645 case GLSL_TYPE_INTERFACE:
646 unreachable("not reached");
647 }
648
649 return 0;
650 }
651
652 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
653 {
654 init();
655
656 this->file = GRF;
657 this->reg = v->alloc.allocate(type_size_vec4(type));
658
659 if (type->is_array() || type->is_record()) {
660 this->swizzle = BRW_SWIZZLE_NOOP;
661 } else {
662 this->swizzle = brw_swizzle_for_size(type->vector_elements);
663 }
664
665 this->type = brw_type_for_base_type(type);
666 }
667
668 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
669 {
670 assert(size > 0);
671
672 init();
673
674 this->file = GRF;
675 this->reg = v->alloc.allocate(type_size_vec4(type) * size);
676
677 this->swizzle = BRW_SWIZZLE_NOOP;
678
679 this->type = brw_type_for_base_type(type);
680 }
681
682 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
683 {
684 init();
685
686 this->file = GRF;
687 this->reg = v->alloc.allocate(type_size_vec4(type));
688
689 if (type->is_array() || type->is_record()) {
690 this->writemask = WRITEMASK_XYZW;
691 } else {
692 this->writemask = (1 << type->vector_elements) - 1;
693 }
694
695 this->type = brw_type_for_base_type(type);
696 }
697
698 void
699 vec4_visitor::setup_vec4_uniform_value(unsigned param_offset,
700 const gl_constant_value *values,
701 unsigned n)
702 {
703 static const gl_constant_value zero = { 0 };
704
705 assert(param_offset % 4 == 0);
706
707 for (unsigned i = 0; i < n; ++i)
708 stage_prog_data->param[param_offset + i] = &values[i];
709
710 for (unsigned i = n; i < 4; ++i)
711 stage_prog_data->param[param_offset + i] = &zero;
712
713 uniform_vector_size[param_offset / 4] = n;
714 }
715
716 /* Our support for uniforms is piggy-backed on the struct
717 * gl_fragment_program, because that's where the values actually
718 * get stored, rather than in some global gl_shader_program uniform
719 * store.
720 */
721 void
722 vec4_visitor::setup_uniform_values(ir_variable *ir)
723 {
724 int namelen = strlen(ir->name);
725
726 /* The data for our (non-builtin) uniforms is stored in a series of
727 * gl_uniform_driver_storage structs for each subcomponent that
728 * glGetUniformLocation() could name. We know it's been set up in the same
729 * order we'd walk the type, so walk the list of storage and find anything
730 * with our name, or the prefix of a component that starts with our name.
731 */
732 for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
733 struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
734
735 if (storage->builtin)
736 continue;
737
738 if (strncmp(ir->name, storage->name, namelen) != 0 ||
739 (storage->name[namelen] != 0 &&
740 storage->name[namelen] != '.' &&
741 storage->name[namelen] != '[')) {
742 continue;
743 }
744
745 const unsigned vector_count = (MAX2(storage->array_elements, 1) *
746 storage->type->matrix_columns);
747 const unsigned vector_size = storage->type->vector_elements;
748
749 for (unsigned s = 0; s < vector_count; s++) {
750 setup_vec4_uniform_value(uniforms * 4,
751 &storage->storage[s * vector_size],
752 vector_size);
753 uniforms++;
754 }
755 }
756 }
757
758 void
759 vec4_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
760 {
761 for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
762 assert(this->uniforms < uniform_array_size);
763 this->uniform_vector_size[this->uniforms] = 4;
764 this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
765 this->userplane[i].type = BRW_REGISTER_TYPE_F;
766 for (int j = 0; j < 4; ++j) {
767 stage_prog_data->param[this->uniforms * 4 + j] =
768 (gl_constant_value *) &clip_planes[i][j];
769 }
770 ++this->uniforms;
771 }
772 }
773
774 /* Our support for builtin uniforms is even scarier than non-builtin.
775 * It sits on top of the PROG_STATE_VAR parameters that are
776 * automatically updated from GL context state.
777 */
778 void
779 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
780 {
781 const ir_state_slot *const slots = ir->get_state_slots();
782 assert(slots != NULL);
783
784 for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
785 /* This state reference has already been setup by ir_to_mesa,
786 * but we'll get the same index back here. We can reference
787 * ParameterValues directly, since unlike brw_fs.cpp, we never
788 * add new state references during compile.
789 */
790 int index = _mesa_add_state_reference(this->prog->Parameters,
791 (gl_state_index *)slots[i].tokens);
792 gl_constant_value *values =
793 &this->prog->Parameters->ParameterValues[index][0];
794
795 assert(this->uniforms < uniform_array_size);
796
797 for (unsigned j = 0; j < 4; j++)
798 stage_prog_data->param[this->uniforms * 4 + j] =
799 &values[GET_SWZ(slots[i].swizzle, j)];
800
801 this->uniform_vector_size[this->uniforms] =
802 (ir->type->is_scalar() || ir->type->is_vector() ||
803 ir->type->is_matrix() ? ir->type->vector_elements : 4);
804
805 this->uniforms++;
806 }
807 }
808
809 dst_reg *
810 vec4_visitor::variable_storage(ir_variable *var)
811 {
812 return (dst_reg *)hash_table_find(this->variable_ht, var);
813 }
814
815 void
816 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir,
817 enum brw_predicate *predicate)
818 {
819 ir_expression *expr = ir->as_expression();
820
821 *predicate = BRW_PREDICATE_NORMAL;
822
823 if (expr && expr->operation != ir_binop_ubo_load) {
824 src_reg op[3];
825 vec4_instruction *inst;
826
827 assert(expr->get_num_operands() <= 3);
828 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
829 expr->operands[i]->accept(this);
830 op[i] = this->result;
831
832 resolve_ud_negate(&op[i]);
833 }
834
835 switch (expr->operation) {
836 case ir_unop_logic_not:
837 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
838 inst->conditional_mod = BRW_CONDITIONAL_Z;
839 break;
840
841 case ir_binop_logic_xor:
842 if (devinfo->gen <= 5) {
843 src_reg temp = src_reg(this, ir->type);
844 emit(XOR(dst_reg(temp), op[0], op[1]));
845 inst = emit(AND(dst_null_d(), temp, src_reg(1)));
846 } else {
847 inst = emit(XOR(dst_null_d(), op[0], op[1]));
848 }
849 inst->conditional_mod = BRW_CONDITIONAL_NZ;
850 break;
851
852 case ir_binop_logic_or:
853 if (devinfo->gen <= 5) {
854 src_reg temp = src_reg(this, ir->type);
855 emit(OR(dst_reg(temp), op[0], op[1]));
856 inst = emit(AND(dst_null_d(), temp, src_reg(1)));
857 } else {
858 inst = emit(OR(dst_null_d(), op[0], op[1]));
859 }
860 inst->conditional_mod = BRW_CONDITIONAL_NZ;
861 break;
862
863 case ir_binop_logic_and:
864 if (devinfo->gen <= 5) {
865 src_reg temp = src_reg(this, ir->type);
866 emit(AND(dst_reg(temp), op[0], op[1]));
867 inst = emit(AND(dst_null_d(), temp, src_reg(1)));
868 } else {
869 inst = emit(AND(dst_null_d(), op[0], op[1]));
870 }
871 inst->conditional_mod = BRW_CONDITIONAL_NZ;
872 break;
873
874 case ir_unop_f2b:
875 if (devinfo->gen >= 6) {
876 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
877 } else {
878 inst = emit(MOV(dst_null_f(), op[0]));
879 inst->conditional_mod = BRW_CONDITIONAL_NZ;
880 }
881 break;
882
883 case ir_unop_i2b:
884 if (devinfo->gen >= 6) {
885 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
886 } else {
887 inst = emit(MOV(dst_null_d(), op[0]));
888 inst->conditional_mod = BRW_CONDITIONAL_NZ;
889 }
890 break;
891
892 case ir_binop_all_equal:
893 if (devinfo->gen <= 5) {
894 resolve_bool_comparison(expr->operands[0], &op[0]);
895 resolve_bool_comparison(expr->operands[1], &op[1]);
896 }
897 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
898 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
899 break;
900
901 case ir_binop_any_nequal:
902 if (devinfo->gen <= 5) {
903 resolve_bool_comparison(expr->operands[0], &op[0]);
904 resolve_bool_comparison(expr->operands[1], &op[1]);
905 }
906 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
907 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
908 break;
909
910 case ir_unop_any:
911 if (devinfo->gen <= 5) {
912 resolve_bool_comparison(expr->operands[0], &op[0]);
913 }
914 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
915 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
916 break;
917
918 case ir_binop_greater:
919 case ir_binop_gequal:
920 case ir_binop_less:
921 case ir_binop_lequal:
922 case ir_binop_equal:
923 case ir_binop_nequal:
924 if (devinfo->gen <= 5) {
925 resolve_bool_comparison(expr->operands[0], &op[0]);
926 resolve_bool_comparison(expr->operands[1], &op[1]);
927 }
928 emit(CMP(dst_null_d(), op[0], op[1],
929 brw_conditional_for_comparison(expr->operation)));
930 break;
931
932 case ir_triop_csel: {
933 /* Expand the boolean condition into the flag register. */
934 inst = emit(MOV(dst_null_d(), op[0]));
935 inst->conditional_mod = BRW_CONDITIONAL_NZ;
936
937 /* Select which boolean to return. */
938 dst_reg temp(this, expr->operands[1]->type);
939 inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]);
940 inst->predicate = BRW_PREDICATE_NORMAL;
941
942 /* Expand the result to a condition code. */
943 inst = emit(MOV(dst_null_d(), src_reg(temp)));
944 inst->conditional_mod = BRW_CONDITIONAL_NZ;
945 break;
946 }
947
948 default:
949 unreachable("not reached");
950 }
951 return;
952 }
953
954 ir->accept(this);
955
956 resolve_ud_negate(&this->result);
957
958 vec4_instruction *inst = emit(AND(dst_null_d(), this->result, src_reg(1)));
959 inst->conditional_mod = BRW_CONDITIONAL_NZ;
960 }
961
962 /**
963 * Emit a gen6 IF statement with the comparison folded into the IF
964 * instruction.
965 */
966 void
967 vec4_visitor::emit_if_gen6(ir_if *ir)
968 {
969 ir_expression *expr = ir->condition->as_expression();
970
971 if (expr && expr->operation != ir_binop_ubo_load) {
972 src_reg op[3];
973 dst_reg temp;
974
975 assert(expr->get_num_operands() <= 3);
976 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
977 expr->operands[i]->accept(this);
978 op[i] = this->result;
979 }
980
981 switch (expr->operation) {
982 case ir_unop_logic_not:
983 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
984 return;
985
986 case ir_binop_logic_xor:
987 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
988 return;
989
990 case ir_binop_logic_or:
991 temp = dst_reg(this, glsl_type::bool_type);
992 emit(OR(temp, op[0], op[1]));
993 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
994 return;
995
996 case ir_binop_logic_and:
997 temp = dst_reg(this, glsl_type::bool_type);
998 emit(AND(temp, op[0], op[1]));
999 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
1000 return;
1001
1002 case ir_unop_f2b:
1003 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1004 return;
1005
1006 case ir_unop_i2b:
1007 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1008 return;
1009
1010 case ir_binop_greater:
1011 case ir_binop_gequal:
1012 case ir_binop_less:
1013 case ir_binop_lequal:
1014 case ir_binop_equal:
1015 case ir_binop_nequal:
1016 emit(IF(op[0], op[1],
1017 brw_conditional_for_comparison(expr->operation)));
1018 return;
1019
1020 case ir_binop_all_equal:
1021 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1022 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
1023 return;
1024
1025 case ir_binop_any_nequal:
1026 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1027 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
1028 return;
1029
1030 case ir_unop_any:
1031 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1032 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
1033 return;
1034
1035 case ir_triop_csel: {
1036 /* Expand the boolean condition into the flag register. */
1037 vec4_instruction *inst = emit(MOV(dst_null_d(), op[0]));
1038 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1039
1040 /* Select which boolean to return. */
1041 dst_reg temp(this, expr->operands[1]->type);
1042 inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]);
1043 inst->predicate = BRW_PREDICATE_NORMAL;
1044
1045 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
1046 return;
1047 }
1048
1049 default:
1050 unreachable("not reached");
1051 }
1052 return;
1053 }
1054
1055 ir->condition->accept(this);
1056
1057 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
1058 }
1059
1060 void
1061 vec4_visitor::visit(ir_variable *ir)
1062 {
1063 dst_reg *reg = NULL;
1064
1065 if (variable_storage(ir))
1066 return;
1067
1068 switch (ir->data.mode) {
1069 case ir_var_shader_in:
1070 assert(ir->data.location != -1);
1071 reg = new(mem_ctx) dst_reg(ATTR, ir->data.location);
1072 break;
1073
1074 case ir_var_shader_out:
1075 assert(ir->data.location != -1);
1076 reg = new(mem_ctx) dst_reg(this, ir->type);
1077
1078 for (int i = 0; i < type_size_vec4(ir->type); i++) {
1079 output_reg[ir->data.location + i] = *reg;
1080 output_reg[ir->data.location + i].reg_offset = i;
1081 output_reg_annotation[ir->data.location + i] = ir->name;
1082 }
1083 break;
1084
1085 case ir_var_auto:
1086 case ir_var_temporary:
1087 reg = new(mem_ctx) dst_reg(this, ir->type);
1088 break;
1089
1090 case ir_var_uniform:
1091 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
1092
1093 /* Thanks to the lower_ubo_reference pass, we will see only
1094 * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
1095 * variables, so no need for them to be in variable_ht.
1096 *
1097 * Some uniforms, such as samplers and atomic counters, have no actual
1098 * storage, so we should ignore them.
1099 */
1100 if (ir->is_in_buffer_block() || type_size_vec4(ir->type) == 0)
1101 return;
1102
1103 /* Track how big the whole uniform variable is, in case we need to put a
1104 * copy of its data into pull constants for array access.
1105 */
1106 assert(this->uniforms < uniform_array_size);
1107 this->uniform_size[this->uniforms] = type_size_vec4(ir->type);
1108
1109 if (!strncmp(ir->name, "gl_", 3)) {
1110 setup_builtin_uniform_values(ir);
1111 } else {
1112 setup_uniform_values(ir);
1113 }
1114 break;
1115
1116 case ir_var_system_value:
1117 reg = make_reg_for_system_value(ir->data.location, ir->type);
1118 break;
1119
1120 default:
1121 unreachable("not reached");
1122 }
1123
1124 reg->type = brw_type_for_base_type(ir->type);
1125 hash_table_insert(this->variable_ht, reg, ir);
1126 }
1127
1128 void
1129 vec4_visitor::visit(ir_loop *ir)
1130 {
1131 /* We don't want debugging output to print the whole body of the
1132 * loop as the annotation.
1133 */
1134 this->base_ir = NULL;
1135
1136 emit(BRW_OPCODE_DO);
1137
1138 visit_instructions(&ir->body_instructions);
1139
1140 emit(BRW_OPCODE_WHILE);
1141 }
1142
1143 void
1144 vec4_visitor::visit(ir_loop_jump *ir)
1145 {
1146 switch (ir->mode) {
1147 case ir_loop_jump::jump_break:
1148 emit(BRW_OPCODE_BREAK);
1149 break;
1150 case ir_loop_jump::jump_continue:
1151 emit(BRW_OPCODE_CONTINUE);
1152 break;
1153 }
1154 }
1155
1156
1157 void
1158 vec4_visitor::visit(ir_function_signature *)
1159 {
1160 unreachable("not reached");
1161 }
1162
1163 void
1164 vec4_visitor::visit(ir_function *ir)
1165 {
1166 /* Ignore function bodies other than main() -- we shouldn't see calls to
1167 * them since they should all be inlined.
1168 */
1169 if (strcmp(ir->name, "main") == 0) {
1170 const ir_function_signature *sig;
1171 exec_list empty;
1172
1173 sig = ir->matching_signature(NULL, &empty, false);
1174
1175 assert(sig);
1176
1177 visit_instructions(&sig->body);
1178 }
1179 }
1180
1181 bool
1182 vec4_visitor::try_emit_mad(ir_expression *ir)
1183 {
1184 /* 3-src instructions were introduced in gen6. */
1185 if (devinfo->gen < 6)
1186 return false;
1187
1188 /* MAD can only handle floating-point data. */
1189 if (ir->type->base_type != GLSL_TYPE_FLOAT)
1190 return false;
1191
1192 ir_rvalue *nonmul;
1193 ir_expression *mul;
1194 bool mul_negate, mul_abs;
1195
1196 for (int i = 0; i < 2; i++) {
1197 mul_negate = false;
1198 mul_abs = false;
1199
1200 mul = ir->operands[i]->as_expression();
1201 nonmul = ir->operands[1 - i];
1202
1203 if (mul && mul->operation == ir_unop_abs) {
1204 mul = mul->operands[0]->as_expression();
1205 mul_abs = true;
1206 } else if (mul && mul->operation == ir_unop_neg) {
1207 mul = mul->operands[0]->as_expression();
1208 mul_negate = true;
1209 }
1210
1211 if (mul && mul->operation == ir_binop_mul)
1212 break;
1213 }
1214
1215 if (!mul || mul->operation != ir_binop_mul)
1216 return false;
1217
1218 nonmul->accept(this);
1219 src_reg src0 = fix_3src_operand(this->result);
1220
1221 mul->operands[0]->accept(this);
1222 src_reg src1 = fix_3src_operand(this->result);
1223 src1.negate ^= mul_negate;
1224 src1.abs = mul_abs;
1225 if (mul_abs)
1226 src1.negate = false;
1227
1228 mul->operands[1]->accept(this);
1229 src_reg src2 = fix_3src_operand(this->result);
1230 src2.abs = mul_abs;
1231 if (mul_abs)
1232 src2.negate = false;
1233
1234 this->result = src_reg(this, ir->type);
1235 emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2);
1236
1237 return true;
1238 }
1239
1240 bool
1241 vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir)
1242 {
1243 /* This optimization relies on CMP setting the destination to 0 when
1244 * false. Early hardware only sets the least significant bit, and
1245 * leaves the other bits undefined. So we can't use it.
1246 */
1247 if (devinfo->gen < 6)
1248 return false;
1249
1250 ir_expression *const cmp = ir->operands[0]->as_expression();
1251
1252 if (cmp == NULL)
1253 return false;
1254
1255 switch (cmp->operation) {
1256 case ir_binop_less:
1257 case ir_binop_greater:
1258 case ir_binop_lequal:
1259 case ir_binop_gequal:
1260 case ir_binop_equal:
1261 case ir_binop_nequal:
1262 break;
1263
1264 default:
1265 return false;
1266 }
1267
1268 cmp->operands[0]->accept(this);
1269 const src_reg cmp_src0 = this->result;
1270
1271 cmp->operands[1]->accept(this);
1272 const src_reg cmp_src1 = this->result;
1273
1274 this->result = src_reg(this, ir->type);
1275
1276 emit(CMP(dst_reg(this->result), cmp_src0, cmp_src1,
1277 brw_conditional_for_comparison(cmp->operation)));
1278
1279 /* If the comparison is false, this->result will just happen to be zero.
1280 */
1281 vec4_instruction *const inst = emit(BRW_OPCODE_SEL, dst_reg(this->result),
1282 this->result, src_reg(1.0f));
1283 inst->predicate = BRW_PREDICATE_NORMAL;
1284 inst->predicate_inverse = true;
1285
1286 return true;
1287 }
1288
1289 vec4_instruction *
1290 vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
1291 src_reg src0, src_reg src1)
1292 {
1293 vec4_instruction *inst;
1294
1295 if (devinfo->gen >= 6) {
1296 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1297 inst->conditional_mod = conditionalmod;
1298 } else {
1299 emit(CMP(dst, src0, src1, conditionalmod));
1300
1301 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1302 inst->predicate = BRW_PREDICATE_NORMAL;
1303 }
1304
1305 return inst;
1306 }
1307
1308 vec4_instruction *
1309 vec4_visitor::emit_lrp(const dst_reg &dst,
1310 const src_reg &x, const src_reg &y, const src_reg &a)
1311 {
1312 if (devinfo->gen >= 6) {
1313 /* Note that the instruction's argument order is reversed from GLSL
1314 * and the IR.
1315 */
1316 return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y),
1317 fix_3src_operand(x)));
1318 } else {
1319 /* Earlier generations don't support three source operations, so we
1320 * need to emit x*(1-a) + y*a.
1321 */
1322 dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type);
1323 dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
1324 dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
1325 y_times_a.writemask = dst.writemask;
1326 one_minus_a.writemask = dst.writemask;
1327 x_times_one_minus_a.writemask = dst.writemask;
1328
1329 emit(MUL(y_times_a, y, a));
1330 emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
1331 emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
1332 return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
1333 }
1334 }
1335
1336 /**
1337 * Emits the instructions needed to perform a pull constant load. before_block
1338 * and before_inst can be NULL in which case the instruction will be appended
1339 * to the end of the instruction list.
1340 */
1341 void
1342 vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
1343 src_reg surf_index,
1344 src_reg offset_reg,
1345 bblock_t *before_block,
1346 vec4_instruction *before_inst)
1347 {
1348 assert((before_inst == NULL && before_block == NULL) ||
1349 (before_inst && before_block));
1350
1351 vec4_instruction *pull;
1352
1353 if (devinfo->gen >= 9) {
1354 /* Gen9+ needs a message header in order to use SIMD4x2 mode */
1355 src_reg header(this, glsl_type::uvec4_type, 2);
1356
1357 pull = new(mem_ctx)
1358 vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
1359 dst_reg(header));
1360
1361 if (before_inst)
1362 emit_before(before_block, before_inst, pull);
1363 else
1364 emit(pull);
1365
1366 dst_reg index_reg = retype(offset(dst_reg(header), 1),
1367 offset_reg.type);
1368 pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
1369
1370 if (before_inst)
1371 emit_before(before_block, before_inst, pull);
1372 else
1373 emit(pull);
1374
1375 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
1376 dst,
1377 surf_index,
1378 header);
1379 pull->mlen = 2;
1380 pull->header_size = 1;
1381 } else if (devinfo->gen >= 7) {
1382 dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
1383
1384 grf_offset.type = offset_reg.type;
1385
1386 pull = MOV(grf_offset, offset_reg);
1387
1388 if (before_inst)
1389 emit_before(before_block, before_inst, pull);
1390 else
1391 emit(pull);
1392
1393 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
1394 dst,
1395 surf_index,
1396 src_reg(grf_offset));
1397 pull->mlen = 1;
1398 } else {
1399 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
1400 dst,
1401 surf_index,
1402 offset_reg);
1403 pull->base_mrf = 14;
1404 pull->mlen = 1;
1405 }
1406
1407 if (before_inst)
1408 emit_before(before_block, before_inst, pull);
1409 else
1410 emit(pull);
1411 }
1412
1413 src_reg
1414 vec4_visitor::emit_uniformize(const src_reg &src)
1415 {
1416 const src_reg chan_index(this, glsl_type::uint_type);
1417 const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
1418 src.type);
1419
1420 emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
1421 ->force_writemask_all = true;
1422 emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
1423 ->force_writemask_all = true;
1424
1425 return src_reg(dst);
1426 }
1427
1428 void
1429 vec4_visitor::visit(ir_expression *ir)
1430 {
1431 unsigned int operand;
1432 src_reg op[ARRAY_SIZE(ir->operands)];
1433 vec4_instruction *inst;
1434
1435 if (ir->operation == ir_binop_add) {
1436 if (try_emit_mad(ir))
1437 return;
1438 }
1439
1440 if (ir->operation == ir_unop_b2f) {
1441 if (try_emit_b2f_of_compare(ir))
1442 return;
1443 }
1444
1445 /* Storage for our result. Ideally for an assignment we'd be using
1446 * the actual storage for the result here, instead.
1447 */
1448 dst_reg result_dst(this, ir->type);
1449 src_reg result_src(result_dst);
1450
1451 if (ir->operation == ir_triop_csel) {
1452 ir->operands[1]->accept(this);
1453 op[1] = this->result;
1454 ir->operands[2]->accept(this);
1455 op[2] = this->result;
1456
1457 enum brw_predicate predicate;
1458 emit_bool_to_cond_code(ir->operands[0], &predicate);
1459 inst = emit(BRW_OPCODE_SEL, result_dst, op[1], op[2]);
1460 inst->predicate = predicate;
1461 this->result = result_src;
1462 return;
1463 }
1464
1465 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1466 this->result.file = BAD_FILE;
1467 ir->operands[operand]->accept(this);
1468 if (this->result.file == BAD_FILE) {
1469 fprintf(stderr, "Failed to get tree for expression operand:\n");
1470 ir->operands[operand]->fprint(stderr);
1471 exit(1);
1472 }
1473 op[operand] = this->result;
1474
1475 /* Matrix expression operands should have been broken down to vector
1476 * operations already.
1477 */
1478 assert(!ir->operands[operand]->type->is_matrix());
1479 }
1480
1481 /* If nothing special happens, this is the result. */
1482 this->result = result_src;
1483
1484 switch (ir->operation) {
1485 case ir_unop_logic_not:
1486 emit(NOT(result_dst, op[0]));
1487 break;
1488 case ir_unop_neg:
1489 op[0].negate = !op[0].negate;
1490 emit(MOV(result_dst, op[0]));
1491 break;
1492 case ir_unop_abs:
1493 op[0].abs = true;
1494 op[0].negate = false;
1495 emit(MOV(result_dst, op[0]));
1496 break;
1497
1498 case ir_unop_sign:
1499 if (ir->type->is_float()) {
1500 /* AND(val, 0x80000000) gives the sign bit.
1501 *
1502 * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
1503 * zero.
1504 */
1505 emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1506
1507 op[0].type = BRW_REGISTER_TYPE_UD;
1508 result_dst.type = BRW_REGISTER_TYPE_UD;
1509 emit(AND(result_dst, op[0], src_reg(0x80000000u)));
1510
1511 inst = emit(OR(result_dst, src_reg(result_dst), src_reg(0x3f800000u)));
1512 inst->predicate = BRW_PREDICATE_NORMAL;
1513
1514 this->result.type = BRW_REGISTER_TYPE_F;
1515 } else {
1516 /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
1517 * -> non-negative val generates 0x00000000.
1518 * Predicated OR sets 1 if val is positive.
1519 */
1520 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
1521
1522 emit(ASR(result_dst, op[0], src_reg(31)));
1523
1524 inst = emit(OR(result_dst, src_reg(result_dst), src_reg(1)));
1525 inst->predicate = BRW_PREDICATE_NORMAL;
1526 }
1527 break;
1528
1529 case ir_unop_rcp:
1530 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1531 break;
1532
1533 case ir_unop_exp2:
1534 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1535 break;
1536 case ir_unop_log2:
1537 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1538 break;
1539 case ir_unop_exp:
1540 case ir_unop_log:
1541 unreachable("not reached: should be handled by ir_explog_to_explog2");
1542 case ir_unop_sin:
1543 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1544 break;
1545 case ir_unop_cos:
1546 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1547 break;
1548
1549 case ir_unop_dFdx:
1550 case ir_unop_dFdx_coarse:
1551 case ir_unop_dFdx_fine:
1552 case ir_unop_dFdy:
1553 case ir_unop_dFdy_coarse:
1554 case ir_unop_dFdy_fine:
1555 unreachable("derivatives not valid in vertex shader");
1556
1557 case ir_unop_bitfield_reverse:
1558 emit(BFREV(result_dst, op[0]));
1559 break;
1560 case ir_unop_bit_count:
1561 emit(CBIT(result_dst, op[0]));
1562 break;
1563 case ir_unop_find_msb: {
1564 src_reg temp = src_reg(this, glsl_type::uint_type);
1565
1566 inst = emit(FBH(dst_reg(temp), op[0]));
1567 inst->dst.writemask = WRITEMASK_XYZW;
1568
1569 /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
1570 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
1571 * subtract the result from 31 to convert the MSB count into an LSB count.
1572 */
1573
1574 /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
1575 temp.swizzle = BRW_SWIZZLE_NOOP;
1576 emit(MOV(result_dst, temp));
1577
1578 src_reg src_tmp = src_reg(result_dst);
1579 emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
1580
1581 src_tmp.negate = true;
1582 inst = emit(ADD(result_dst, src_tmp, src_reg(31)));
1583 inst->predicate = BRW_PREDICATE_NORMAL;
1584 break;
1585 }
1586 case ir_unop_find_lsb:
1587 emit(FBL(result_dst, op[0]));
1588 break;
1589 case ir_unop_saturate:
1590 inst = emit(MOV(result_dst, op[0]));
1591 inst->saturate = true;
1592 break;
1593
1594 case ir_unop_noise:
1595 unreachable("not reached: should be handled by lower_noise");
1596
1597 case ir_unop_subroutine_to_int:
1598 emit(MOV(result_dst, op[0]));
1599 break;
1600
1601 case ir_binop_add:
1602 emit(ADD(result_dst, op[0], op[1]));
1603 break;
1604 case ir_binop_sub:
1605 unreachable("not reached: should be handled by ir_sub_to_add_neg");
1606
1607 case ir_binop_mul:
1608 if (devinfo->gen < 8 && ir->type->is_integer()) {
1609 /* For integer multiplication, the MUL uses the low 16 bits of one of
1610 * the operands (src0 through SNB, src1 on IVB and later). The MACH
1611 * accumulates in the contribution of the upper 16 bits of that
1612 * operand. If we can determine that one of the args is in the low
1613 * 16 bits, though, we can just emit a single MUL.
1614 */
1615 if (ir->operands[0]->is_uint16_constant()) {
1616 if (devinfo->gen < 7)
1617 emit(MUL(result_dst, op[0], op[1]));
1618 else
1619 emit(MUL(result_dst, op[1], op[0]));
1620 } else if (ir->operands[1]->is_uint16_constant()) {
1621 if (devinfo->gen < 7)
1622 emit(MUL(result_dst, op[1], op[0]));
1623 else
1624 emit(MUL(result_dst, op[0], op[1]));
1625 } else {
1626 struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type);
1627
1628 emit(MUL(acc, op[0], op[1]));
1629 emit(MACH(dst_null_d(), op[0], op[1]));
1630 emit(MOV(result_dst, src_reg(acc)));
1631 }
1632 } else {
1633 emit(MUL(result_dst, op[0], op[1]));
1634 }
1635 break;
1636 case ir_binop_imul_high: {
1637 struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type);
1638
1639 emit(MUL(acc, op[0], op[1]));
1640 emit(MACH(result_dst, op[0], op[1]));
1641 break;
1642 }
1643 case ir_binop_div:
1644 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1645 assert(ir->type->is_integer());
1646 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1647 break;
1648
1649 case ir_binop_carry:
1650 unreachable("Should have been lowered by carry_to_arith().");
1651
1652 case ir_binop_borrow:
1653 unreachable("Should have been lowered by borrow_to_arith().");
1654
1655 case ir_binop_mod:
1656 /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
1657 assert(ir->type->is_integer());
1658 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1659 break;
1660
1661 case ir_binop_less:
1662 case ir_binop_greater:
1663 case ir_binop_lequal:
1664 case ir_binop_gequal:
1665 case ir_binop_equal:
1666 case ir_binop_nequal: {
1667 if (devinfo->gen <= 5) {
1668 resolve_bool_comparison(ir->operands[0], &op[0]);
1669 resolve_bool_comparison(ir->operands[1], &op[1]);
1670 }
1671 emit(CMP(result_dst, op[0], op[1],
1672 brw_conditional_for_comparison(ir->operation)));
1673 break;
1674 }
1675
1676 case ir_binop_all_equal:
1677 if (devinfo->gen <= 5) {
1678 resolve_bool_comparison(ir->operands[0], &op[0]);
1679 resolve_bool_comparison(ir->operands[1], &op[1]);
1680 }
1681
1682 /* "==" operator producing a scalar boolean. */
1683 if (ir->operands[0]->type->is_vector() ||
1684 ir->operands[1]->type->is_vector()) {
1685 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1686 emit(MOV(result_dst, src_reg(0)));
1687 inst = emit(MOV(result_dst, src_reg(~0)));
1688 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1689 } else {
1690 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1691 }
1692 break;
1693 case ir_binop_any_nequal:
1694 if (devinfo->gen <= 5) {
1695 resolve_bool_comparison(ir->operands[0], &op[0]);
1696 resolve_bool_comparison(ir->operands[1], &op[1]);
1697 }
1698
1699 /* "!=" operator producing a scalar boolean. */
1700 if (ir->operands[0]->type->is_vector() ||
1701 ir->operands[1]->type->is_vector()) {
1702 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1703
1704 emit(MOV(result_dst, src_reg(0)));
1705 inst = emit(MOV(result_dst, src_reg(~0)));
1706 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1707 } else {
1708 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1709 }
1710 break;
1711
1712 case ir_unop_any:
1713 if (devinfo->gen <= 5) {
1714 resolve_bool_comparison(ir->operands[0], &op[0]);
1715 }
1716 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1717 emit(MOV(result_dst, src_reg(0)));
1718
1719 inst = emit(MOV(result_dst, src_reg(~0)));
1720 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1721 break;
1722
1723 case ir_binop_logic_xor:
1724 emit(XOR(result_dst, op[0], op[1]));
1725 break;
1726
1727 case ir_binop_logic_or:
1728 emit(OR(result_dst, op[0], op[1]));
1729 break;
1730
1731 case ir_binop_logic_and:
1732 emit(AND(result_dst, op[0], op[1]));
1733 break;
1734
1735 case ir_binop_dot:
1736 assert(ir->operands[0]->type->is_vector());
1737 assert(ir->operands[0]->type == ir->operands[1]->type);
1738 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1739 break;
1740
1741 case ir_unop_sqrt:
1742 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1743 break;
1744 case ir_unop_rsq:
1745 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1746 break;
1747
1748 case ir_unop_bitcast_i2f:
1749 case ir_unop_bitcast_u2f:
1750 this->result = op[0];
1751 this->result.type = BRW_REGISTER_TYPE_F;
1752 break;
1753
1754 case ir_unop_bitcast_f2i:
1755 this->result = op[0];
1756 this->result.type = BRW_REGISTER_TYPE_D;
1757 break;
1758
1759 case ir_unop_bitcast_f2u:
1760 this->result = op[0];
1761 this->result.type = BRW_REGISTER_TYPE_UD;
1762 break;
1763
1764 case ir_unop_i2f:
1765 case ir_unop_i2u:
1766 case ir_unop_u2i:
1767 case ir_unop_u2f:
1768 case ir_unop_f2i:
1769 case ir_unop_f2u:
1770 emit(MOV(result_dst, op[0]));
1771 break;
1772 case ir_unop_b2i:
1773 case ir_unop_b2f:
1774 if (devinfo->gen <= 5) {
1775 resolve_bool_comparison(ir->operands[0], &op[0]);
1776 }
1777 emit(MOV(result_dst, negate(op[0])));
1778 break;
1779 case ir_unop_f2b:
1780 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1781 break;
1782 case ir_unop_i2b:
1783 emit(CMP(result_dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1784 break;
1785
1786 case ir_unop_trunc:
1787 emit(RNDZ(result_dst, op[0]));
1788 break;
1789 case ir_unop_ceil: {
1790 src_reg tmp = src_reg(this, ir->type);
1791 op[0].negate = !op[0].negate;
1792 emit(RNDD(dst_reg(tmp), op[0]));
1793 tmp.negate = true;
1794 emit(MOV(result_dst, tmp));
1795 }
1796 break;
1797 case ir_unop_floor:
1798 inst = emit(RNDD(result_dst, op[0]));
1799 break;
1800 case ir_unop_fract:
1801 inst = emit(FRC(result_dst, op[0]));
1802 break;
1803 case ir_unop_round_even:
1804 emit(RNDE(result_dst, op[0]));
1805 break;
1806
1807 case ir_binop_min:
1808 emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
1809 break;
1810 case ir_binop_max:
1811 emit_minmax(BRW_CONDITIONAL_GE, result_dst, op[0], op[1]);
1812 break;
1813
1814 case ir_binop_pow:
1815 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1816 break;
1817
1818 case ir_unop_bit_not:
1819 inst = emit(NOT(result_dst, op[0]));
1820 break;
1821 case ir_binop_bit_and:
1822 inst = emit(AND(result_dst, op[0], op[1]));
1823 break;
1824 case ir_binop_bit_xor:
1825 inst = emit(XOR(result_dst, op[0], op[1]));
1826 break;
1827 case ir_binop_bit_or:
1828 inst = emit(OR(result_dst, op[0], op[1]));
1829 break;
1830
1831 case ir_binop_lshift:
1832 inst = emit(SHL(result_dst, op[0], op[1]));
1833 break;
1834
1835 case ir_binop_rshift:
1836 if (ir->type->base_type == GLSL_TYPE_INT)
1837 inst = emit(ASR(result_dst, op[0], op[1]));
1838 else
1839 inst = emit(SHR(result_dst, op[0], op[1]));
1840 break;
1841
1842 case ir_binop_bfm:
1843 emit(BFI1(result_dst, op[0], op[1]));
1844 break;
1845
1846 case ir_binop_ubo_load: {
1847 ir_constant *const_uniform_block = ir->operands[0]->as_constant();
1848 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1849 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
1850 src_reg offset;
1851
1852 /* Now, load the vector from that offset. */
1853 assert(ir->type->is_vector() || ir->type->is_scalar());
1854
1855 src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
1856 packed_consts.type = result.type;
1857 src_reg surf_index;
1858
1859 if (const_uniform_block) {
1860 /* The block index is a constant, so just emit the binding table entry
1861 * as an immediate.
1862 */
1863 surf_index = src_reg(prog_data->base.binding_table.ubo_start +
1864 const_uniform_block->value.u[0]);
1865 } else {
1866 /* The block index is not a constant. Evaluate the index expression
1867 * per-channel and add the base UBO index; we have to select a value
1868 * from any live channel.
1869 */
1870 surf_index = src_reg(this, glsl_type::uint_type);
1871 emit(ADD(dst_reg(surf_index), op[0],
1872 src_reg(prog_data->base.binding_table.ubo_start)));
1873 surf_index = emit_uniformize(surf_index);
1874
1875 /* Assume this may touch any UBO. It would be nice to provide
1876 * a tighter bound, but the array information is already lowered away.
1877 */
1878 brw_mark_surface_used(&prog_data->base,
1879 prog_data->base.binding_table.ubo_start +
1880 shader_prog->NumUniformBlocks - 1);
1881 }
1882
1883 if (const_offset_ir) {
1884 if (devinfo->gen >= 8) {
1885 /* Store the offset in a GRF so we can send-from-GRF. */
1886 offset = src_reg(this, glsl_type::int_type);
1887 emit(MOV(dst_reg(offset), src_reg(const_offset / 16)));
1888 } else {
1889 /* Immediates are fine on older generations since they'll be moved
1890 * to a (potentially fake) MRF at the generator level.
1891 */
1892 offset = src_reg(const_offset / 16);
1893 }
1894 } else {
1895 offset = src_reg(this, glsl_type::uint_type);
1896 emit(SHR(dst_reg(offset), op[1], src_reg(4u)));
1897 }
1898
1899 emit_pull_constant_load_reg(dst_reg(packed_consts),
1900 surf_index,
1901 offset,
1902 NULL, NULL /* before_block/inst */);
1903
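/* The pull constant load returns a whole 16-byte-aligned vec4, so pick out
 * the requested components by offsetting every channel of the size-based
 * swizzle by the starting component; e.g. a scalar at const_offset == 8
 * ends up with swizzle .zzzz.
 */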
1904 packed_consts.swizzle = brw_swizzle_for_size(ir->type->vector_elements);
1905 packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
1906 const_offset % 16 / 4,
1907 const_offset % 16 / 4,
1908 const_offset % 16 / 4);
1909
1910 /* UBO bools are any nonzero int. We need to convert them to 0/~0. */
1911 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1912 emit(CMP(result_dst, packed_consts, src_reg(0u),
1913 BRW_CONDITIONAL_NZ));
1914 } else {
1915 emit(MOV(result_dst, packed_consts));
1916 }
1917 break;
1918 }
1919
1920 case ir_binop_vector_extract:
1921 unreachable("should have been lowered by vec_index_to_cond_assign");
1922
1923 case ir_triop_fma:
1924 op[0] = fix_3src_operand(op[0]);
1925 op[1] = fix_3src_operand(op[1]);
1926 op[2] = fix_3src_operand(op[2]);
1927 /* Note that the instruction's argument order is reversed from GLSL
1928 * and the IR.
1929 */
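/* (MAD computes src0 + src1 * src2, so passing op[2], op[1], op[0] yields
 * op[0] * op[1] + op[2], matching GLSL fma(a, b, c).)
 */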
1930 emit(MAD(result_dst, op[2], op[1], op[0]));
1931 break;
1932
1933 case ir_triop_lrp:
1934 emit_lrp(result_dst, op[0], op[1], op[2]);
1935 break;
1936
1937 case ir_triop_csel:
1938 unreachable("already handled above");
1939 break;
1940
1941 case ir_triop_bfi:
1942 op[0] = fix_3src_operand(op[0]);
1943 op[1] = fix_3src_operand(op[1]);
1944 op[2] = fix_3src_operand(op[2]);
1945 emit(BFI2(result_dst, op[0], op[1], op[2]));
1946 break;
1947
1948 case ir_triop_bitfield_extract:
1949 op[0] = fix_3src_operand(op[0]);
1950 op[1] = fix_3src_operand(op[1]);
1951 op[2] = fix_3src_operand(op[2]);
1952 /* Note that the instruction's argument order is reversed from GLSL
1953 * and the IR.
1954 */
1955 emit(BFE(result_dst, op[2], op[1], op[0]));
1956 break;
1957
1958 case ir_triop_vector_insert:
1959 unreachable("should have been lowered by lower_vector_insert");
1960
1961 case ir_quadop_bitfield_insert:
1962 unreachable("not reached: should be handled by "
1963 "bitfield_insert_to_bfm_bfi\n");
1964
1965 case ir_quadop_vector:
1966 unreachable("not reached: should be handled by lower_quadop_vector");
1967
1968 case ir_unop_pack_half_2x16:
1969 emit_pack_half_2x16(result_dst, op[0]);
1970 break;
1971 case ir_unop_unpack_half_2x16:
1972 emit_unpack_half_2x16(result_dst, op[0]);
1973 break;
1974 case ir_unop_unpack_unorm_4x8:
1975 emit_unpack_unorm_4x8(result_dst, op[0]);
1976 break;
1977 case ir_unop_unpack_snorm_4x8:
1978 emit_unpack_snorm_4x8(result_dst, op[0]);
1979 break;
1980 case ir_unop_pack_unorm_4x8:
1981 emit_pack_unorm_4x8(result_dst, op[0]);
1982 break;
1983 case ir_unop_pack_snorm_4x8:
1984 emit_pack_snorm_4x8(result_dst, op[0]);
1985 break;
1986 case ir_unop_pack_snorm_2x16:
1987 case ir_unop_pack_unorm_2x16:
1988 case ir_unop_unpack_snorm_2x16:
1989 case ir_unop_unpack_unorm_2x16:
1990 unreachable("not reached: should be handled by lower_packing_builtins");
1991 case ir_unop_unpack_half_2x16_split_x:
1992 case ir_unop_unpack_half_2x16_split_y:
1993 case ir_binop_pack_half_2x16_split:
1994 case ir_unop_interpolate_at_centroid:
1995 case ir_binop_interpolate_at_sample:
1996 case ir_binop_interpolate_at_offset:
1997 unreachable("not reached: should not occur in vertex shader");
1998 case ir_binop_ldexp:
1999 unreachable("not reached: should be handled by ldexp_to_arith()");
2000 case ir_unop_d2f:
2001 case ir_unop_f2d:
2002 case ir_unop_d2i:
2003 case ir_unop_i2d:
2004 case ir_unop_d2u:
2005 case ir_unop_u2d:
2006 case ir_unop_d2b:
2007 case ir_unop_pack_double_2x32:
2008 case ir_unop_unpack_double_2x32:
2009 case ir_unop_frexp_sig:
2010 case ir_unop_frexp_exp:
2011 unreachable("fp64 todo");
2012 }
2013 }
2014
2015
2016 void
2017 vec4_visitor::visit(ir_swizzle *ir)
2018 {
2019 /* Note that this is only swizzles in expressions, not those on the left
2020 * hand side of an assignment, which do write masking. See ir_assignment
2021 * for that.
2022 */
2023 const unsigned swz = brw_compose_swizzle(
2024 brw_swizzle_for_size(ir->type->vector_elements),
2025 BRW_SWIZZLE4(ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w));
2026
2027 ir->val->accept(this);
2028 this->result = swizzle(this->result, swz);
2029 }
2030
2031 void
2032 vec4_visitor::visit(ir_dereference_variable *ir)
2033 {
2034 const struct glsl_type *type = ir->type;
2035 dst_reg *reg = variable_storage(ir->var);
2036
2037 if (!reg) {
2038 fail("Failed to find variable storage for %s\n", ir->var->name);
2039 this->result = src_reg(brw_null_reg());
2040 return;
2041 }
2042
2043 this->result = src_reg(*reg);
2044
2045 /* System values get their swizzle from the dst_reg writemask */
2046 if (ir->var->data.mode == ir_var_system_value)
2047 return;
2048
2049 if (type->is_scalar() || type->is_vector() || type->is_matrix())
2050 this->result.swizzle = brw_swizzle_for_size(type->vector_elements);
2051 }
2052
2053
2054 int
2055 vec4_visitor::compute_array_stride(ir_dereference_array *ir)
2056 {
2057 /* Under normal circumstances array elements are stored consecutively, so
2058 * the stride is equal to the size of the array element.
2059 */
2060 return type_size_vec4(ir->type);
2061 }
2062
2063
2064 void
2065 vec4_visitor::visit(ir_dereference_array *ir)
2066 {
2067 ir_constant *constant_index;
2068 src_reg src;
2069 int array_stride = compute_array_stride(ir);
2070
2071 constant_index = ir->array_index->constant_expression_value();
2072
2073 ir->array->accept(this);
2074 src = this->result;
2075
2076 if (constant_index) {
2077 src.reg_offset += constant_index->value.i[0] * array_stride;
2078 } else {
2079 /* Variable index array dereference. It eats the "vec4" of the
2080 * base of the array and an index that offsets the Mesa register
2081 * index.
2082 */
2083 ir->array_index->accept(this);
2084
2085 src_reg index_reg;
2086
2087 if (array_stride == 1) {
2088 index_reg = this->result;
2089 } else {
2090 index_reg = src_reg(this, glsl_type::int_type);
2091
2092 emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride)));
2093 }
2094
2095 if (src.reladdr) {
2096 src_reg temp = src_reg(this, glsl_type::int_type);
2097
2098 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
2099
2100 index_reg = temp;
2101 }
2102
2103 src.reladdr = ralloc(mem_ctx, src_reg);
2104 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
2105 }
2106
2107 /* If the type is smaller than a vec4, replicate the last channel out. */
2108 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
2109 src.swizzle = brw_swizzle_for_size(ir->type->vector_elements);
2110 else
2111 src.swizzle = BRW_SWIZZLE_NOOP;
2112 src.type = brw_type_for_base_type(ir->type);
2113
2114 this->result = src;
2115 }
2116
2117 void
2118 vec4_visitor::visit(ir_dereference_record *ir)
2119 {
2120 unsigned int i;
2121 const glsl_type *struct_type = ir->record->type;
2122 int offset = 0;
2123
2124 ir->record->accept(this);
2125
2126 for (i = 0; i < struct_type->length; i++) {
2127 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
2128 break;
2129 offset += type_size_vec4(struct_type->fields.structure[i].type);
2130 }
2131
2132 /* If the type is smaller than a vec4, replicate the last channel out. */
2133 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
2134 this->result.swizzle = brw_swizzle_for_size(ir->type->vector_elements);
2135 else
2136 this->result.swizzle = BRW_SWIZZLE_NOOP;
2137 this->result.type = brw_type_for_base_type(ir->type);
2138
2139 this->result.reg_offset += offset;
2140 }
2141
2142 /**
2143 * We want to be careful in assignment setup to hit the actual storage
2144 * instead of potentially using a temporary like we might with the
2145 * ir_dereference handler.
2146 */
2147 static dst_reg
2148 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
2149 {
2150 /* The LHS must be a dereference. If the LHS is a variable indexed array
2151 * access of a vector, it must be separated into a series of conditional moves
2152 * before reaching this point (see ir_vec_index_to_cond_assign).
2153 */
2154 assert(ir->as_dereference());
2155 ir_dereference_array *deref_array = ir->as_dereference_array();
2156 if (deref_array) {
2157 assert(!deref_array->array->type->is_vector());
2158 }
2159
2160 /* Use the rvalue deref handler for the most part. We'll ignore
2161 * swizzles in it and write swizzles using writemask, though.
2162 */
2163 ir->accept(v);
2164 return dst_reg(v->result);
2165 }
2166
2167 void
2168 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
2169 const struct glsl_type *type,
2170 enum brw_predicate predicate)
2171 {
2172 if (type->base_type == GLSL_TYPE_STRUCT) {
2173 for (unsigned int i = 0; i < type->length; i++) {
2174 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
2175 }
2176 return;
2177 }
2178
2179 if (type->is_array()) {
2180 for (unsigned int i = 0; i < type->length; i++) {
2181 emit_block_move(dst, src, type->fields.array, predicate);
2182 }
2183 return;
2184 }
2185
2186 if (type->is_matrix()) {
2187 const struct glsl_type *vec_type;
2188
2189 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
2190 type->vector_elements, 1);
2191
2192 for (int i = 0; i < type->matrix_columns; i++) {
2193 emit_block_move(dst, src, vec_type, predicate);
2194 }
2195 return;
2196 }
2197
2198 assert(type->is_scalar() || type->is_vector());
2199
2200 dst->type = brw_type_for_base_type(type);
2201 src->type = dst->type;
2202
2203 dst->writemask = (1 << type->vector_elements) - 1;
2204
2205 src->swizzle = brw_swizzle_for_size(type->vector_elements);
2206
2207 vec4_instruction *inst = emit(MOV(*dst, *src));
2208 inst->predicate = predicate;
2209
2210 dst->reg_offset++;
2211 src->reg_offset++;
2212 }
2213
2214
2215 /* If the RHS processing resulted in an instruction generating a
2216 * temporary value, and it would be easy to rewrite the instruction to
2217 * generate its result right into the LHS instead, do so. This ends
2218 * up reliably removing instructions where it can be tricky to do so
2219 * later without real UD chain information.
2220 */
2221 bool
2222 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
2223 dst_reg dst,
2224 src_reg src,
2225 vec4_instruction *pre_rhs_inst,
2226 vec4_instruction *last_rhs_inst)
2227 {
2228 /* This could be supported, but it would take more smarts. */
2229 if (ir->condition)
2230 return false;
2231
2232 if (pre_rhs_inst == last_rhs_inst)
2233 return false; /* No instructions generated to work with. */
2234
2235 /* Make sure the last instruction generated our source reg. */
2236 if (src.file != GRF ||
2237 src.file != last_rhs_inst->dst.file ||
2238 src.reg != last_rhs_inst->dst.reg ||
2239 src.reg_offset != last_rhs_inst->dst.reg_offset ||
2240 src.reladdr ||
2241 src.abs ||
2242 src.negate ||
2243 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
2244 return false;
2245
2246 /* Check that the last instruction fully initialized the channels
2247 * we want to use, in the order we want to use them. We could
2248 * potentially reswizzle the operands of many instructions so that
2249 * we could handle out of order channels, but don't yet.
2250 */
2251
2252 for (unsigned i = 0; i < 4; i++) {
2253 if (dst.writemask & (1 << i)) {
2254 if (!(last_rhs_inst->dst.writemask & (1 << i)))
2255 return false;
2256
2257 if (BRW_GET_SWZ(src.swizzle, i) != i)
2258 return false;
2259 }
2260 }
2261
2262 /* Success! Rewrite the instruction. */
2263 last_rhs_inst->dst.file = dst.file;
2264 last_rhs_inst->dst.reg = dst.reg;
2265 last_rhs_inst->dst.reg_offset = dst.reg_offset;
2266 last_rhs_inst->dst.reladdr = dst.reladdr;
2267 last_rhs_inst->dst.writemask &= dst.writemask;
2268
2269 return true;
2270 }
2271
2272 void
2273 vec4_visitor::visit(ir_assignment *ir)
2274 {
2275 dst_reg dst = get_assignment_lhs(ir->lhs, this);
2276 enum brw_predicate predicate = BRW_PREDICATE_NONE;
2277
2278 if (!ir->lhs->type->is_scalar() &&
2279 !ir->lhs->type->is_vector()) {
2280 ir->rhs->accept(this);
2281 src_reg src = this->result;
2282
2283 if (ir->condition) {
2284 emit_bool_to_cond_code(ir->condition, &predicate);
2285 }
2286
2287 /* emit_block_move doesn't account for swizzles in the source register.
2288 * This should be ok, since the source register is a structure or an
2289 * array, and those can't be swizzled. But double-check to be sure.
2290 */
2291 assert(src.swizzle ==
2292 (ir->rhs->type->is_matrix()
2293 ? brw_swizzle_for_size(ir->rhs->type->vector_elements)
2294 : BRW_SWIZZLE_NOOP));
2295
2296 emit_block_move(&dst, &src, ir->rhs->type, predicate);
2297 return;
2298 }
2299
2300 /* Now we're down to just a scalar/vector with writemasks. */
2301 int i;
2302
2303 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
2304 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2305
2306 ir->rhs->accept(this);
2307
2308 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2309
2310 int swizzles[4];
2311 int src_chan = 0;
2312
2313 assert(ir->lhs->type->is_vector() ||
2314 ir->lhs->type->is_scalar());
2315 dst.writemask = ir->write_mask;
2316
2317 /* Swizzle a small RHS vector into the channels being written.
2318 *
2319 * glsl ir treats write_mask as dictating how many channels are
2320 * present on the RHS while in our instructions we need to make
2321 * those channels appear in the slots of the vec4 they're written to.
2322 */
2323 for (int i = 0; i < 4; i++)
2324 swizzles[i] = (ir->write_mask & (1 << i) ? src_chan++ : 0);
2325
2326 src_reg src = swizzle(this->result,
2327 BRW_SWIZZLE4(swizzles[0], swizzles[1],
2328 swizzles[2], swizzles[3]));
2329
2330 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
2331 return;
2332 }
2333
2334 if (ir->condition) {
2335 emit_bool_to_cond_code(ir->condition, &predicate);
2336 }
2337
2338 for (i = 0; i < type_size_vec4(ir->lhs->type); i++) {
2339 vec4_instruction *inst = emit(MOV(dst, src));
2340 inst->predicate = predicate;
2341
2342 dst.reg_offset++;
2343 src.reg_offset++;
2344 }
2345 }
2346
2347 void
2348 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
2349 {
2350 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2351 foreach_in_list(ir_constant, field_value, &ir->components) {
2352 emit_constant_values(dst, field_value);
2353 }
2354 return;
2355 }
2356
2357 if (ir->type->is_array()) {
2358 for (unsigned int i = 0; i < ir->type->length; i++) {
2359 emit_constant_values(dst, ir->array_elements[i]);
2360 }
2361 return;
2362 }
2363
2364 if (ir->type->is_matrix()) {
2365 for (int i = 0; i < ir->type->matrix_columns; i++) {
2366 float *vec = &ir->value.f[i * ir->type->vector_elements];
2367
2368 for (int j = 0; j < ir->type->vector_elements; j++) {
2369 dst->writemask = 1 << j;
2370 dst->type = BRW_REGISTER_TYPE_F;
2371
2372 emit(MOV(*dst, src_reg(vec[j])));
2373 }
2374 dst->reg_offset++;
2375 }
2376 return;
2377 }
2378
2379 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
2380
2381 for (int i = 0; i < ir->type->vector_elements; i++) {
2382 if (!(remaining_writemask & (1 << i)))
2383 continue;
2384
2385 dst->writemask = 1 << i;
2386 dst->type = brw_type_for_base_type(ir->type);
2387
2388 /* Find other components that match the one we're about to
2389 * write. Emits fewer instructions for things like vec4(0.5,
2390 * 1.5, 1.5, 1.5).
2391 */
2392 for (int j = i + 1; j < ir->type->vector_elements; j++) {
2393 if (ir->type->base_type == GLSL_TYPE_BOOL) {
2394 if (ir->value.b[i] == ir->value.b[j])
2395 dst->writemask |= (1 << j);
2396 } else {
2397 /* u, i, and f storage all line up, so no need for a
2398 * switch case for comparing each type.
2399 */
2400 if (ir->value.u[i] == ir->value.u[j])
2401 dst->writemask |= (1 << j);
2402 }
2403 }
2404
2405 switch (ir->type->base_type) {
2406 case GLSL_TYPE_FLOAT:
2407 emit(MOV(*dst, src_reg(ir->value.f[i])));
2408 break;
2409 case GLSL_TYPE_INT:
2410 emit(MOV(*dst, src_reg(ir->value.i[i])));
2411 break;
2412 case GLSL_TYPE_UINT:
2413 emit(MOV(*dst, src_reg(ir->value.u[i])));
2414 break;
2415 case GLSL_TYPE_BOOL:
2416 emit(MOV(*dst, src_reg(ir->value.b[i] != 0 ? ~0 : 0)));
2417 break;
2418 default:
2419 unreachable("Non-float/uint/int/bool constant");
2420 }
2421
2422 remaining_writemask &= ~dst->writemask;
2423 }
2424 dst->reg_offset++;
2425 }
2426
2427 void
2428 vec4_visitor::visit(ir_constant *ir)
2429 {
2430 dst_reg dst = dst_reg(this, ir->type);
2431 this->result = src_reg(dst);
2432
2433 emit_constant_values(&dst, ir);
2434 }
2435
2436 void
2437 vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
2438 {
2439 ir_dereference *deref = static_cast<ir_dereference *>(
2440 ir->actual_parameters.get_head());
2441 ir_variable *location = deref->variable_referenced();
2442 unsigned surf_index = (prog_data->base.binding_table.abo_start +
2443 location->data.binding);
2444
2445 /* Calculate the surface offset */
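/* For an array of counters this is array_index * ATOMIC_COUNTER_SIZE plus
 * the counter's byte offset within the buffer; otherwise it is just the
 * counter's offset.
 */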
2446 src_reg offset(this, glsl_type::uint_type);
2447 ir_dereference_array *deref_array = deref->as_dereference_array();
2448 if (deref_array) {
2449 deref_array->array_index->accept(this);
2450
2451 src_reg tmp(this, glsl_type::uint_type);
2452 emit(MUL(dst_reg(tmp), this->result, ATOMIC_COUNTER_SIZE));
2453 emit(ADD(dst_reg(offset), tmp, location->data.atomic.offset));
2454 } else {
2455 offset = location->data.atomic.offset;
2456 }
2457
2458 /* Emit the appropriate machine instruction */
2459 const char *callee = ir->callee->function_name();
2460 dst_reg dst = get_assignment_lhs(ir->return_deref, this);
2461
2462 if (!strcmp("__intrinsic_atomic_read", callee)) {
2463 emit_untyped_surface_read(surf_index, dst, offset);
2464
2465 } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
2466 emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
2467 src_reg(), src_reg());
2468
2469 } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
2470 emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
2471 src_reg(), src_reg());
2472 }
2473
2474 brw_mark_surface_used(stage_prog_data, surf_index);
2475 }
2476
2477 void
2478 vec4_visitor::visit(ir_call *ir)
2479 {
2480 const char *callee = ir->callee->function_name();
2481
2482 if (!strcmp("__intrinsic_atomic_read", callee) ||
2483 !strcmp("__intrinsic_atomic_increment", callee) ||
2484 !strcmp("__intrinsic_atomic_predecrement", callee)) {
2485 visit_atomic_counter_intrinsic(ir);
2486 } else {
2487 unreachable("Unsupported intrinsic.");
2488 }
2489 }
2490
2491 src_reg
2492 vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
2493 src_reg coordinate, src_reg sampler)
2494 {
2495 vec4_instruction *inst =
2496 new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
2497 dst_reg(this, glsl_type::uvec4_type));
2498 inst->base_mrf = 2;
2499 inst->src[1] = sampler;
2500
2501 int param_base;
2502
2503 if (devinfo->gen >= 9) {
2504 /* Gen9+ needs a message header in order to use SIMD4x2 mode */
2505 vec4_instruction *header_inst = new(mem_ctx)
2506 vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
2507 dst_reg(MRF, inst->base_mrf));
2508
2509 emit(header_inst);
2510
2511 inst->mlen = 2;
2512 inst->header_size = 1;
2513 param_base = inst->base_mrf + 1;
2514 } else {
2515 inst->mlen = 1;
2516 param_base = inst->base_mrf;
2517 }
2518
2519 /* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */
2520 int coord_mask = (1 << coordinate_type->vector_elements) - 1;
2521 int zero_mask = 0xf & ~coord_mask;
2522
2523 emit(MOV(dst_reg(MRF, param_base, coordinate_type, coord_mask),
2524 coordinate));
2525
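/* Zero any unused coordinate channels, which includes the lod slot. */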
2526 emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask),
2527 src_reg(0)));
2528
2529 emit(inst);
2530 return src_reg(inst->dst);
2531 }
2532
2533 bool
2534 vec4_visitor::is_high_sampler(src_reg sampler)
2535 {
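/* A sampler is "high" if its index might be >= 16, which does not fit in
 * the 4-bit sampler field of the message descriptor and so must be routed
 * through the message header; only Haswell and Gen8+ support that path.
 */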
2536 if (devinfo->gen < 8 && !devinfo->is_haswell)
2537 return false;
2538
2539 return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
2540 }
2541
2542 void
2543 vec4_visitor::emit_texture(ir_texture_opcode op,
2544 dst_reg dest,
2545 const glsl_type *dest_type,
2546 src_reg coordinate,
2547 int coord_components,
2548 src_reg shadow_comparitor,
2549 src_reg lod, src_reg lod2,
2550 src_reg sample_index,
2551 uint32_t constant_offset,
2552 src_reg offset_value,
2553 src_reg mcs,
2554 bool is_cube_array,
2555 uint32_t sampler,
2556 src_reg sampler_reg)
2557 {
2558 enum opcode opcode;
2559 switch (op) {
2560 case ir_tex: opcode = SHADER_OPCODE_TXL; break;
2561 case ir_txl: opcode = SHADER_OPCODE_TXL; break;
2562 case ir_txd: opcode = SHADER_OPCODE_TXD; break;
2563 case ir_txf: opcode = SHADER_OPCODE_TXF; break;
2564 case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
2565 case ir_txs: opcode = SHADER_OPCODE_TXS; break;
2566 case ir_tg4: opcode = offset_value.file != BAD_FILE
2567 ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
2568 case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
2569 case ir_txb:
2570 unreachable("TXB is not valid for vertex shaders.");
2571 case ir_lod:
2572 unreachable("LOD is not valid for vertex shaders.");
2573 default:
2574 unreachable("Unrecognized tex op");
2575 }
2576
2577 vec4_instruction *inst = new(mem_ctx) vec4_instruction(
2578 opcode, dst_reg(this, dest_type));
2579
2580 inst->offset = constant_offset;
2581
2582 /* The message header is necessary for:
2583 * - Gen4 (always)
2584 * - Gen9+ for selecting SIMD4x2
2585 * - Texel offsets
2586 * - Gather channel selection
2587 * - Sampler indices too large to fit in a 4-bit value.
2588 */
2589 inst->header_size =
2590 (devinfo->gen < 5 || devinfo->gen >= 9 ||
2591 inst->offset != 0 || op == ir_tg4 ||
2592 is_high_sampler(sampler_reg)) ? 1 : 0;
2593 inst->base_mrf = 2;
2594 inst->mlen = inst->header_size + 1; /* always at least one */
2595 inst->dst.writemask = WRITEMASK_XYZW;
2596 inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
2597
2598 inst->src[1] = sampler_reg;
2599
2600 /* MRF for the first parameter */
2601 int param_base = inst->base_mrf + inst->header_size;
2602
2603 if (op == ir_txs || op == ir_query_levels) {
2604 int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
2605 emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
2606 } else {
2607 /* Load the coordinate */
2608 /* FINISHME: gl_clamp_mask and saturate */
2609 int coord_mask = (1 << coord_components) - 1;
2610 int zero_mask = 0xf & ~coord_mask;
2611
2612 emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
2613 coordinate));
2614
2615 if (zero_mask != 0) {
2616 emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
2617 src_reg(0)));
2618 }
2619 /* Load the shadow comparitor */
2620 if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) {
2621 emit(MOV(dst_reg(MRF, param_base + 1, shadow_comparitor.type,
2622 WRITEMASK_X),
2623 shadow_comparitor));
2624 inst->mlen++;
2625 }
2626
2627 /* Load the LOD info */
2628 if (op == ir_tex || op == ir_txl) {
2629 int mrf, writemask;
2630 if (devinfo->gen >= 5) {
2631 mrf = param_base + 1;
2632 if (shadow_comparitor.file != BAD_FILE) {
2633 writemask = WRITEMASK_Y;
2634 /* mlen already incremented */
2635 } else {
2636 writemask = WRITEMASK_X;
2637 inst->mlen++;
2638 }
2639 } else /* devinfo->gen == 4 */ {
2640 mrf = param_base;
2641 writemask = WRITEMASK_W;
2642 }
2643 lod.swizzle = BRW_SWIZZLE_XXXX;
2644 emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
2645 } else if (op == ir_txf) {
2646 emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
2647 } else if (op == ir_txf_ms) {
2648 emit(MOV(dst_reg(MRF, param_base + 1, sample_index.type, WRITEMASK_X),
2649 sample_index));
2650 if (devinfo->gen >= 7) {
2651 /* MCS data is in the first channel of `mcs`, but we need to get it into
2652 * the .y channel of the second vec4 of params, so replicate .x across
2653 * the whole vec4 and then mask off everything except .y
2654 */
2655 mcs.swizzle = BRW_SWIZZLE_XXXX;
2656 emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
2657 mcs));
2658 }
2659 inst->mlen++;
2660 } else if (op == ir_txd) {
2661 const brw_reg_type type = lod.type;
2662
2663 if (devinfo->gen >= 5) {
2664 lod.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2665 lod2.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2666 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), lod));
2667 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), lod2));
2668 inst->mlen++;
2669
2670 if (dest_type->vector_elements == 3 || shadow_comparitor.file != BAD_FILE) {
2671 lod.swizzle = BRW_SWIZZLE_ZZZZ;
2672 lod2.swizzle = BRW_SWIZZLE_ZZZZ;
2673 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), lod));
2674 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), lod2));
2675 inst->mlen++;
2676
2677 if (shadow_comparitor.file != BAD_FILE) {
2678 emit(MOV(dst_reg(MRF, param_base + 2,
2679 shadow_comparitor.type, WRITEMASK_Z),
2680 shadow_comparitor));
2681 }
2682 }
2683 } else /* devinfo->gen == 4 */ {
2684 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), lod));
2685 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), lod2));
2686 inst->mlen += 2;
2687 }
2688 } else if (op == ir_tg4 && offset_value.file != BAD_FILE) {
2689 if (shadow_comparitor.file != BAD_FILE) {
2690 emit(MOV(dst_reg(MRF, param_base, shadow_comparitor.type, WRITEMASK_W),
2691 shadow_comparitor));
2692 }
2693
2694 emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY),
2695 offset_value));
2696 inst->mlen++;
2697 }
2698 }
2699
2700 emit(inst);
2701
2702 /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
2703 * spec requires layers.
2704 */
2705 if (op == ir_txs && is_cube_array) {
2706 emit_math(SHADER_OPCODE_INT_QUOTIENT,
2707 writemask(inst->dst, WRITEMASK_Z),
2708 src_reg(inst->dst), src_reg(6));
2709 }
2710
2711 if (devinfo->gen == 6 && op == ir_tg4) {
2712 emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst);
2713 }
2714
2715 swizzle_result(op, dest,
2716 src_reg(inst->dst), sampler, dest_type);
2717 }
2718
2719 void
2720 vec4_visitor::visit(ir_texture *ir)
2721 {
2722 uint32_t sampler =
2723 _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
2724
2725 ir_rvalue *nonconst_sampler_index =
2726 _mesa_get_sampler_array_nonconst_index(ir->sampler);
2727
2728 /* Handle non-constant sampler array indexing */
2729 src_reg sampler_reg;
2730 if (nonconst_sampler_index) {
2731 /* The highest sampler which may be used by this operation is
2732 * the last element of the array. Mark it here, because the generator
2733 * doesn't have enough information to determine the bound.
2734 */
2735 uint32_t array_size = ir->sampler->as_dereference_array()
2736 ->array->type->array_size();
2737
2738 uint32_t max_used = sampler + array_size - 1;
2739 if (ir->op == ir_tg4 && devinfo->gen < 8) {
2740 max_used += prog_data->base.binding_table.gather_texture_start;
2741 } else {
2742 max_used += prog_data->base.binding_table.texture_start;
2743 }
2744
2745 brw_mark_surface_used(&prog_data->base, max_used);
2746
2747 /* Emit code to evaluate the actual indexing expression */
2748 nonconst_sampler_index->accept(this);
2749 src_reg temp(this, glsl_type::uint_type);
2750 emit(ADD(dst_reg(temp), this->result, src_reg(sampler)));
2751 sampler_reg = emit_uniformize(temp);
2752 } else {
2753 /* Single sampler, or constant array index; the indexing expression
2754 * is just an immediate.
2755 */
2756 sampler_reg = src_reg(sampler);
2757 }
2758
2759 /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
2760 * emitting anything other than setting up the constant result.
2761 */
2762 if (ir->op == ir_tg4) {
2763 ir_constant *chan = ir->lod_info.component->as_constant();
2764 int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
2765 if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
2766 dst_reg result(this, ir->type);
2767 this->result = src_reg(result);
2768 emit(MOV(result, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
2769 return;
2770 }
2771 }
2772
2773 /* Should be lowered by do_lower_texture_projection */
2774 assert(!ir->projector);
2775
2776 /* Should be lowered */
2777 assert(!ir->offset || !ir->offset->type->is_array());
2778
2779 /* Generate code to compute all the subexpression trees. This has to be
2780 * done before loading any values into MRFs for the sampler message since
2781 * generating these values may involve SEND messages that need the MRFs.
2782 */
2783 src_reg coordinate;
2784 int coord_components = 0;
2785 if (ir->coordinate) {
2786 coord_components = ir->coordinate->type->vector_elements;
2787 ir->coordinate->accept(this);
2788 coordinate = this->result;
2789 }
2790
2791 src_reg shadow_comparitor;
2792 if (ir->shadow_comparitor) {
2793 ir->shadow_comparitor->accept(this);
2794 shadow_comparitor = this->result;
2795 }
2796
2797 bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant();
2798 src_reg offset_value;
2799 if (has_nonconstant_offset) {
2800 ir->offset->accept(this);
2801 offset_value = src_reg(this->result);
2802 }
2803
2804 src_reg lod, lod2, sample_index, mcs;
2805 switch (ir->op) {
2806 case ir_tex:
2807 lod = src_reg(0.0f);
2808 break;
2809 case ir_txf:
2810 case ir_txl:
2811 case ir_txs:
2812 ir->lod_info.lod->accept(this);
2813 lod = this->result;
2814 break;
2815 case ir_query_levels:
2816 lod = src_reg(0);
2817 break;
2818 case ir_txf_ms:
2819 ir->lod_info.sample_index->accept(this);
2820 sample_index = this->result;
2821
2822 if (devinfo->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
2823 mcs = emit_mcs_fetch(ir->coordinate->type, coordinate, sampler_reg);
2824 else
2825 mcs = src_reg(0u);
2826 break;
2827 case ir_txd:
2828 ir->lod_info.grad.dPdx->accept(this);
2829 lod = this->result;
2830
2831 ir->lod_info.grad.dPdy->accept(this);
2832 lod2 = this->result;
2833 break;
2834 case ir_txb:
2835 case ir_lod:
2836 case ir_tg4:
2837 break;
2838 }
2839
2840 uint32_t constant_offset = 0;
2841 if (ir->offset != NULL && !has_nonconstant_offset) {
2842 constant_offset =
2843 brw_texture_offset(ir->offset->as_constant()->value.i,
2844 ir->offset->type->vector_elements);
2845 }
2846
2847 /* Stuff the channel select bits in the top of the texture offset */
2848 if (ir->op == ir_tg4)
2849 constant_offset |=
2850 gather_channel( ir->lod_info.component->as_constant()->value.i[0],
2851 sampler) << 16;
2852
2853 glsl_type const *type = ir->sampler->type;
2854 bool is_cube_array = type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
2855 type->sampler_array;
2856
2857 this->result = src_reg(this, ir->type);
2858 dst_reg dest = dst_reg(this->result);
2859
2860 emit_texture(ir->op, dest, ir->type, coordinate, coord_components,
2861 shadow_comparitor,
2862 lod, lod2, sample_index,
2863 constant_offset, offset_value,
2864 mcs, is_cube_array, sampler, sampler_reg);
2865 }
2866
2867 /**
2868 * Apply workarounds for Gen6 gather with UINT/SINT
2869 */
2870 void
2871 vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
2872 {
2873 if (!wa)
2874 return;
2875
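/* Gen6 gather4 returns UNORM-scaled data for these formats, so rescale by
 * (2^width - 1) to recover the integer value, then sign-extend it if the
 * original format was signed.
 */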
2876 int width = (wa & WA_8BIT) ? 8 : 16;
2877 dst_reg dst_f = dst;
2878 dst_f.type = BRW_REGISTER_TYPE_F;
2879
2880 /* Convert from UNORM to UINT */
2881 emit(MUL(dst_f, src_reg(dst_f), src_reg((float)((1 << width) - 1))));
2882 emit(MOV(dst, src_reg(dst_f)));
2883
2884 if (wa & WA_SIGN) {
2885 /* Reinterpret the UINT value as a signed INT value by
2886 * shifting the sign bit into place, then shifting back
2887 * preserving sign.
2888 */
2889 emit(SHL(dst, src_reg(dst), src_reg(32 - width)));
2890 emit(ASR(dst, src_reg(dst), src_reg(32 - width)));
2891 }
2892 }
2893
2894 /**
2895 * Set up the gather channel based on the swizzle, for gather4.
2896 */
2897 uint32_t
2898 vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
2899 {
2900 int swiz = GET_SWZ(key->tex.swizzles[sampler], gather_component);
2901 switch (swiz) {
2902 case SWIZZLE_X: return 0;
2903 case SWIZZLE_Y:
2904 /* gather4 sampler is broken for green channel on RG32F --
2905 * we must ask for blue instead.
2906 */
2907 if (key->tex.gather_channel_quirk_mask & (1<<sampler))
2908 return 2;
2909 return 1;
2910 case SWIZZLE_Z: return 2;
2911 case SWIZZLE_W: return 3;
2912 default:
2913 unreachable("Not reached"); /* zero, one swizzles handled already */
2914 }
2915 }
2916
2917 void
2918 vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
2919 src_reg orig_val, uint32_t sampler,
2920 const glsl_type *dest_type)
2921 {
2922 int s = key->tex.swizzles[sampler];
2923
2924 dst_reg swizzled_result = dest;
2925
2926 if (op == ir_query_levels) {
2927 /* # levels is in .w */
2928 orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2929 emit(MOV(swizzled_result, orig_val));
2930 return;
2931 }
2932
2933 if (op == ir_txs || dest_type == glsl_type::float_type
2934 || s == SWIZZLE_NOOP || op == ir_tg4) {
2935 emit(MOV(swizzled_result, orig_val));
2936 return;
2937 }
2938
2939
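/* Split the GL texture swizzle into channels copied from the sampler
 * result, channels forced to zero, and channels forced to one.
 */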
2940 int zero_mask = 0, one_mask = 0, copy_mask = 0;
2941 int swizzle[4] = {0};
2942
2943 for (int i = 0; i < 4; i++) {
2944 switch (GET_SWZ(s, i)) {
2945 case SWIZZLE_ZERO:
2946 zero_mask |= (1 << i);
2947 break;
2948 case SWIZZLE_ONE:
2949 one_mask |= (1 << i);
2950 break;
2951 default:
2952 copy_mask |= (1 << i);
2953 swizzle[i] = GET_SWZ(s, i);
2954 break;
2955 }
2956 }
2957
2958 if (copy_mask) {
2959 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2960 swizzled_result.writemask = copy_mask;
2961 emit(MOV(swizzled_result, orig_val));
2962 }
2963
2964 if (zero_mask) {
2965 swizzled_result.writemask = zero_mask;
2966 emit(MOV(swizzled_result, src_reg(0.0f)));
2967 }
2968
2969 if (one_mask) {
2970 swizzled_result.writemask = one_mask;
2971 emit(MOV(swizzled_result, src_reg(1.0f)));
2972 }
2973 }
2974
2975 void
2976 vec4_visitor::visit(ir_return *)
2977 {
2978 unreachable("not reached");
2979 }
2980
2981 void
2982 vec4_visitor::visit(ir_discard *)
2983 {
2984 unreachable("not reached");
2985 }
2986
2987 void
2988 vec4_visitor::visit(ir_if *ir)
2989 {
2990 /* Don't point the annotation at the if statement, because then it plus
2991 * the then and else blocks get printed.
2992 */
2993 this->base_ir = ir->condition;
2994
2995 if (devinfo->gen == 6) {
2996 emit_if_gen6(ir);
2997 } else {
2998 enum brw_predicate predicate;
2999 emit_bool_to_cond_code(ir->condition, &predicate);
3000 emit(IF(predicate));
3001 }
3002
3003 visit_instructions(&ir->then_instructions);
3004
3005 if (!ir->else_instructions.is_empty()) {
3006 this->base_ir = ir->condition;
3007 emit(BRW_OPCODE_ELSE);
3008
3009 visit_instructions(&ir->else_instructions);
3010 }
3011
3012 this->base_ir = ir->condition;
3013 emit(BRW_OPCODE_ENDIF);
3014 }
3015
3016 void
3017 vec4_visitor::gs_emit_vertex(int stream_id)
3018 {
3019 unreachable("not reached");
3020 }
3021
3022 void
3023 vec4_visitor::visit(ir_emit_vertex *)
3024 {
3025 unreachable("not reached");
3026 }
3027
3028 void
3029 vec4_visitor::gs_end_primitive()
3030 {
3031 unreachable("not reached");
3032 }
3033
3034
3035 void
3036 vec4_visitor::visit(ir_end_primitive *)
3037 {
3038 unreachable("not reached");
3039 }
3040
3041 void
3042 vec4_visitor::visit(ir_barrier *)
3043 {
3044 unreachable("not reached");
3045 }
3046
3047 void
3048 vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
3049 dst_reg dst, src_reg offset,
3050 src_reg src0, src_reg src1)
3051 {
3052 unsigned mlen = 0;
3053
3054 /* Set the atomic operation offset. */
3055 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
3056 mlen++;
3057
3058 /* Set the atomic operation arguments. */
3059 if (src0.file != BAD_FILE) {
3060 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
3061 mlen++;
3062 }
3063
3064 if (src1.file != BAD_FILE) {
3065 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
3066 mlen++;
3067 }
3068
3069 /* Emit the instruction. Note that this maps to the normal SIMD8
3070 * untyped atomic message on Ivy Bridge, but that's OK because
3071 * unused channels will be masked out.
3072 */
3073 vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
3074 brw_message_reg(0),
3075 src_reg(surf_index), src_reg(atomic_op));
3076 inst->mlen = mlen;
3077 }
3078
3079 void
3080 vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
3081 src_reg offset)
3082 {
3083 /* Set the surface read offset. */
3084 emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
3085
3086 /* Emit the instruction. Note that this maps to the normal SIMD8
3087 * untyped surface read message, but that's OK because unused
3088 * channels will be masked out.
3089 */
3090 vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
3091 brw_message_reg(0),
3092 src_reg(surf_index), src_reg(1));
3093 inst->mlen = 1;
3094 }
3095
3096 void
3097 vec4_visitor::emit_ndc_computation()
3098 {
3099 /* Get the position */
3100 src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
3101
3102 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
3103 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
3104 output_reg[BRW_VARYING_SLOT_NDC] = ndc;
3105
3106 current_annotation = "NDC";
3107 dst_reg ndc_w = ndc;
3108 ndc_w.writemask = WRITEMASK_W;
3109 src_reg pos_w = pos;
3110 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
3111 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
3112
3113 dst_reg ndc_xyz = ndc;
3114 ndc_xyz.writemask = WRITEMASK_XYZ;
3115
3116 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
3117 }
3118
3119 void
3120 vec4_visitor::emit_psiz_and_flags(dst_reg reg)
3121 {
3122 if (devinfo->gen < 6 &&
3123 ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
3124 output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE ||
3125 devinfo->has_negative_rhw_bug)) {
3126 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
3127 dst_reg header1_w = header1;
3128 header1_w.writemask = WRITEMASK_W;
3129
3130 emit(MOV(header1, 0u));
3131
3132 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
3133 src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
3134
3135 current_annotation = "Point size";
3136 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
3137 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
3138 }
3139
3140 if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
3141 current_annotation = "Clipping flags";
3142 dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
3143 dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
3144
3145 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L));
3146 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
3147 emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
3148
3149 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L));
3150 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
3151 emit(SHL(flags1, src_reg(flags1), src_reg(4)));
3152 emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
3153 }
3154
3155 /* i965 clipping workaround:
3156 * 1) Test for negative rhw
3157 * 2) If set,
3158 * set ndc = (0,0,0,0)
3159 * set ucp[6] = 1
3160 *
3161 * Later, clipping will detect ucp[6] and ensure the primitive is
3162 * clipped against all fixed planes.
3163 */
3164 if (devinfo->has_negative_rhw_bug) {
3165 src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
3166 ndc_w.swizzle = BRW_SWIZZLE_WWWW;
3167 emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
3168 vec4_instruction *inst;
3169 inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
3170 inst->predicate = BRW_PREDICATE_NORMAL;
3171 output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F;
3172 inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
3173 inst->predicate = BRW_PREDICATE_NORMAL;
3174 }
3175
3176 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
3177 } else if (devinfo->gen < 6) {
3178 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
3179 } else {
3180 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
3181 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
3182 dst_reg reg_w = reg;
3183 reg_w.writemask = WRITEMASK_W;
3184 src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ]);
3185 reg_as_src.type = reg_w.type;
3186 reg_as_src.swizzle = brw_swizzle_for_size(1);
3187 emit(MOV(reg_w, reg_as_src));
3188 }
3189 if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
3190 dst_reg reg_y = reg;
3191 reg_y.writemask = WRITEMASK_Y;
3192 reg_y.type = BRW_REGISTER_TYPE_D;
3193 output_reg[VARYING_SLOT_LAYER].type = reg_y.type;
3194 emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER])));
3195 }
3196 if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
3197 dst_reg reg_z = reg;
3198 reg_z.writemask = WRITEMASK_Z;
3199 reg_z.type = BRW_REGISTER_TYPE_D;
3200 output_reg[VARYING_SLOT_VIEWPORT].type = reg_z.type;
3201 emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT])));
3202 }
3203 }
3204 }
3205
3206 void
3207 vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
3208 {
3209 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
3210 *
3211 * "If a linked set of shaders forming the vertex stage contains no
3212 * static write to gl_ClipVertex or gl_ClipDistance, but the
3213 * application has requested clipping against user clip planes through
3214 * the API, then the coordinate written to gl_Position is used for
3215 * comparison against the user clip planes."
3216 *
3217 * This function is only called if the shader didn't write to
3218 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
3219 * if the user wrote to it; otherwise we use gl_Position.
3220 */
3221 gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
3222 if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
3223 clip_vertex = VARYING_SLOT_POS;
3224 }
3225
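/* Each clip distance is the dot product of the clip vertex (or the
 * position) with the corresponding user clip plane, one distance per
 * writemask channel.
 */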
3226 for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
3227 ++i) {
3228 reg.writemask = 1 << i;
3229 emit(DP4(reg,
3230 src_reg(output_reg[clip_vertex]),
3231 src_reg(this->userplane[i + offset])));
3232 }
3233 }
3234
3235 vec4_instruction *
3236 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
3237 {
3238 assert(varying < VARYING_SLOT_MAX);
3239 assert(output_reg[varying].type == reg.type);
3240 current_annotation = output_reg_annotation[varying];
3241 /* Copy the register, saturating if necessary */
3242 return emit(MOV(reg, src_reg(output_reg[varying])));
3243 }
3244
3245 void
3246 vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
3247 {
3248 reg.type = BRW_REGISTER_TYPE_F;
3249 output_reg[varying].type = reg.type;
3250
3251 switch (varying) {
3252 case VARYING_SLOT_PSIZ:
3253 {
3254 /* PSIZ is always in slot 0, and is coupled with other flags. */
3255 current_annotation = "indices, point width, clip flags";
3256 emit_psiz_and_flags(reg);
3257 break;
3258 }
3259 case BRW_VARYING_SLOT_NDC:
3260 current_annotation = "NDC";
3261 emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
3262 break;
3263 case VARYING_SLOT_POS:
3264 current_annotation = "gl_Position";
3265 emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
3266 break;
3267 case VARYING_SLOT_EDGE:
3268 /* This is present when doing unfilled polygons. We're supposed to copy
3269 * the edge flag from the user-provided vertex array
3270 * (glEdgeFlagPointer), or otherwise we'll copy from the current value
3271 * of that attribute (starts as 1.0f). This is then used in clipping to
3272 * determine which edges should be drawn as wireframe.
3273 */
3274 current_annotation = "edge flag";
3275 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
3276 glsl_type::float_type, WRITEMASK_XYZW))));
3277 break;
3278 case BRW_VARYING_SLOT_PAD:
3279 /* No need to write to this slot */
3280 break;
3281 case VARYING_SLOT_COL0:
3282 case VARYING_SLOT_COL1:
3283 case VARYING_SLOT_BFC0:
3284 case VARYING_SLOT_BFC1: {
3285 /* These built-in varyings are only supported in compatibility mode,
3286 * and we only support GS in core profile. So, this must be a vertex
3287 * shader.
3288 */
3289 assert(stage == MESA_SHADER_VERTEX);
3290 vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
3291 if (((struct brw_vs_prog_key *) key)->clamp_vertex_color)
3292 inst->saturate = true;
3293 break;
3294 }
3295
3296 default:
3297 emit_generic_urb_slot(reg, varying);
3298 break;
3299 }
3300 }
3301
3302 static int
3303 align_interleaved_urb_mlen(const struct brw_device_info *devinfo, int mlen)
3304 {
3305 if (devinfo->gen >= 6) {
3306 /* URB data written (does not include the message header reg) must
3307 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
3308 * section 5.4.3.2.2: URB_INTERLEAVED.
3309 *
3310 * URB entries are allocated on a multiple of 1024 bits, so an
3311 * extra 128 bits written here to make the end align to 256 is
3312 * no problem.
3313 */
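/* mlen here includes the header register, so the data portion is mlen - 1;
 * it is a multiple of two registers exactly when mlen is odd.
 */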
3314 if ((mlen % 2) != 1)
3315 mlen++;
3316 }
3317
3318 return mlen;
3319 }
3320
3321
3322 /**
3323 * Generates the VUE payload plus the necessary URB write instructions to
3324 * output it.
3325 *
3326 * The VUE layout is documented in Volume 2a.
3327 */
3328 void
3329 vec4_visitor::emit_vertex()
3330 {
3331 /* MRF 0 is reserved for the debugger, so start with message header
3332 * in MRF 1.
3333 */
3334 int base_mrf = 1;
3335 int mrf = base_mrf;
3336 /* In the process of generating our URB write message contents, we
3337 * may need to unspill a register or load from an array. Those
3338 * reads would use MRFs 14-15.
3339 */
3340 int max_usable_mrf = 13;
3341
3342 /* The following assertion verifies that max_usable_mrf causes an
3343 * even-numbered amount of URB write data, which will meet gen6's
3344 * requirements for length alignment.
3345 */
3346 assert ((max_usable_mrf - base_mrf) % 2 == 0);
3347
3348 /* First mrf is the g0-based message header containing URB handles and
3349 * such.
3350 */
3351 emit_urb_write_header(mrf++);
3352
3353 if (devinfo->gen < 6) {
3354 emit_ndc_computation();
3355 }
3356
3357 /* Lower legacy ff and ClipVertex clipping to clip distances */
3358 if (key->nr_userclip_plane_consts > 0) {
3359 current_annotation = "user clip distances";
3360
3361 output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
3362 output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
3363
3364 emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
3365 emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
3366 }
3367
3368 /* We may need to split this up into several URB writes, so do them in a
3369 * loop.
3370 */
3371 int slot = 0;
3372 bool complete = false;
3373 do {
3374 /* URB offset is in URB row increments, and each of our MRFs is half of
3375 * one of those, since we're doing interleaved writes.
3376 */
3377 int offset = slot / 2;
3378
3379 mrf = base_mrf + 1;
3380 for (; slot < prog_data->vue_map.num_slots; ++slot) {
3381 emit_urb_slot(dst_reg(MRF, mrf++),
3382 prog_data->vue_map.slot_to_varying[slot]);
3383
3384 /* If this was max_usable_mrf, we can't fit anything more into this
3385 * URB WRITE.
3386 */
3387 if (mrf > max_usable_mrf) {
3388 slot++;
3389 break;
3390 }
3391 }
3392
3393 complete = slot >= prog_data->vue_map.num_slots;
3394 current_annotation = "URB write";
3395 vec4_instruction *inst = emit_urb_write_opcode(complete);
3396 inst->base_mrf = base_mrf;
3397 inst->mlen = align_interleaved_urb_mlen(devinfo, mrf - base_mrf);
3398 inst->offset += offset;
3399 } while (!complete);
3400 }
3401
3402
3403 src_reg
3404 vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
3405 src_reg *reladdr, int reg_offset)
3406 {
3407 /* Because we store the values to scratch interleaved like our
3408 * vertex data, we need to scale the vec4 index by 2.
3409 */
3410 int message_header_scale = 2;
3411
3412 /* Pre-gen6, the message header uses byte offsets instead of vec4
3413 * (16-byte) offset units.
3414 */
3415 if (devinfo->gen < 6)
3416 message_header_scale *= 16;
3417
3418 if (reladdr) {
3419 src_reg index = src_reg(this, glsl_type::int_type);
3420
3421 emit_before(block, inst, ADD(dst_reg(index), *reladdr,
3422 src_reg(reg_offset)));
3423 emit_before(block, inst, MUL(dst_reg(index), index,
3424 src_reg(message_header_scale)));
3425
3426 return index;
3427 } else {
3428 return src_reg(reg_offset * message_header_scale);
3429 }
3430 }
3431
3432 src_reg
3433 vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
3434 src_reg *reladdr, int reg_offset)
3435 {
3436 if (reladdr) {
3437 src_reg index = src_reg(this, glsl_type::int_type);
3438
3439 emit_before(block, inst, ADD(dst_reg(index), *reladdr,
3440 src_reg(reg_offset)));
3441
3442 /* Pre-gen6, the message header uses byte offsets instead of vec4
3443 * (16-byte) offset units.
3444 */
3445 if (devinfo->gen < 6) {
3446 emit_before(block, inst, MUL(dst_reg(index), index, src_reg(16)));
3447 }
3448
3449 return index;
3450 } else if (devinfo->gen >= 8) {
3451 /* Store the offset in a GRF so we can send-from-GRF. */
3452 src_reg offset = src_reg(this, glsl_type::int_type);
3453 emit_before(block, inst, MOV(dst_reg(offset), src_reg(reg_offset)));
3454 return offset;
3455 } else {
3456 int message_header_scale = devinfo->gen < 6 ? 16 : 1;
3457 return src_reg(reg_offset * message_header_scale);
3458 }
3459 }
3460
3461 /**
3462 * Emits an instruction before @inst to load the value named by @orig_src
3463 * from scratch space at @base_offset to @temp.
3464 *
3465 * @base_offset is measured in 32-byte units (the size of a register).
3466 */
3467 void
3468 vec4_visitor::emit_scratch_read(bblock_t *block, vec4_instruction *inst,
3469 dst_reg temp, src_reg orig_src,
3470 int base_offset)
3471 {
3472 int reg_offset = base_offset + orig_src.reg_offset;
3473 src_reg index = get_scratch_offset(block, inst, orig_src.reladdr,
3474 reg_offset);
3475
3476 emit_before(block, inst, SCRATCH_READ(temp, index));
3477 }
3478
3479 /**
3480 * Emits an instruction after @inst to store the value to be written
3481 * to @orig_dst to scratch space at @base_offset, from @temp.
3482 *
3483 * @base_offset is measured in 32-byte units (the size of a register).
3484 */
3485 void
3486 vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
3487 int base_offset)
3488 {
3489 int reg_offset = base_offset + inst->dst.reg_offset;
3490 src_reg index = get_scratch_offset(block, inst, inst->dst.reladdr,
3491 reg_offset);
3492
3493 /* Create a temporary register to store *inst's result in.
3494 *
3495 * We have to be careful in MOVing from our temporary result register in
3496 * the scratch write. If we swizzle from channels of the temporary that
3497 * weren't initialized, it will confuse live interval analysis, which will
3498 * make spilling fail to make progress.
3499 */
3500 const src_reg temp = swizzle(retype(src_reg(this, glsl_type::vec4_type),
3501 inst->dst.type),
3502 brw_swizzle_for_mask(inst->dst.writemask));
3503 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
3504 inst->dst.writemask));
3505 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
3506 if (inst->opcode != BRW_OPCODE_SEL)
3507 write->predicate = inst->predicate;
3508 write->ir = inst->ir;
3509 write->annotation = inst->annotation;
3510 inst->insert_after(block, write);
3511
3512 inst->dst.file = temp.file;
3513 inst->dst.reg = temp.reg;
3514 inst->dst.reg_offset = temp.reg_offset;
3515 inst->dst.reladdr = NULL;
3516 }
3517
3518 /**
3519 * Checks if \p src and/or \p src.reladdr require a scratch read, and if so,
3520 * adds the scratch read(s) before \p inst. The function also checks for
3521 * recursive reladdr scratch accesses, issuing the corresponding scratch
3522 * loads and rewriting reladdr references accordingly.
3523 *
3524 * \return \p src if it did not require a scratch load, otherwise, the
3525 * register holding the result of the scratch load that the caller should
3526 * use to rewrite src.
3527 */
3528 src_reg
3529 vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
3530 vec4_instruction *inst, src_reg src)
3531 {
3532 /* Resolve recursive reladdr scratch access by calling ourselves
3533 * with src.reladdr
3534 */
3535 if (src.reladdr)
3536 *src.reladdr = emit_resolve_reladdr(scratch_loc, block, inst,
3537 *src.reladdr);
3538
3539 /* Now handle scratch access on src */
3540 if (src.file == GRF && scratch_loc[src.reg] != -1) {
3541 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3542 emit_scratch_read(block, inst, temp, src, scratch_loc[src.reg]);
3543 src.reg = temp.reg;
3544 src.reg_offset = temp.reg_offset;
3545 src.reladdr = NULL;
3546 }
3547
3548 return src;
3549 }
3550
3551 /**
3552 * We can't generally support array access in GRF space, because a
3553 * single instruction's destination can only span 2 contiguous
3554 * registers. So, we send all GRF arrays that get variable index
3555 * access to scratch space.
3556 */
3557 void
3558 vec4_visitor::move_grf_array_access_to_scratch()
3559 {
3560 int scratch_loc[this->alloc.count];
3561 memset(scratch_loc, -1, sizeof(scratch_loc));
3562
3563 /* First, calculate the set of virtual GRFs that need to be punted
3564 * to scratch due to having any array access on them, and where in
3565 * scratch.
3566 */
3567 foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
3568 if (inst->dst.file == GRF && inst->dst.reladdr) {
3569 if (scratch_loc[inst->dst.reg] == -1) {
3570 scratch_loc[inst->dst.reg] = last_scratch;
3571 last_scratch += this->alloc.sizes[inst->dst.reg];
3572 }
3573
3574 for (src_reg *iter = inst->dst.reladdr;
3575 iter->reladdr;
3576 iter = iter->reladdr) {
3577 if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
3578 scratch_loc[iter->reg] = last_scratch;
3579 last_scratch += this->alloc.sizes[iter->reg];
3580 }
3581 }
3582 }
3583
3584 for (int i = 0 ; i < 3; i++) {
3585 for (src_reg *iter = &inst->src[i];
3586 iter->reladdr;
3587 iter = iter->reladdr) {
3588 if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
3589 scratch_loc[iter->reg] = last_scratch;
3590 last_scratch += this->alloc.sizes[iter->reg];
3591 }
3592 }
3593 }
3594 }
3595
3596 /* Now, for anything that will be accessed through scratch, rewrite
3597 * it to load/store. Note that this is a _safe list walk, because
3598 * we may generate a new scratch_write instruction after the one
3599 * we're processing.
3600 */
3601 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
3602 /* Set up the annotation tracking for new generated instructions. */
3603 base_ir = inst->ir;
3604 current_annotation = inst->annotation;
3605
3606 /* First handle scratch access on the dst. Notice we have to handle
3607 * the case where the dst's reladdr also points to scratch space.
3608 */
3609 if (inst->dst.reladdr)
3610 *inst->dst.reladdr = emit_resolve_reladdr(scratch_loc, block, inst,
3611 *inst->dst.reladdr);
3612
3613 /* Now that we have handled any (possibly recursive) reladdr scratch
3614 * accesses for dst we can safely do the scratch write for dst itself
3615 */
3616 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1)
3617 emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]);
3618
3619 /* Now handle scratch access on any src. In this case, since inst->src[i]
3620 * already is a src_reg, we can just call emit_resolve_reladdr with
3621 * inst->src[i] and it will take care of handling scratch loads for
3622 * both src and src.reladdr (recursively).
3623 */
3624 for (int i = 0 ; i < 3; i++) {
3625 inst->src[i] = emit_resolve_reladdr(scratch_loc, block, inst,
3626 inst->src[i]);
3627 }
3628 }
3629 }
3630
3631 /**
3632 * Emits an instruction before @inst to load the value named by @orig_src
3633 * from the pull constant buffer (surface) at @base_offset to @temp.
3634 */
3635 void
3636 vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
3637 dst_reg temp, src_reg orig_src,
3638 int base_offset)
3639 {
3640 int reg_offset = base_offset + orig_src.reg_offset;
3641 src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start);
3642 src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
3643 reg_offset);
3644
3645 emit_pull_constant_load_reg(temp,
3646 index,
3647 offset,
3648 block, inst);
3649 }
3650
3651 /**
3652 * Implements array access of uniforms by inserting a
3653 * PULL_CONSTANT_LOAD instruction.
3654 *
3655 * Unlike temporary GRF array access (where we don't support it due to
3656 * the difficulty of doing relative addressing on instruction
3657 * destinations), we could potentially do array access of uniforms
3658 * that were loaded in GRF space as push constants. In real-world
3659 * usage we've seen, though, the arrays being used are always larger
3660 * than we could load as push constants, so just always move all
3661 * uniform array access out to a pull constant buffer.
3662 */
3663 void
3664 vec4_visitor::move_uniform_array_access_to_pull_constants()
3665 {
3666 int pull_constant_loc[this->uniforms];
3667 memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
3668 bool nested_reladdr;
3669
3670 /* Walk through and find array accesses of uniforms, and put a copy of
3671 * each such uniform in the pull constant buffer.
3672 *
3673 * Note that we don't move constant-indexed array accesses; no testing
3674 * has been done on the performance impact of this choice.
3675 */
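/* Each pass peels only one level of indirection: if a reladdr is itself a
 * variably indexed uniform access, the instructions emitted to resolve it
 * still contain a reladdr, so the loop below runs again (tracked by
 * nested_reladdr).
 */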
3676 do {
3677 nested_reladdr = false;
3678
3679 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
3680 for (int i = 0; i < 3; i++) {
3681 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
3682 continue;
3683
3684 int uniform = inst->src[i].reg;
3685
3686 if (inst->src[i].reladdr->reladdr)
3687 nested_reladdr = true; /* will need another pass */
3688
3689 /* If this array isn't already present in the pull constant buffer,
3690 * add it.
3691 */
3692 if (pull_constant_loc[uniform] == -1) {
3693 const gl_constant_value **values =
3694 &stage_prog_data->param[uniform * 4];
3695
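/* pull_constant_loc[] is in units of vec4 slots, while nr_pull_params
 * counts scalar components, hence the division by 4.
 */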
3696 pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4;
3697
3698 assert(uniform < uniform_array_size);
3699 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
3700 stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
3701 = values[j];
3702 }
3703 }
3704
3705 /* Set up the annotation tracking for newly generated instructions. */
3706 base_ir = inst->ir;
3707 current_annotation = inst->annotation;
3708
3709 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3710
3711 emit_pull_constant_load(block, inst, temp, inst->src[i],
3712 pull_constant_loc[uniform]);
3713
3714 inst->src[i].file = temp.file;
3715 inst->src[i].reg = temp.reg;
3716 inst->src[i].reg_offset = temp.reg_offset;
3717 inst->src[i].reladdr = NULL;
3718 }
3719 }
3720 } while (nested_reladdr);
3721
3722 /* Now there are no accesses of the UNIFORM file with a reladdr, so
3723 * there is no need to track them as larger-than-vec4 objects. This is
3724 * relied on when cutting unused uniform vectors out of the push
3725 * constants.
3726 */
3727 split_uniform_registers();
3728 }
3729
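/**
 * If @reg is an unsigned-doubleword source with the negate modifier set,
 * apply the negation in a separate MOV into a temporary and rewrite @reg
 * to read from that temporary instead.
 */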
3730 void
3731 vec4_visitor::resolve_ud_negate(src_reg *reg)
3732 {
3733 if (reg->type != BRW_REGISTER_TYPE_UD ||
3734 !reg->negate)
3735 return;
3736
3737 src_reg temp = src_reg(this, glsl_type::uvec4_type);
3738 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
3739 *reg = temp;
3740 }
3741
3742 /**
3743 * Resolve the result of a Gen4-5 CMP instruction to a proper boolean.
3744 *
3745 * CMP on Gen4-5 only sets the LSB of the result; the rest are undefined.
3746 * If we need a proper boolean value, we have to fix it up to be 0 or ~0.
3747 */
3748 void
3749 vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg)
3750 {
3751 assert(devinfo->gen <= 5);
3752
3753 if (!rvalue->type->is_boolean())
3754 return;
3755
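/* ANDing with 1 keeps only the defined LSB; negating the resulting 0/1
 * integer then produces the 0/~0 values callers expect.
 */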
3756 src_reg and_result = src_reg(this, rvalue->type);
3757 src_reg neg_result = src_reg(this, rvalue->type);
3758 emit(AND(dst_reg(and_result), *reg, src_reg(1)));
3759 emit(MOV(dst_reg(neg_result), negate(and_result)));
3760 *reg = neg_result;
3761 }
3762
3763 vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
3764 void *log_data,
3765 struct gl_program *prog,
3766 const struct brw_vue_prog_key *key,
3767 struct brw_vue_prog_data *prog_data,
3768 struct gl_shader_program *shader_prog,
3769 gl_shader_stage stage,
3770 void *mem_ctx,
3771 bool no_spills,
3772 int shader_time_index)
3773 : backend_shader(compiler, log_data, mem_ctx,
3774 shader_prog, prog, &prog_data->base, stage),
3775 key(key),
3776 prog_data(prog_data),
3777 sanity_param_count(0),
3778 fail_msg(NULL),
3779 first_non_payload_grf(0),
3780 need_all_constants_in_pull_buffer(false),
3781 no_spills(no_spills),
3782 shader_time_index(shader_time_index),
3783 last_scratch(0)
3784 {
3785 this->failed = false;
3786
3787 this->base_ir = NULL;
3788 this->current_annotation = NULL;
3789 memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
3790
3791 this->variable_ht = hash_table_ctor(0,
3792 hash_table_pointer_hash,
3793 hash_table_pointer_compare);
3794
3795 this->virtual_grf_start = NULL;
3796 this->virtual_grf_end = NULL;
3797 this->live_intervals = NULL;
3798
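/* Gen7+ has no architected MRF file; the registers from GEN7_MRF_HACK_START
 * upward stand in for it, so they are not available for allocation here.
 */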
3799 this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
3800
3801 this->uniforms = 0;
3802
3803 /* Initialize uniform_array_size to at least 1, because the pre-gen6 VS
3804 * requires at least one uniform. See setup_uniforms() in brw_vec4.cpp.
3805 */
3806 this->uniform_array_size = 1;
3807 if (prog_data) {
3808 this->uniform_array_size =
3809 MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1);
3810 }
3811
3812 this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
3813 this->uniform_vector_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
3814 }
3815
3816 vec4_visitor::~vec4_visitor()
3817 {
3818 hash_table_dtor(this->variable_ht);
3819 }
3820
3821
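/**
 * Mark the compile as failed and record the first failure message,
 * printing it to stderr when debugging output is enabled.
 */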
3822 void
3823 vec4_visitor::fail(const char *format, ...)
3824 {
3825 va_list va;
3826 char *msg;
3827
3828 if (failed)
3829 return;
3830
3831 failed = true;
3832
3833 va_start(va, format);
3834 msg = ralloc_vasprintf(mem_ctx, format, va);
3835 va_end(va);
3836 msg = ralloc_asprintf(mem_ctx, "%s compile failed: %s\n", stage_abbrev, msg);
3837
3838 this->fail_msg = msg;
3839
3840 if (debug_enabled) {
3841 fprintf(stderr, "%s", msg);
3842 }
3843 }
3844
3845 } /* namespace brw */