mesa.git: src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp (commit af01d8eed96dff18f58f9b3598654f776eafbeb9)
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 #include "brw_cfg.h"
26 #include "glsl/ir_uniform.h"
27 #include "program/sampler.h"
28
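/* First MRF used for scratch spill/fill and pull-constant messages (see
 * SCRATCH_READ/SCRATCH_WRITE below).  Gen6 exposes more MRFs (24 instead of
 * 16), so its scratch area starts higher up.
 */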
29 #define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13)
30
31 namespace brw {
32
33 vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
34 const src_reg &src0, const src_reg &src1,
35 const src_reg &src2)
36 {
37 this->opcode = opcode;
38 this->dst = dst;
39 this->src[0] = src0;
40 this->src[1] = src1;
41 this->src[2] = src2;
42 this->saturate = false;
43 this->force_writemask_all = false;
44 this->no_dd_clear = false;
45 this->no_dd_check = false;
46 this->writes_accumulator = false;
47 this->conditional_mod = BRW_CONDITIONAL_NONE;
48 this->predicate = BRW_PREDICATE_NONE;
49 this->predicate_inverse = false;
50 this->target = 0;
51 this->regs_written = (dst.file == BAD_FILE ? 0 : 1);
52 this->shadow_compare = false;
53 this->ir = NULL;
54 this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
55 this->header_size = 0;
56 this->flag_subreg = 0;
57 this->mlen = 0;
58 this->base_mrf = 0;
59 this->offset = 0;
60 this->annotation = NULL;
61 }
62
63 vec4_instruction *
64 vec4_visitor::emit(vec4_instruction *inst)
65 {
66 inst->ir = this->base_ir;
67 inst->annotation = this->current_annotation;
68
69 this->instructions.push_tail(inst);
70
71 return inst;
72 }
73
74 vec4_instruction *
75 vec4_visitor::emit_before(bblock_t *block, vec4_instruction *inst,
76 vec4_instruction *new_inst)
77 {
78 new_inst->ir = inst->ir;
79 new_inst->annotation = inst->annotation;
80
81 inst->insert_before(block, new_inst);
82
83 return inst;
84 }
85
86 vec4_instruction *
87 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
88 const src_reg &src1, const src_reg &src2)
89 {
90 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1, src2));
91 }
92
93
94 vec4_instruction *
95 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
96 const src_reg &src1)
97 {
98 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1));
99 }
100
101 vec4_instruction *
102 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0)
103 {
104 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0));
105 }
106
107 vec4_instruction *
108 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst)
109 {
110 return emit(new(mem_ctx) vec4_instruction(opcode, dst));
111 }
112
113 vec4_instruction *
114 vec4_visitor::emit(enum opcode opcode)
115 {
116 return emit(new(mem_ctx) vec4_instruction(opcode, dst_reg()));
117 }
118
119 #define ALU1(op) \
120 vec4_instruction * \
121 vec4_visitor::op(const dst_reg &dst, const src_reg &src0) \
122 { \
123 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, src0); \
124 }
125
126 #define ALU2(op) \
127 vec4_instruction * \
128 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
129 const src_reg &src1) \
130 { \
131 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
132 src0, src1); \
133 }
134
135 #define ALU2_ACC(op) \
136 vec4_instruction * \
137 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
138 const src_reg &src1) \
139 { \
140 vec4_instruction *inst = new(mem_ctx) vec4_instruction( \
141 BRW_OPCODE_##op, dst, src0, src1); \
142 inst->writes_accumulator = true; \
143 return inst; \
144 }
145
146 #define ALU3(op) \
147 vec4_instruction * \
148 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
149 const src_reg &src1, const src_reg &src2) \
150 { \
151 assert(devinfo->gen >= 6); \
152 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
153 src0, src1, src2); \
154 }
155
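/* These macros expand into small builder helpers such as
 *
 *    vec4_instruction *ADD(const dst_reg &dst, const src_reg &src0,
 *                          const src_reg &src1);
 *
 * which allocate an instruction but do not add it to the instruction
 * stream; callers typically wrap them in emit(), e.g. emit(ADD(dst, a, b)).
 */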
156 ALU1(NOT)
157 ALU1(MOV)
158 ALU1(FRC)
159 ALU1(RNDD)
160 ALU1(RNDE)
161 ALU1(RNDZ)
162 ALU1(F32TO16)
163 ALU1(F16TO32)
164 ALU2(ADD)
165 ALU2(MUL)
166 ALU2_ACC(MACH)
167 ALU2(AND)
168 ALU2(OR)
169 ALU2(XOR)
170 ALU2(DP3)
171 ALU2(DP4)
172 ALU2(DPH)
173 ALU2(SHL)
174 ALU2(SHR)
175 ALU2(ASR)
176 ALU3(LRP)
177 ALU1(BFREV)
178 ALU3(BFE)
179 ALU2(BFI1)
180 ALU3(BFI2)
181 ALU1(FBH)
182 ALU1(FBL)
183 ALU1(CBIT)
184 ALU3(MAD)
185 ALU2_ACC(ADDC)
186 ALU2_ACC(SUBB)
187 ALU2(MAC)
188
189 /** Gen4 predicated IF. */
190 vec4_instruction *
191 vec4_visitor::IF(enum brw_predicate predicate)
192 {
193 vec4_instruction *inst;
194
195 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF);
196 inst->predicate = predicate;
197
198 return inst;
199 }
200
201 /** Gen6 IF with embedded comparison. */
202 vec4_instruction *
203 vec4_visitor::IF(src_reg src0, src_reg src1,
204 enum brw_conditional_mod condition)
205 {
206 assert(devinfo->gen == 6);
207
208 vec4_instruction *inst;
209
210 resolve_ud_negate(&src0);
211 resolve_ud_negate(&src1);
212
213 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF, dst_null_d(),
214 src0, src1);
215 inst->conditional_mod = condition;
216
217 return inst;
218 }
219
220 /**
221 * CMP: Sets the low bit of the destination channels with the result
222 * of the comparison, while the upper bits are undefined, and updates
223 * the flag register with the packed 16 bits of the result.
224 */
225 vec4_instruction *
226 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1,
227 enum brw_conditional_mod condition)
228 {
229 vec4_instruction *inst;
230
231 /* Take the instruction:
232 *
233 * CMP null<d> src0<f> src1<f>
234 *
235 * Original gen4 does type conversion to the destination type before
236 * comparison, producing garbage results for floating point comparisons.
237 *
238 * The destination type doesn't matter on newer generations, so we set the
239 * type to match src0 so we can compact the instruction.
240 */
241 dst.type = src0.type;
242 if (dst.file == HW_REG)
243 dst.fixed_hw_reg.type = dst.type;
244
245 resolve_ud_negate(&src0);
246 resolve_ud_negate(&src1);
247
248 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, dst, src0, src1);
249 inst->conditional_mod = condition;
250
251 return inst;
252 }
253
254 vec4_instruction *
255 vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
256 {
257 vec4_instruction *inst;
258
259 inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
260 dst, index);
261 inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
262 inst->mlen = 2;
263
264 return inst;
265 }
266
267 vec4_instruction *
268 vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
269 const src_reg &index)
270 {
271 vec4_instruction *inst;
272
273 inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
274 dst, src, index);
275 inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen);
276 inst->mlen = 3;
277
278 return inst;
279 }
280
281 void
282 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
283 {
284 static enum opcode dot_opcodes[] = {
285 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
286 };
287
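   /* elements must be 2, 3 or 4, selecting DP2, DP3 or DP4 respectively. */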
288 emit(dot_opcodes[elements - 2], dst, src0, src1);
289 }
290
291 src_reg
292 vec4_visitor::fix_3src_operand(const src_reg &src)
293 {
294 /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
295 * able to use vertical stride of zero to replicate the vec4 uniform, like
296 *
297 * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
298 *
299 * But you can't, since vertical stride is always four in three-source
300 * instructions. Instead, insert a MOV instruction to do the replication so
301 * that the three-source instruction can consume it.
302 */
303
304 /* The MOV is only needed if the source is a uniform or immediate. */
305 if (src.file != UNIFORM && src.file != IMM)
306 return src;
307
308 if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
309 return src;
310
311 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
312 expanded.type = src.type;
313 emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
314 return src_reg(expanded);
315 }
316
317 src_reg
318 vec4_visitor::resolve_source_modifiers(const src_reg &src)
319 {
320 if (!src.abs && !src.negate)
321 return src;
322
323 dst_reg resolved = dst_reg(this, glsl_type::ivec4_type);
324 resolved.type = src.type;
325 emit(MOV(resolved, src));
326
327 return src_reg(resolved);
328 }
329
330 src_reg
331 vec4_visitor::fix_math_operand(const src_reg &src)
332 {
333 if (devinfo->gen < 6 || devinfo->gen >= 8 || src.file == BAD_FILE)
334 return src;
335
336 /* The gen6 math instruction ignores the source modifiers --
337 * swizzle, abs, negate, and at least some parts of the register
338 * region description.
339 *
340 * Rather than trying to enumerate all these cases, *always* expand the
341 * operand to a temp GRF for gen6.
342 *
343 * For gen7, keep the operand as-is, except if immediate, which gen7 still
344 * can't use.
345 */
346
347 if (devinfo->gen == 7 && src.file != IMM)
348 return src;
349
350 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
351 expanded.type = src.type;
352 emit(MOV(expanded, src));
353 return src_reg(expanded);
354 }
355
356 vec4_instruction *
357 vec4_visitor::emit_math(enum opcode opcode,
358 const dst_reg &dst,
359 const src_reg &src0, const src_reg &src1)
360 {
361 vec4_instruction *math =
362 emit(opcode, dst, fix_math_operand(src0), fix_math_operand(src1));
363
364 if (devinfo->gen == 6 && dst.writemask != WRITEMASK_XYZW) {
365 /* MATH on Gen6 must be align1, so we can't do writemasks. */
366 math->dst = dst_reg(this, glsl_type::vec4_type);
367 math->dst.type = dst.type;
368 math = emit(MOV(dst, src_reg(math->dst)));
369 } else if (devinfo->gen < 6) {
370 math->base_mrf = 1;
371 math->mlen = src1.file == BAD_FILE ? 1 : 2;
372 }
373
374 return math;
375 }
376
377 void
378 vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
379 {
380 if (devinfo->gen < 7) {
381 unreachable("ir_unop_pack_half_2x16 should be lowered");
382 }
383
384 assert(dst.type == BRW_REGISTER_TYPE_UD);
385 assert(src0.type == BRW_REGISTER_TYPE_F);
386
387 /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
388 *
389 * Because this instruction does not have a 16-bit floating-point type,
390 * the destination data type must be Word (W).
391 *
392 * The destination must be DWord-aligned and specify a horizontal stride
393 * (HorzStride) of 2. The 16-bit result is stored in the lower word of
394 * each destination channel and the upper word is not modified.
395 *
396 * The above restriction implies that the f32to16 instruction must use
397 * align1 mode, because only in align1 mode is it possible to specify
398 * horizontal stride. We choose here to defy the hardware docs and emit
399 * align16 instructions.
400 *
401 * (I [chadv] did attempt to emit align1 instructions for VS f32to16
402 * instructions. I was partially successful in that the code passed all
403 * tests. However, the code was dubiously correct and fragile, and the
404 * tests were not harsh enough to probe that frailty. Not trusting the
405 * code, I chose instead to remain in align16 mode in defiance of the hw
406 * docs).
407 *
408 * I've [chadv] experimentally confirmed that, on gen7 hardware and the
409 * simulator, emitting a f32to16 in align16 mode with UD as destination
410 * data type is safe. The behavior differs from that specified in the PRM
411 * in that the upper word of each destination channel is cleared to 0.
412 */
413
414 dst_reg tmp_dst(this, glsl_type::uvec2_type);
415 src_reg tmp_src(tmp_dst);
416
417 #if 0
418 /* Verify the undocumented behavior on which the following instructions
419 * rely. If f32to16 fails to clear the upper word of the X and Y channels,
420 * then the result of the bit-or instruction below will be incorrect.
421 *
422 * You should inspect the disasm output in order to verify that the MOV is
423 * not optimized away.
424 */
425 emit(MOV(tmp_dst, src_reg(0x12345678u)));
426 #endif
427
428 /* Give tmp the form below, where "." means untouched.
429 *
430 * w z y x w z y x
431 * |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
432 *
433 * The upper word of each write-channel must be 0 for the following
434 * bit-shift and bit-or instructions to work.  Note that this
435 * relies on the undocumented hardware behavior mentioned above.
436 */
437 tmp_dst.writemask = WRITEMASK_XY;
438 emit(F32TO16(tmp_dst, src0));
439
440 /* Give the write-channels of dst the form:
441 * 0xhhhh0000
442 */
443 tmp_src.swizzle = BRW_SWIZZLE_YYYY;
444 emit(SHL(dst, tmp_src, src_reg(16u)));
445
446 /* Finally, give the write-channels of dst the form of packHalf2x16's
447 * output:
448 * 0xhhhhllll
449 */
450 tmp_src.swizzle = BRW_SWIZZLE_XXXX;
451 emit(OR(dst, src_reg(dst), tmp_src));
452 }
453
454 void
455 vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
456 {
457 if (devinfo->gen < 7) {
458 unreachable("ir_unop_unpack_half_2x16 should be lowered");
459 }
460
461 assert(dst.type == BRW_REGISTER_TYPE_F);
462 assert(src0.type == BRW_REGISTER_TYPE_UD);
463
464 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
465 *
466 * Because this instruction does not have a 16-bit floating-point type,
467 * the source data type must be Word (W). The destination type must be
468 * F (Float).
469 *
470 * To use W as the source data type, we must adjust horizontal strides,
471 * which is only possible in align1 mode. All my [chadv] attempts at
472 * emitting align1 instructions for unpackHalf2x16 failed to pass the
473 * Piglit tests, so I gave up.
474 *
475 * I've verified that, on gen7 hardware and the simulator, it is safe to
476 * emit f16to32 in align16 mode with UD as source data type.
477 */
478
479 dst_reg tmp_dst(this, glsl_type::uvec2_type);
480 src_reg tmp_src(tmp_dst);
481
482 tmp_dst.writemask = WRITEMASK_X;
483 emit(AND(tmp_dst, src0, src_reg(0xffffu)));
484
485 tmp_dst.writemask = WRITEMASK_Y;
486 emit(SHR(tmp_dst, src0, src_reg(16u)));
487
488 dst.writemask = WRITEMASK_XY;
489 emit(F16TO32(dst, tmp_src));
490 }
491
492 void
493 vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
494 {
495 /* Instead of splitting the 32-bit integer, shifting, and ORing it back
496 * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
497 * is not suitable to generate the shift values, but we can use the packed
498 * vector float and a type-converting MOV.
499 */
500 dst_reg shift(this, glsl_type::uvec4_type);
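   /* The four bytes are the VF (packed 8-bit restricted float) encodings of
    * 0.0, 8.0, 16.0 and 24.0; the type-converting MOV turns them into the
    * integer shift counts.  emit_unpack_snorm_4x8() below uses the same trick.
    */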
501 emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
502
503 dst_reg shifted(this, glsl_type::uvec4_type);
504 src0.swizzle = BRW_SWIZZLE_XXXX;
505 emit(SHR(shifted, src0, src_reg(shift)));
506
507 shifted.type = BRW_REGISTER_TYPE_UB;
508 dst_reg f(this, glsl_type::vec4_type);
509 emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
510
511 emit(MUL(dst, src_reg(f), src_reg(1.0f / 255.0f)));
512 }
513
514 void
515 vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
516 {
517 /* Instead of splitting the 32-bit integer, shifting, and ORing it back
518 * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
519 * is not suitable to generate the shift values, but we can use the packed
520 * vector float and a type-converting MOV.
521 */
522 dst_reg shift(this, glsl_type::uvec4_type);
523 emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
524
525 dst_reg shifted(this, glsl_type::uvec4_type);
526 src0.swizzle = BRW_SWIZZLE_XXXX;
527 emit(SHR(shifted, src0, src_reg(shift)));
528
529 shifted.type = BRW_REGISTER_TYPE_B;
530 dst_reg f(this, glsl_type::vec4_type);
531 emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
532
533 dst_reg scaled(this, glsl_type::vec4_type);
534 emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f)));
535
536 dst_reg max(this, glsl_type::vec4_type);
537 emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), src_reg(-1.0f));
538 emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), src_reg(1.0f));
539 }
540
541 void
542 vec4_visitor::emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0)
543 {
544 dst_reg saturated(this, glsl_type::vec4_type);
545 vec4_instruction *inst = emit(MOV(saturated, src0));
546 inst->saturate = true;
547
548 dst_reg scaled(this, glsl_type::vec4_type);
549 emit(MUL(scaled, src_reg(saturated), src_reg(255.0f)));
550
551 dst_reg rounded(this, glsl_type::vec4_type);
552 emit(RNDE(rounded, src_reg(scaled)));
553
554 dst_reg u(this, glsl_type::uvec4_type);
555 emit(MOV(u, src_reg(rounded)));
556
557 src_reg bytes(u);
558 emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
559 }
560
561 void
562 vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
563 {
564 dst_reg max(this, glsl_type::vec4_type);
565 emit_minmax(BRW_CONDITIONAL_GE, max, src0, src_reg(-1.0f));
566
567 dst_reg min(this, glsl_type::vec4_type);
568 emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), src_reg(1.0f));
569
570 dst_reg scaled(this, glsl_type::vec4_type);
571 emit(MUL(scaled, src_reg(min), src_reg(127.0f)));
572
573 dst_reg rounded(this, glsl_type::vec4_type);
574 emit(RNDE(rounded, src_reg(scaled)));
575
576 dst_reg i(this, glsl_type::ivec4_type);
577 emit(MOV(i, src_reg(rounded)));
578
579 src_reg bytes(i);
580 emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
581 }
582
583 /**
584 * Returns the minimum number of vec4 elements needed to pack a type.
585 *
586 * For simple types, it will return 1 (a single vec4); for matrices, the
587 * number of columns; for array and struct, the sum of the vec4_size of
588 * each of its elements; and for sampler and atomic, zero.
589 *
590 * This method is useful to calculate how much register space is needed to
591 * store a particular type.
592 */
593 extern "C" int
594 type_size_vec4(const struct glsl_type *type)
595 {
596 unsigned int i;
597 int size;
598
599 switch (type->base_type) {
600 case GLSL_TYPE_UINT:
601 case GLSL_TYPE_INT:
602 case GLSL_TYPE_FLOAT:
603 case GLSL_TYPE_BOOL:
604 if (type->is_matrix()) {
605 return type->matrix_columns;
606 } else {
607 /* Regardless of the size of the vector, it gets a vec4. This is bad
608 * packing for things like floats, but otherwise arrays become a
609 * mess. Hopefully a later pass over the code can pack scalars
610 * down if appropriate.
611 */
612 return 1;
613 }
614 case GLSL_TYPE_ARRAY:
615 assert(type->length > 0);
616 return type_size_vec4(type->fields.array) * type->length;
617 case GLSL_TYPE_STRUCT:
618 size = 0;
619 for (i = 0; i < type->length; i++) {
620 size += type_size_vec4(type->fields.structure[i].type);
621 }
622 return size;
623 case GLSL_TYPE_SUBROUTINE:
624 return 1;
625
626 case GLSL_TYPE_SAMPLER:
627 /* Samplers take up no register space, since they're baked in at
628 * link time.
629 */
630 return 0;
631 case GLSL_TYPE_ATOMIC_UINT:
632 return 0;
633 case GLSL_TYPE_IMAGE:
634 return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
635 case GLSL_TYPE_VOID:
636 case GLSL_TYPE_DOUBLE:
637 case GLSL_TYPE_ERROR:
638 case GLSL_TYPE_INTERFACE:
639 unreachable("not reached");
640 }
641
642 return 0;
643 }
644
645 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
646 {
647 init();
648
649 this->file = GRF;
650 this->reg = v->alloc.allocate(type_size_vec4(type));
651
652 if (type->is_array() || type->is_record()) {
653 this->swizzle = BRW_SWIZZLE_NOOP;
654 } else {
655 this->swizzle = brw_swizzle_for_size(type->vector_elements);
656 }
657
658 this->type = brw_type_for_base_type(type);
659 }
660
661 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
662 {
663 assert(size > 0);
664
665 init();
666
667 this->file = GRF;
668 this->reg = v->alloc.allocate(type_size_vec4(type) * size);
669
670 this->swizzle = BRW_SWIZZLE_NOOP;
671
672 this->type = brw_type_for_base_type(type);
673 }
674
675 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
676 {
677 init();
678
679 this->file = GRF;
680 this->reg = v->alloc.allocate(type_size_vec4(type));
681
682 if (type->is_array() || type->is_record()) {
683 this->writemask = WRITEMASK_XYZW;
684 } else {
685 this->writemask = (1 << type->vector_elements) - 1;
686 }
687
688 this->type = brw_type_for_base_type(type);
689 }
690
691 void
692 vec4_visitor::setup_vec4_uniform_value(unsigned param_offset,
693 const gl_constant_value *values,
694 unsigned n)
695 {
696 static const gl_constant_value zero = { 0 };
697
698 assert(param_offset % 4 == 0);
699
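   /* Point the first n components at the supplied values and the remaining
    * components at a shared zero, so every uniform occupies a full vec4 slot.
    */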
700 for (unsigned i = 0; i < n; ++i)
701 stage_prog_data->param[param_offset + i] = &values[i];
702
703 for (unsigned i = n; i < 4; ++i)
704 stage_prog_data->param[param_offset + i] = &zero;
705 }
706
707 vec4_instruction *
708 vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
709 src_reg src0, src_reg src1)
710 {
711 vec4_instruction *inst;
712
713 if (devinfo->gen >= 6) {
714 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
715 inst->conditional_mod = conditionalmod;
716 } else {
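      /* Original gen4/5 SEL can't take a conditional modifier, so do the
       * comparison separately and predicate the SEL on its flag result.
       */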
717 emit(CMP(dst, src0, src1, conditionalmod));
718
719 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
720 inst->predicate = BRW_PREDICATE_NORMAL;
721 }
722
723 return inst;
724 }
725
726 vec4_instruction *
727 vec4_visitor::emit_lrp(const dst_reg &dst,
728 const src_reg &x, const src_reg &y, const src_reg &a)
729 {
730 if (devinfo->gen >= 6) {
731 /* Note that the instruction's argument order is reversed from GLSL
732 * and the IR.
733 */
734 return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y),
735 fix_3src_operand(x)));
736 } else {
737 /* Earlier generations don't support three source operations, so we
738 * need to emit x*(1-a) + y*a.
739 */
740 dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type);
741 dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
742 dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
743 y_times_a.writemask = dst.writemask;
744 one_minus_a.writemask = dst.writemask;
745 x_times_one_minus_a.writemask = dst.writemask;
746
747 emit(MUL(y_times_a, y, a));
748 emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
749 emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
750 return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
751 }
752 }
753
754 /**
755 * Emits the instructions needed to perform a pull constant load. before_block
756 * and before_inst can be NULL, in which case the instructions will be appended
757 * to the end of the instruction list.
758 */
759 void
760 vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
761 src_reg surf_index,
762 src_reg offset_reg,
763 bblock_t *before_block,
764 vec4_instruction *before_inst)
765 {
766 assert((before_inst == NULL && before_block == NULL) ||
767 (before_inst && before_block));
768
769 vec4_instruction *pull;
770
771 if (devinfo->gen >= 9) {
772 /* Gen9+ needs a message header in order to use SIMD4x2 mode */
773 src_reg header(this, glsl_type::uvec4_type, 2);
774
775 pull = new(mem_ctx)
776 vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
777 dst_reg(header));
778
779 if (before_inst)
780 emit_before(before_block, before_inst, pull);
781 else
782 emit(pull);
783
784 dst_reg index_reg = retype(offset(dst_reg(header), 1),
785 offset_reg.type);
786 pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
787
788 if (before_inst)
789 emit_before(before_block, before_inst, pull);
790 else
791 emit(pull);
792
793 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
794 dst,
795 surf_index,
796 header);
797 pull->mlen = 2;
798 pull->header_size = 1;
799 } else if (devinfo->gen >= 7) {
800 dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
801
802 grf_offset.type = offset_reg.type;
803
804 pull = MOV(grf_offset, offset_reg);
805
806 if (before_inst)
807 emit_before(before_block, before_inst, pull);
808 else
809 emit(pull);
810
811 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
812 dst,
813 surf_index,
814 src_reg(grf_offset));
815 pull->mlen = 1;
816 } else {
817 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
818 dst,
819 surf_index,
820 offset_reg);
821 pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
822 pull->mlen = 1;
823 }
824
825 if (before_inst)
826 emit_before(before_block, before_inst, pull);
827 else
828 emit(pull);
829 }
830
831 src_reg
832 vec4_visitor::emit_uniformize(const src_reg &src)
833 {
834 const src_reg chan_index(this, glsl_type::uint_type);
835 const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
836 src.type);
837
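   /* FIND_LIVE_CHANNEL yields the index of an enabled channel, and BROADCAST
    * then replicates that channel's value of src across every channel of dst,
    * producing a value that is uniform across the SIMD execution.
    */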
838 emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
839 ->force_writemask_all = true;
840 emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
841 ->force_writemask_all = true;
842
843 return src_reg(dst);
844 }
845
846 src_reg
847 vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
848 src_reg coordinate, src_reg sampler)
849 {
850 vec4_instruction *inst =
851 new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
852 dst_reg(this, glsl_type::uvec4_type));
853 inst->base_mrf = 2;
854 inst->src[1] = sampler;
855
856 int param_base;
857
858 if (devinfo->gen >= 9) {
859 /* Gen9+ needs a message header in order to use SIMD4x2 mode */
860 vec4_instruction *header_inst = new(mem_ctx)
861 vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
862 dst_reg(MRF, inst->base_mrf));
863
864 emit(header_inst);
865
866 inst->mlen = 2;
867 inst->header_size = 1;
868 param_base = inst->base_mrf + 1;
869 } else {
870 inst->mlen = 1;
871 param_base = inst->base_mrf;
872 }
873
874 /* Parameters are: u, v, r, lod; lod will always be zero due to API restrictions. */
875 int coord_mask = (1 << coordinate_type->vector_elements) - 1;
876 int zero_mask = 0xf & ~coord_mask;
877
878 emit(MOV(dst_reg(MRF, param_base, coordinate_type, coord_mask),
879 coordinate));
880
881 emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask),
882 src_reg(0)));
883
884 emit(inst);
885 return src_reg(inst->dst);
886 }
887
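/* Returns true if the sampler index can't be encoded directly in the 4-bit
 * sampler field of the message descriptor, either because it is >= 16 or
 * because it isn't known at compile time; the gen check reflects that only
 * Haswell and later can address that many samplers.
 */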
888 bool
889 vec4_visitor::is_high_sampler(src_reg sampler)
890 {
891 if (devinfo->gen < 8 && !devinfo->is_haswell)
892 return false;
893
894 return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
895 }
896
897 void
898 vec4_visitor::emit_texture(ir_texture_opcode op,
899 dst_reg dest,
900 const glsl_type *dest_type,
901 src_reg coordinate,
902 int coord_components,
903 src_reg shadow_comparitor,
904 src_reg lod, src_reg lod2,
905 src_reg sample_index,
906 uint32_t constant_offset,
907 src_reg offset_value,
908 src_reg mcs,
909 bool is_cube_array,
910 uint32_t sampler,
911 src_reg sampler_reg)
912 {
913 enum opcode opcode;
914 switch (op) {
915 case ir_tex: opcode = SHADER_OPCODE_TXL; break;
916 case ir_txl: opcode = SHADER_OPCODE_TXL; break;
917 case ir_txd: opcode = SHADER_OPCODE_TXD; break;
918 case ir_txf: opcode = SHADER_OPCODE_TXF; break;
919 case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
920 case ir_txs: opcode = SHADER_OPCODE_TXS; break;
921 case ir_tg4: opcode = offset_value.file != BAD_FILE
922 ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
923 case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
924 case ir_texture_samples: opcode = SHADER_OPCODE_SAMPLEINFO; break;
925 case ir_txb:
926 unreachable("TXB is not valid for vertex shaders.");
927 case ir_lod:
928 unreachable("LOD is not valid for vertex shaders.");
929 default:
930 unreachable("Unrecognized tex op");
931 }
932
933 vec4_instruction *inst = new(mem_ctx) vec4_instruction(
934 opcode, dst_reg(this, dest_type));
935
936 inst->offset = constant_offset;
937
938 /* The message header is necessary for:
939 * - Gen4 (always)
940 * - Gen9+ for selecting SIMD4x2
941 * - Texel offsets
942 * - Gather channel selection
943 * - Sampler indices too large to fit in a 4-bit value.
944 * - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
945 */
946 inst->header_size =
947 (devinfo->gen < 5 || devinfo->gen >= 9 ||
948 inst->offset != 0 || op == ir_tg4 ||
949 op == ir_texture_samples ||
950 is_high_sampler(sampler_reg)) ? 1 : 0;
951 inst->base_mrf = 2;
952 inst->mlen = inst->header_size;
953 inst->dst.writemask = WRITEMASK_XYZW;
954 inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
955
956 inst->src[1] = sampler_reg;
957
958 /* MRF for the first parameter */
959 int param_base = inst->base_mrf + inst->header_size;
960
961 if (op == ir_txs || op == ir_query_levels) {
962 int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
963 emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
964 inst->mlen++;
965 } else if (op == ir_texture_samples) {
966 inst->dst.writemask = WRITEMASK_X;
967 } else {
968 /* Load the coordinate */
969 /* FINISHME: gl_clamp_mask and saturate */
970 int coord_mask = (1 << coord_components) - 1;
971 int zero_mask = 0xf & ~coord_mask;
972
973 emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
974 coordinate));
975 inst->mlen++;
976
977 if (zero_mask != 0) {
978 emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
979 src_reg(0)));
980 }
981 /* Load the shadow comparitor */
982 if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) {
983 emit(MOV(dst_reg(MRF, param_base + 1, shadow_comparitor.type,
984 WRITEMASK_X),
985 shadow_comparitor));
986 inst->mlen++;
987 }
988
989 /* Load the LOD info */
990 if (op == ir_tex || op == ir_txl) {
991 int mrf, writemask;
992 if (devinfo->gen >= 5) {
993 mrf = param_base + 1;
994 if (shadow_comparitor.file != BAD_FILE) {
995 writemask = WRITEMASK_Y;
996 /* mlen already incremented */
997 } else {
998 writemask = WRITEMASK_X;
999 inst->mlen++;
1000 }
1001 } else /* devinfo->gen == 4 */ {
1002 mrf = param_base;
1003 writemask = WRITEMASK_W;
1004 }
1005 emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
1006 } else if (op == ir_txf) {
1007 emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
1008 } else if (op == ir_txf_ms) {
1009 emit(MOV(dst_reg(MRF, param_base + 1, sample_index.type, WRITEMASK_X),
1010 sample_index));
1011 if (devinfo->gen >= 7) {
1012 /* MCS data is in the first channel of `mcs`, but we need to get it into
1013 * the .y channel of the second vec4 of params, so replicate .x across
1014 * the whole vec4 and then mask off everything except .y
1015 */
1016 mcs.swizzle = BRW_SWIZZLE_XXXX;
1017 emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
1018 mcs));
1019 }
1020 inst->mlen++;
1021 } else if (op == ir_txd) {
1022 const brw_reg_type type = lod.type;
1023
1024 if (devinfo->gen >= 5) {
1025 lod.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1026 lod2.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1027 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), lod));
1028 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), lod2));
1029 inst->mlen++;
1030
1031 if (dest_type->vector_elements == 3 || shadow_comparitor.file != BAD_FILE) {
1032 lod.swizzle = BRW_SWIZZLE_ZZZZ;
1033 lod2.swizzle = BRW_SWIZZLE_ZZZZ;
1034 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), lod));
1035 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), lod2));
1036 inst->mlen++;
1037
1038 if (shadow_comparitor.file != BAD_FILE) {
1039 emit(MOV(dst_reg(MRF, param_base + 2,
1040 shadow_comparitor.type, WRITEMASK_Z),
1041 shadow_comparitor));
1042 }
1043 }
1044 } else /* devinfo->gen == 4 */ {
1045 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), lod));
1046 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), lod2));
1047 inst->mlen += 2;
1048 }
1049 } else if (op == ir_tg4 && offset_value.file != BAD_FILE) {
1050 if (shadow_comparitor.file != BAD_FILE) {
1051 emit(MOV(dst_reg(MRF, param_base, shadow_comparitor.type, WRITEMASK_W),
1052 shadow_comparitor));
1053 }
1054
1055 emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY),
1056 offset_value));
1057 inst->mlen++;
1058 }
1059 }
1060
1061 emit(inst);
1062
1063 /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
1064 * spec requires layers.
1065 */
1066 if (op == ir_txs && is_cube_array) {
1067 emit_math(SHADER_OPCODE_INT_QUOTIENT,
1068 writemask(inst->dst, WRITEMASK_Z),
1069 src_reg(inst->dst), src_reg(6));
1070 }
1071
1072 if (devinfo->gen == 6 && op == ir_tg4) {
1073 emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst);
1074 }
1075
1076 swizzle_result(op, dest,
1077 src_reg(inst->dst), sampler, dest_type);
1078 }
1079
1080 /**
1081 * Apply workarounds for Gen6 gather with UINT/SINT
1082 */
1083 void
1084 vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
1085 {
1086 if (!wa)
1087 return;
1088
1089 int width = (wa & WA_8BIT) ? 8 : 16;
1090 dst_reg dst_f = dst;
1091 dst_f.type = BRW_REGISTER_TYPE_F;
1092
1093 /* Convert from UNORM to UINT */
1094 emit(MUL(dst_f, src_reg(dst_f), src_reg((float)((1 << width) - 1))));
1095 emit(MOV(dst, src_reg(dst_f)));
1096
1097 if (wa & WA_SIGN) {
1098 /* Reinterpret the UINT value as a signed INT value by
1099 * shifting the sign bit into place, then shifting back
1100 * preserving sign.
1101 */
1102 emit(SHL(dst, src_reg(dst), src_reg(32 - width)));
1103 emit(ASR(dst, src_reg(dst), src_reg(32 - width)));
1104 }
1105 }
1106
1107 /**
1108 * Set up the gather channel based on the swizzle, for gather4.
1109 */
1110 uint32_t
1111 vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
1112 {
1113 int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
1114 switch (swiz) {
1115 case SWIZZLE_X: return 0;
1116 case SWIZZLE_Y:
1117 /* gather4 sampler is broken for green channel on RG32F --
1118 * we must ask for blue instead.
1119 */
1120 if (key_tex->gather_channel_quirk_mask & (1 << sampler))
1121 return 2;
1122 return 1;
1123 case SWIZZLE_Z: return 2;
1124 case SWIZZLE_W: return 3;
1125 default:
1126 unreachable("Not reached"); /* zero, one swizzles handled already */
1127 }
1128 }
1129
1130 void
1131 vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
1132 src_reg orig_val, uint32_t sampler,
1133 const glsl_type *dest_type)
1134 {
1135 int s = key_tex->swizzles[sampler];
1136
1137 dst_reg swizzled_result = dest;
1138
1139 if (op == ir_query_levels) {
1140 /* # levels is in .w */
1141 orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1142 emit(MOV(swizzled_result, orig_val));
1143 return;
1144 }
1145
1146 if (op == ir_txs || dest_type == glsl_type::float_type
1147 || s == SWIZZLE_NOOP || op == ir_tg4) {
1148 emit(MOV(swizzled_result, orig_val));
1149 return;
1150 }
1151
1152
1153 int zero_mask = 0, one_mask = 0, copy_mask = 0;
1154 int swizzle[4] = {0};
1155
1156 for (int i = 0; i < 4; i++) {
1157 switch (GET_SWZ(s, i)) {
1158 case SWIZZLE_ZERO:
1159 zero_mask |= (1 << i);
1160 break;
1161 case SWIZZLE_ONE:
1162 one_mask |= (1 << i);
1163 break;
1164 default:
1165 copy_mask |= (1 << i);
1166 swizzle[i] = GET_SWZ(s, i);
1167 break;
1168 }
1169 }
1170
1171 if (copy_mask) {
1172 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1173 swizzled_result.writemask = copy_mask;
1174 emit(MOV(swizzled_result, orig_val));
1175 }
1176
1177 if (zero_mask) {
1178 swizzled_result.writemask = zero_mask;
1179 emit(MOV(swizzled_result, src_reg(0.0f)));
1180 }
1181
1182 if (one_mask) {
1183 swizzled_result.writemask = one_mask;
1184 emit(MOV(swizzled_result, src_reg(1.0f)));
1185 }
1186 }
1187
1188 void
1189 vec4_visitor::gs_emit_vertex(int stream_id)
1190 {
1191 unreachable("not reached");
1192 }
1193
1194 void
1195 vec4_visitor::gs_end_primitive()
1196 {
1197 unreachable("not reached");
1198 }
1199
1200 void
1201 vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
1202 dst_reg dst, src_reg offset,
1203 src_reg src0, src_reg src1)
1204 {
1205 unsigned mlen = 0;
1206
1207 /* Set the atomic operation offset. */
1208 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
1209 mlen++;
1210
1211 /* Set the atomic operation arguments. */
1212 if (src0.file != BAD_FILE) {
1213 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
1214 mlen++;
1215 }
1216
1217 if (src1.file != BAD_FILE) {
1218 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
1219 mlen++;
1220 }
1221
1222 /* Emit the instruction. Note that this maps to the normal SIMD8
1223 * untyped atomic message on Ivy Bridge, but that's OK because
1224 * unused channels will be masked out.
1225 */
1226 vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
1227 brw_message_reg(0),
1228 src_reg(surf_index), src_reg(atomic_op));
1229 inst->mlen = mlen;
1230 }
1231
1232 void
1233 vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
1234 src_reg offset)
1235 {
1236 /* Set the surface read offset. */
1237 emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
1238
1239 /* Emit the instruction. Note that this maps to the normal SIMD8
1240 * untyped surface read message, but that's OK because unused
1241 * channels will be masked out.
1242 */
1243 vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
1244 brw_message_reg(0),
1245 src_reg(surf_index), src_reg(1));
1246 inst->mlen = 1;
1247 }
1248
1249 void
1250 vec4_visitor::emit_ndc_computation()
1251 {
1252 /* Get the position */
1253 src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
1254
1255 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1256 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1257 output_reg[BRW_VARYING_SLOT_NDC] = ndc;
1258
1259 current_annotation = "NDC";
1260 dst_reg ndc_w = ndc;
1261 ndc_w.writemask = WRITEMASK_W;
1262 src_reg pos_w = pos;
1263 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1264 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1265
1266 dst_reg ndc_xyz = ndc;
1267 ndc_xyz.writemask = WRITEMASK_XYZ;
1268
1269 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
1270 }
1271
1272 void
1273 vec4_visitor::emit_psiz_and_flags(dst_reg reg)
1274 {
1275 if (devinfo->gen < 6 &&
1276 ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
1277 output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE ||
1278 devinfo->has_negative_rhw_bug)) {
1279 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1280 dst_reg header1_w = header1;
1281 header1_w.writemask = WRITEMASK_W;
1282
1283 emit(MOV(header1, 0u));
1284
1285 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
1286 src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
1287
1288 current_annotation = "Point size";
1289 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
1290 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
1291 }
1292
1293 if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
1294 current_annotation = "Clipping flags";
1295 dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
1296 dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
1297
1298 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L));
1299 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
1300 emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
1301
1302 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L));
1303 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
1304 emit(SHL(flags1, src_reg(flags1), src_reg(4)));
1305 emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
1306 }
1307
1308 /* i965 clipping workaround:
1309 * 1) Test for -ve rhw
1310 * 2) If set,
1311 * set ndc = (0,0,0,0)
1312 * set ucp[6] = 1
1313 *
1314 * Later, clipping will detect ucp[6] and ensure the primitive is
1315 * clipped against all fixed planes.
1316 */
1317 if (devinfo->has_negative_rhw_bug) {
1318 src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
1319 ndc_w.swizzle = BRW_SWIZZLE_WWWW;
1320 emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
1321 vec4_instruction *inst;
1322 inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
1323 inst->predicate = BRW_PREDICATE_NORMAL;
1324 output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F;
1325 inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
1326 inst->predicate = BRW_PREDICATE_NORMAL;
1327 }
1328
1329 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
1330 } else if (devinfo->gen < 6) {
1331 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
1332 } else {
1333 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
1334 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
1335 dst_reg reg_w = reg;
1336 reg_w.writemask = WRITEMASK_W;
1337 src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ]);
1338 reg_as_src.type = reg_w.type;
1339 reg_as_src.swizzle = brw_swizzle_for_size(1);
1340 emit(MOV(reg_w, reg_as_src));
1341 }
1342 if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
1343 dst_reg reg_y = reg;
1344 reg_y.writemask = WRITEMASK_Y;
1345 reg_y.type = BRW_REGISTER_TYPE_D;
1346 output_reg[VARYING_SLOT_LAYER].type = reg_y.type;
1347 emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER])));
1348 }
1349 if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
1350 dst_reg reg_z = reg;
1351 reg_z.writemask = WRITEMASK_Z;
1352 reg_z.type = BRW_REGISTER_TYPE_D;
1353 output_reg[VARYING_SLOT_VIEWPORT].type = reg_z.type;
1354 emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT])));
1355 }
1356 }
1357 }
1358
1359 vec4_instruction *
1360 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
1361 {
1362 assert(varying < VARYING_SLOT_MAX);
1363 assert(output_reg[varying].type == reg.type);
1364 current_annotation = output_reg_annotation[varying];
1365 /* Copy the register, saturating if necessary */
1366 return emit(MOV(reg, src_reg(output_reg[varying])));
1367 }
1368
1369 void
1370 vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
1371 {
1372 reg.type = BRW_REGISTER_TYPE_F;
1373 output_reg[varying].type = reg.type;
1374
1375 switch (varying) {
1376 case VARYING_SLOT_PSIZ:
1377 {
1378 /* PSIZ is always in slot 0, and is coupled with other flags. */
1379 current_annotation = "indices, point width, clip flags";
1380 emit_psiz_and_flags(reg);
1381 break;
1382 }
1383 case BRW_VARYING_SLOT_NDC:
1384 current_annotation = "NDC";
1385 emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
1386 break;
1387 case VARYING_SLOT_POS:
1388 current_annotation = "gl_Position";
1389 emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
1390 break;
1391 case VARYING_SLOT_EDGE:
1392 /* This is present when doing unfilled polygons. We're supposed to copy
1393 * the edge flag from the user-provided vertex array
1394 * (glEdgeFlagPointer), or otherwise we'll copy from the current value
1395 * of that attribute (starts as 1.0f). This is then used in clipping to
1396 * determine which edges should be drawn as wireframe.
1397 */
1398 current_annotation = "edge flag";
1399 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
1400 glsl_type::float_type, WRITEMASK_XYZW))));
1401 break;
1402 case BRW_VARYING_SLOT_PAD:
1403 /* No need to write to this slot */
1404 break;
1405 default:
1406 emit_generic_urb_slot(reg, varying);
1407 break;
1408 }
1409 }
1410
1411 static int
1412 align_interleaved_urb_mlen(const struct brw_device_info *devinfo, int mlen)
1413 {
1414 if (devinfo->gen >= 6) {
1415 /* URB data written (does not include the message header reg) must
1416 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
1417 * section 5.4.3.2.2: URB_INTERLEAVED.
1418 *
1419 * URB entries are allocated on a multiple of 1024 bits, so an
1420 * extra 128 bits written here to make the end align to 256 is
1421 * no problem.
1422 */
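      /* mlen includes the message header register, so the URB data length is
       * mlen - 1; bumping an even mlen to odd makes the data length even.
       */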
1423 if ((mlen % 2) != 1)
1424 mlen++;
1425 }
1426
1427 return mlen;
1428 }
1429
1430
1431 /**
1432 * Generates the VUE payload plus the necessary URB write instructions to
1433 * output it.
1434 *
1435 * The VUE layout is documented in Volume 2a.
1436 */
1437 void
1438 vec4_visitor::emit_vertex()
1439 {
1440 /* MRF 0 is reserved for the debugger, so start with message header
1441 * in MRF 1.
1442 */
1443 int base_mrf = 1;
1444 int mrf = base_mrf;
1445 /* In the process of generating our URB write message contents, we
1446 * may need to unspill a register or load from an array. Those
1447 * reads would use MRFs 14-15 (or 22-23 on gen6).
1448 */
1449 int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
1450
1451 /* The following assertion verifies that max_usable_mrf causes an
1452 * even-numbered amount of URB write data, which will meet gen6's
1453 * requirements for length alignment.
1454 */
1455 assert ((max_usable_mrf - base_mrf) % 2 == 0);
1456
1457 /* First mrf is the g0-based message header containing URB handles and
1458 * such.
1459 */
1460 emit_urb_write_header(mrf++);
1461
1462 if (devinfo->gen < 6) {
1463 emit_ndc_computation();
1464 }
1465
1466 /* We may need to split this up into several URB writes, so do them in a
1467 * loop.
1468 */
1469 int slot = 0;
1470 bool complete = false;
1471 do {
1472 /* URB offset is in URB row increments, and each of our MRFs is half of
1473 * one of those, since we're doing interleaved writes.
1474 */
1475 int offset = slot / 2;
1476
1477 mrf = base_mrf + 1;
1478 for (; slot < prog_data->vue_map.num_slots; ++slot) {
1479 emit_urb_slot(dst_reg(MRF, mrf++),
1480 prog_data->vue_map.slot_to_varying[slot]);
1481
1482 /* If this was max_usable_mrf, we can't fit anything more into this
1483 * URB WRITE. Same thing if we reached the maximum length available.
1484 */
1485 if (mrf > max_usable_mrf ||
1486 align_interleaved_urb_mlen(devinfo, mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
1487 slot++;
1488 break;
1489 }
1490 }
1491
1492 complete = slot >= prog_data->vue_map.num_slots;
1493 current_annotation = "URB write";
1494 vec4_instruction *inst = emit_urb_write_opcode(complete);
1495 inst->base_mrf = base_mrf;
1496 inst->mlen = align_interleaved_urb_mlen(devinfo, mrf - base_mrf);
1497 inst->offset += offset;
1498 } while(!complete);
1499 }
1500
1501
1502 src_reg
1503 vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
1504 src_reg *reladdr, int reg_offset)
1505 {
1506 /* Because we store the values to scratch interleaved like our
1507 * vertex data, we need to scale the vec4 index by 2.
1508 */
1509 int message_header_scale = 2;
1510
1511 /* Pre-gen6, the message header uses byte offsets instead of vec4
1512 * (16-byte) offset units.
1513 */
1514 if (devinfo->gen < 6)
1515 message_header_scale *= 16;
1516
1517 if (reladdr) {
1518 src_reg index = src_reg(this, glsl_type::int_type);
1519
1520 emit_before(block, inst, ADD(dst_reg(index), *reladdr,
1521 src_reg(reg_offset)));
1522 emit_before(block, inst, MUL(dst_reg(index), index,
1523 src_reg(message_header_scale)));
1524
1525 return index;
1526 } else {
1527 return src_reg(reg_offset * message_header_scale);
1528 }
1529 }
1530
1531 src_reg
1532 vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
1533 src_reg *reladdr, int reg_offset)
1534 {
1535 if (reladdr) {
1536 src_reg index = src_reg(this, glsl_type::int_type);
1537
1538 emit_before(block, inst, ADD(dst_reg(index), *reladdr,
1539 src_reg(reg_offset)));
1540
1541 /* Pre-gen6, the message header uses byte offsets instead of vec4
1542 * (16-byte) offset units.
1543 */
1544 if (devinfo->gen < 6) {
1545 emit_before(block, inst, MUL(dst_reg(index), index, src_reg(16)));
1546 }
1547
1548 return index;
1549 } else if (devinfo->gen >= 8) {
1550 /* Store the offset in a GRF so we can send-from-GRF. */
1551 src_reg offset = src_reg(this, glsl_type::int_type);
1552 emit_before(block, inst, MOV(dst_reg(offset), src_reg(reg_offset)));
1553 return offset;
1554 } else {
1555 int message_header_scale = devinfo->gen < 6 ? 16 : 1;
1556 return src_reg(reg_offset * message_header_scale);
1557 }
1558 }
1559
1560 /**
1561 * Emits an instruction before @inst to load the value named by @orig_src
1562 * from scratch space at @base_offset to @temp.
1563 *
1564 * @base_offset is measured in 32-byte units (the size of a register).
1565 */
1566 void
1567 vec4_visitor::emit_scratch_read(bblock_t *block, vec4_instruction *inst,
1568 dst_reg temp, src_reg orig_src,
1569 int base_offset)
1570 {
1571 int reg_offset = base_offset + orig_src.reg_offset;
1572 src_reg index = get_scratch_offset(block, inst, orig_src.reladdr,
1573 reg_offset);
1574
1575 emit_before(block, inst, SCRATCH_READ(temp, index));
1576 }
1577
1578 /**
1579 * Emits an instruction after @inst to store the value to be written
1580 * to @orig_dst to scratch space at @base_offset, from @temp.
1581 *
1582 * @base_offset is measured in 32-byte units (the size of a register).
1583 */
1584 void
1585 vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
1586 int base_offset)
1587 {
1588 int reg_offset = base_offset + inst->dst.reg_offset;
1589 src_reg index = get_scratch_offset(block, inst, inst->dst.reladdr,
1590 reg_offset);
1591
1592 /* Create a temporary register to store *inst's result in.
1593 *
1594 * We have to be careful in MOVing from our temporary result register in
1595 * the scratch write. If we swizzle from channels of the temporary that
1596 * weren't initialized, it will confuse live interval analysis, which will
1597 * make spilling fail to make progress.
1598 */
1599 const src_reg temp = swizzle(retype(src_reg(this, glsl_type::vec4_type),
1600 inst->dst.type),
1601 brw_swizzle_for_mask(inst->dst.writemask));
1602 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
1603 inst->dst.writemask));
1604 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
1605 if (inst->opcode != BRW_OPCODE_SEL)
1606 write->predicate = inst->predicate;
1607 write->ir = inst->ir;
1608 write->annotation = inst->annotation;
1609 inst->insert_after(block, write);
1610
1611 inst->dst.file = temp.file;
1612 inst->dst.reg = temp.reg;
1613 inst->dst.reg_offset = temp.reg_offset;
1614 inst->dst.reladdr = NULL;
1615 }
1616
1617 /**
1618 * Checks if \p src and/or \p src.reladdr require a scratch read, and if so,
1619 * adds the scratch read(s) before \p inst. The function also checks for
1620 * recursive reladdr scratch accesses, issuing the corresponding scratch
1621 * loads and rewriting reladdr references accordingly.
1622 *
1623 * \return \p src if it did not require a scratch load, otherwise, the
1624 * register holding the result of the scratch load that the caller should
1625 * use to rewrite src.
1626 */
1627 src_reg
1628 vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
1629 vec4_instruction *inst, src_reg src)
1630 {
1631 /* Resolve recursive reladdr scratch access by calling ourselves
1632 * with src.reladdr
1633 */
1634 if (src.reladdr)
1635 *src.reladdr = emit_resolve_reladdr(scratch_loc, block, inst,
1636 *src.reladdr);
1637
1638 /* Now handle scratch access on src */
1639 if (src.file == GRF && scratch_loc[src.reg] != -1) {
1640 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
1641 emit_scratch_read(block, inst, temp, src, scratch_loc[src.reg]);
1642 src.reg = temp.reg;
1643 src.reg_offset = temp.reg_offset;
1644 src.reladdr = NULL;
1645 }
1646
1647 return src;
1648 }
1649
1650 /**
1651 * We can't generally support array access in GRF space, because a
1652 * single instruction's destination can only span 2 contiguous
1653 * registers. So, we send all GRF arrays that get variable index
1654 * access to scratch space.
1655 */
1656 void
1657 vec4_visitor::move_grf_array_access_to_scratch()
1658 {
1659 int scratch_loc[this->alloc.count];
1660 memset(scratch_loc, -1, sizeof(scratch_loc));
1661
1662 /* First, calculate the set of virtual GRFs that need to be punted
1663 * to scratch due to having any array access on them, and where in
1664 * scratch.
1665 */
1666 foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
1667 if (inst->dst.file == GRF && inst->dst.reladdr) {
1668 if (scratch_loc[inst->dst.reg] == -1) {
1669 scratch_loc[inst->dst.reg] = last_scratch;
1670 last_scratch += this->alloc.sizes[inst->dst.reg];
1671 }
1672
1673 for (src_reg *iter = inst->dst.reladdr;
1674 iter->reladdr;
1675 iter = iter->reladdr) {
1676 if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
1677 scratch_loc[iter->reg] = last_scratch;
1678 last_scratch += this->alloc.sizes[iter->reg];
1679 }
1680 }
1681 }
1682
1683 for (int i = 0 ; i < 3; i++) {
1684 for (src_reg *iter = &inst->src[i];
1685 iter->reladdr;
1686 iter = iter->reladdr) {
1687 if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
1688 scratch_loc[iter->reg] = last_scratch;
1689 last_scratch += this->alloc.sizes[iter->reg];
1690 }
1691 }
1692 }
1693 }
1694
1695 /* Now, for anything that will be accessed through scratch, rewrite
1696 * it to load/store. Note that this is a _safe list walk, because
1697 * we may generate a new scratch_write instruction after the one
1698 * we're processing.
1699 */
1700 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
1701 /* Set up the annotation tracking for new generated instructions. */
1702 base_ir = inst->ir;
1703 current_annotation = inst->annotation;
1704
1705 /* First handle scratch access on the dst. Notice we have to handle
1706 * the case where the dst's reladdr also points to scratch space.
1707 */
1708 if (inst->dst.reladdr)
1709 *inst->dst.reladdr = emit_resolve_reladdr(scratch_loc, block, inst,
1710 *inst->dst.reladdr);
1711
1712 /* Now that we have handled any (possibly recursive) reladdr scratch
1713 * accesses for dst we can safely do the scratch write for dst itself
1714 */
1715 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1)
1716 emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]);
1717
1718 /* Now handle scratch access on any src. In this case, since inst->src[i]
1719 * already is a src_reg, we can just call emit_resolve_reladdr with
1720 * inst->src[i] and it will take care of handling scratch loads for
1721 * both src and src.reladdr (recursively).
1722 */
1723 for (int i = 0 ; i < 3; i++) {
1724 inst->src[i] = emit_resolve_reladdr(scratch_loc, block, inst,
1725 inst->src[i]);
1726 }
1727 }
1728 }
1729
1730 /**
1731 * Emits an instruction before @inst to load the value named by @orig_src
1732 * from the pull constant buffer (surface) at @base_offset to @temp.
1733 */
1734 void
1735 vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
1736 dst_reg temp, src_reg orig_src,
1737 int base_offset)
1738 {
1739 int reg_offset = base_offset + orig_src.reg_offset;
1740 src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start);
1741 src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
1742 reg_offset);
1743
1744 emit_pull_constant_load_reg(temp,
1745 index,
1746 offset,
1747 block, inst);
1748 }
1749
1750 /**
1751 * Implements array access of uniforms by inserting a
1752 * PULL_CONSTANT_LOAD instruction.
1753 *
1754 * Unlike temporary GRF array access (where we don't support it due to
1755 * the difficulty of doing relative addressing on instruction
1756 * destinations), we could potentially do array access of uniforms
1757 * that were loaded in GRF space as push constants. In real-world
1758 * usage we've seen, though, the arrays being used are always larger
1759 * than we could load as push constants, so just always move all
1760 * uniform array access out to a pull constant buffer.
1761 */
1762 void
1763 vec4_visitor::move_uniform_array_access_to_pull_constants()
1764 {
1765 int pull_constant_loc[this->uniforms];
1766 memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
1767 bool nested_reladdr;
1768
1769 /* Walk through and find array access of uniforms. Put a copy of that
1770 * uniform in the pull constant buffer.
1771 *
1772 * Note that we don't move constant-indexed accesses to arrays. No
1773 * testing has been done of the performance impact of this choice.
1774 */
1775 do {
1776 nested_reladdr = false;
1777
1778 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
1779 for (int i = 0 ; i < 3; i++) {
1780 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
1781 continue;
1782
1783 int uniform = inst->src[i].reg;
1784
1785 if (inst->src[i].reladdr->reladdr)
1786 nested_reladdr = true; /* will need another pass */
1787
1788 /* If this array isn't already present in the pull constant buffer,
1789 * add it.
1790 */
1791 if (pull_constant_loc[uniform] == -1) {
1792 const gl_constant_value **values =
1793 &stage_prog_data->param[uniform * 4];
1794
1795 pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4;
1796
1797 assert(uniform < uniform_array_size);
1798 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
1799 stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
1800 = values[j];
1801 }
1802 }
1803
1804 /* Set up the annotation tracking for new generated instructions. */
1805 base_ir = inst->ir;
1806 current_annotation = inst->annotation;
1807
1808 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
1809
1810 emit_pull_constant_load(block, inst, temp, inst->src[i],
1811 pull_constant_loc[uniform]);
1812
1813 inst->src[i].file = temp.file;
1814 inst->src[i].reg = temp.reg;
1815 inst->src[i].reg_offset = temp.reg_offset;
1816 inst->src[i].reladdr = NULL;
1817 }
1818 }
1819 } while (nested_reladdr);
1820
1821 /* Now there are no accesses of the UNIFORM file with a reladdr, so
1822 * no need to track them as larger-than-vec4 objects. This will be
1823 * relied on in cutting out unused uniform vectors from push
1824 * constants.
1825 */
1826 split_uniform_registers();
1827 }
1828
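/* Applies the negate modifier of a UD source up front via a MOV into a
 * temporary, so that the comparison instructions emitted by the callers
 * (CMP and the gen6 IF with embedded compare) never see a negated unsigned
 * source.
 */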
1829 void
1830 vec4_visitor::resolve_ud_negate(src_reg *reg)
1831 {
1832 if (reg->type != BRW_REGISTER_TYPE_UD ||
1833 !reg->negate)
1834 return;
1835
1836 src_reg temp = src_reg(this, glsl_type::uvec4_type);
1837 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
1838 *reg = temp;
1839 }
1840
1841 vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
1842 void *log_data,
1843 struct gl_program *prog,
1844 const struct brw_sampler_prog_key_data *key_tex,
1845 struct brw_vue_prog_data *prog_data,
1846 struct gl_shader_program *shader_prog,
1847 gl_shader_stage stage,
1848 void *mem_ctx,
1849 bool no_spills,
1850 int shader_time_index)
1851 : backend_shader(compiler, log_data, mem_ctx,
1852 shader_prog, prog, &prog_data->base, stage),
1853 key_tex(key_tex),
1854 prog_data(prog_data),
1855 sanity_param_count(0),
1856 fail_msg(NULL),
1857 first_non_payload_grf(0),
1858 need_all_constants_in_pull_buffer(false),
1859 no_spills(no_spills),
1860 shader_time_index(shader_time_index),
1861 last_scratch(0)
1862 {
1863 this->failed = false;
1864
1865 this->base_ir = NULL;
1866 this->current_annotation = NULL;
1867 memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
1868
1869 this->virtual_grf_start = NULL;
1870 this->virtual_grf_end = NULL;
1871 this->live_intervals = NULL;
1872
1873 this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
1874
1875 this->uniforms = 0;
1876
1877 /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
1878 * at least one. See setup_uniforms() in brw_vec4.cpp.
1879 */
1880 this->uniform_array_size = 1;
1881 if (prog_data) {
1882 this->uniform_array_size =
1883 MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1);
1884 }
1885
1886 this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
1887 }
1888
1889 vec4_visitor::~vec4_visitor()
1890 {
1891 }
1892
1893
1894 void
1895 vec4_visitor::fail(const char *format, ...)
1896 {
1897 va_list va;
1898 char *msg;
1899
1900 if (failed)
1901 return;
1902
1903 failed = true;
1904
1905 va_start(va, format);
1906 msg = ralloc_vasprintf(mem_ctx, format, va);
1907 va_end(va);
1908 msg = ralloc_asprintf(mem_ctx, "%s compile failed: %s\n", stage_abbrev, msg);
1909
1910 this->fail_msg = msg;
1911
1912 if (debug_enabled) {
1913 fprintf(stderr, "%s", msg);
1914 }
1915 }
1916
1917 } /* namespace brw */