src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 #include "brw_cfg.h"
26 #include "glsl/ir_uniform.h"
27 #include "program/sampler.h"
28
29 namespace brw {
30
31 vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
32 const src_reg &src0, const src_reg &src1,
33 const src_reg &src2)
34 {
35 this->opcode = opcode;
36 this->dst = dst;
37 this->src[0] = src0;
38 this->src[1] = src1;
39 this->src[2] = src2;
40 this->saturate = false;
41 this->force_writemask_all = false;
42 this->no_dd_clear = false;
43 this->no_dd_check = false;
44 this->writes_accumulator = false;
45 this->conditional_mod = BRW_CONDITIONAL_NONE;
46 this->predicate = BRW_PREDICATE_NONE;
47 this->predicate_inverse = false;
48 this->target = 0;
49 this->regs_written = (dst.file == BAD_FILE ? 0 : 1);
50 this->shadow_compare = false;
51 this->ir = NULL;
52 this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
53 this->header_size = 0;
54 this->flag_subreg = 0;
55 this->mlen = 0;
56 this->base_mrf = 0;
57 this->offset = 0;
58 this->annotation = NULL;
59 }
60
61 vec4_instruction *
62 vec4_visitor::emit(vec4_instruction *inst)
63 {
64 inst->ir = this->base_ir;
65 inst->annotation = this->current_annotation;
66
67 this->instructions.push_tail(inst);
68
69 return inst;
70 }
71
72 vec4_instruction *
73 vec4_visitor::emit_before(bblock_t *block, vec4_instruction *inst,
74 vec4_instruction *new_inst)
75 {
76 new_inst->ir = inst->ir;
77 new_inst->annotation = inst->annotation;
78
79 inst->insert_before(block, new_inst);
80
81 return inst;
82 }
83
84 vec4_instruction *
85 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
86 const src_reg &src1, const src_reg &src2)
87 {
88 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1, src2));
89 }
90
91
92 vec4_instruction *
93 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
94 const src_reg &src1)
95 {
96 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1));
97 }
98
99 vec4_instruction *
100 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0)
101 {
102 return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0));
103 }
104
105 vec4_instruction *
106 vec4_visitor::emit(enum opcode opcode, const dst_reg &dst)
107 {
108 return emit(new(mem_ctx) vec4_instruction(opcode, dst));
109 }
110
111 vec4_instruction *
112 vec4_visitor::emit(enum opcode opcode)
113 {
114 return emit(new(mem_ctx) vec4_instruction(opcode, dst_reg()));
115 }
116
117 #define ALU1(op) \
118 vec4_instruction * \
119 vec4_visitor::op(const dst_reg &dst, const src_reg &src0) \
120 { \
121 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, src0); \
122 }
123
124 #define ALU2(op) \
125 vec4_instruction * \
126 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
127 const src_reg &src1) \
128 { \
129 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
130 src0, src1); \
131 }
132
133 #define ALU2_ACC(op) \
134 vec4_instruction * \
135 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
136 const src_reg &src1) \
137 { \
138 vec4_instruction *inst = new(mem_ctx) vec4_instruction( \
139 BRW_OPCODE_##op, dst, src0, src1); \
140 inst->writes_accumulator = true; \
141 return inst; \
142 }
143
144 #define ALU3(op) \
145 vec4_instruction * \
146 vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
147 const src_reg &src1, const src_reg &src2) \
148 { \
149 assert(devinfo->gen >= 6); \
150 return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
151 src0, src1, src2); \
152 }
153
154 ALU1(NOT)
155 ALU1(MOV)
156 ALU1(FRC)
157 ALU1(RNDD)
158 ALU1(RNDE)
159 ALU1(RNDZ)
160 ALU1(F32TO16)
161 ALU1(F16TO32)
162 ALU2(ADD)
163 ALU2(MUL)
164 ALU2_ACC(MACH)
165 ALU2(AND)
166 ALU2(OR)
167 ALU2(XOR)
168 ALU2(DP3)
169 ALU2(DP4)
170 ALU2(DPH)
171 ALU2(SHL)
172 ALU2(SHR)
173 ALU2(ASR)
174 ALU3(LRP)
175 ALU1(BFREV)
176 ALU3(BFE)
177 ALU2(BFI1)
178 ALU3(BFI2)
179 ALU1(FBH)
180 ALU1(FBL)
181 ALU1(CBIT)
182 ALU3(MAD)
183 ALU2_ACC(ADDC)
184 ALU2_ACC(SUBB)
185 ALU2(MAC)
186
187 /** Gen4 predicated IF. */
188 vec4_instruction *
189 vec4_visitor::IF(enum brw_predicate predicate)
190 {
191 vec4_instruction *inst;
192
193 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF);
194 inst->predicate = predicate;
195
196 return inst;
197 }
198
199 /** Gen6 IF with embedded comparison. */
200 vec4_instruction *
201 vec4_visitor::IF(src_reg src0, src_reg src1,
202 enum brw_conditional_mod condition)
203 {
204 assert(devinfo->gen == 6);
205
206 vec4_instruction *inst;
207
208 resolve_ud_negate(&src0);
209 resolve_ud_negate(&src1);
210
211 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF, dst_null_d(),
212 src0, src1);
213 inst->conditional_mod = condition;
214
215 return inst;
216 }
217
218 /**
219 * CMP: Sets the low bit of the destination channels with the result
220 * of the comparison, while the upper bits are undefined, and updates
221 * the flag register with the packed 16 bits of the result.
222 */
223 vec4_instruction *
224 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1,
225 enum brw_conditional_mod condition)
226 {
227 vec4_instruction *inst;
228
229 /* Take the instruction:
230 *
231 * CMP null<d> src0<f> src1<f>
232 *
233 * Original gen4 does type conversion to the destination type before
234 * comparison, producing garbage results for floating point comparisons.
235 *
236 * The destination type doesn't matter on newer generations, so we set the
237 * type to match src0 so we can compact the instruction.
238 */
239 dst.type = src0.type;
240 if (dst.file == HW_REG)
241 dst.fixed_hw_reg.type = dst.type;
242
243 resolve_ud_negate(&src0);
244 resolve_ud_negate(&src1);
245
246 inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, dst, src0, src1);
247 inst->conditional_mod = condition;
248
249 return inst;
250 }
251
252 vec4_instruction *
253 vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
254 {
255 vec4_instruction *inst;
256
257 inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
258 dst, index);
259 inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
260 inst->mlen = 2;
261
262 return inst;
263 }
264
265 vec4_instruction *
266 vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
267 const src_reg &index)
268 {
269 vec4_instruction *inst;
270
271 inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
272 dst, src, index);
273 inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen);
274 inst->mlen = 3;
275
276 return inst;
277 }
278
279 src_reg
280 vec4_visitor::fix_3src_operand(const src_reg &src)
281 {
282 /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
283  * able to use a vertical stride of zero to replicate the vec4 uniform, like
284 *
285 * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
286 *
287 * But you can't, since vertical stride is always four in three-source
288 * instructions. Instead, insert a MOV instruction to do the replication so
289 * that the three-source instruction can consume it.
290 */
291
292 /* The MOV is only needed if the source is a uniform or immediate. */
293 if (src.file != UNIFORM && src.file != IMM)
294 return src;
295
296 if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
297 return src;
298
299 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
300 expanded.type = src.type;
301 emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
302 return src_reg(expanded);
303 }
304
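/**
 * If \p src carries abs or negate source modifiers, copy it through a MOV
 * into a temporary so the caller gets back an equivalent register with no
 * modifiers attached; otherwise return \p src unchanged.
 */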
305 src_reg
306 vec4_visitor::resolve_source_modifiers(const src_reg &src)
307 {
308 if (!src.abs && !src.negate)
309 return src;
310
311 dst_reg resolved = dst_reg(this, glsl_type::ivec4_type);
312 resolved.type = src.type;
313 emit(MOV(resolved, src));
314
315 return src_reg(resolved);
316 }
317
318 src_reg
319 vec4_visitor::fix_math_operand(const src_reg &src)
320 {
321 if (devinfo->gen < 6 || devinfo->gen >= 8 || src.file == BAD_FILE)
322 return src;
323
324 /* The gen6 math instruction ignores the source modifiers --
325 * swizzle, abs, negate, and at least some parts of the register
326 * region description.
327 *
328 * Rather than trying to enumerate all these cases, *always* expand the
329 * operand to a temp GRF for gen6.
330 *
331 * For gen7, keep the operand as-is, except if immediate, which gen7 still
332 * can't use.
333 */
334
335 if (devinfo->gen == 7 && src.file != IMM)
336 return src;
337
338 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
339 expanded.type = src.type;
340 emit(MOV(expanded, src));
341 return src_reg(expanded);
342 }
343
344 vec4_instruction *
345 vec4_visitor::emit_math(enum opcode opcode,
346 const dst_reg &dst,
347 const src_reg &src0, const src_reg &src1)
348 {
349 vec4_instruction *math =
350 emit(opcode, dst, fix_math_operand(src0), fix_math_operand(src1));
351
352 if (devinfo->gen == 6 && dst.writemask != WRITEMASK_XYZW) {
353 /* MATH on Gen6 must be align1, so we can't do writemasks. */
354 math->dst = dst_reg(this, glsl_type::vec4_type);
355 math->dst.type = dst.type;
356 math = emit(MOV(dst, src_reg(math->dst)));
357 } else if (devinfo->gen < 6) {
358 math->base_mrf = 1;
359 math->mlen = src1.file == BAD_FILE ? 1 : 2;
360 }
361
362 return math;
363 }
364
365 void
366 vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
367 {
368 if (devinfo->gen < 7) {
369 unreachable("ir_unop_pack_half_2x16 should be lowered");
370 }
371
372 assert(dst.type == BRW_REGISTER_TYPE_UD);
373 assert(src0.type == BRW_REGISTER_TYPE_F);
374
375 /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
376 *
377 * Because this instruction does not have a 16-bit floating-point type,
378 * the destination data type must be Word (W).
379 *
380 * The destination must be DWord-aligned and specify a horizontal stride
381 * (HorzStride) of 2. The 16-bit result is stored in the lower word of
382 * each destination channel and the upper word is not modified.
383 *
384 * The above restriction implies that the f32to16 instruction must use
385 * align1 mode, because only in align1 mode is it possible to specify
386 * horizontal stride. We choose here to defy the hardware docs and emit
387 * align16 instructions.
388 *
389 * (I [chadv] did attempt to emit align1 instructions for VS f32to16
390 * instructions. I was partially successful in that the code passed all
391 * tests. However, the code was dubiously correct and fragile, and the
392 * tests were not harsh enough to probe that frailty. Not trusting the
393 * code, I chose instead to remain in align16 mode in defiance of the hw
394 * docs).
395 *
396 * I've [chadv] experimentally confirmed that, on gen7 hardware and the
397 * simulator, emitting a f32to16 in align16 mode with UD as destination
398 * data type is safe. The behavior differs from that specified in the PRM
399 * in that the upper word of each destination channel is cleared to 0.
400 */
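   /* As a concrete example of the packing this implements:
    * packHalf2x16(vec2(1.0, 0.5)) yields 0x38003C00, since half(1.0) =
    * 0x3C00 lands in the low word and half(0.5) = 0x3800 in the high word
    * after the SHL and OR below.
    */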
401
402 dst_reg tmp_dst(this, glsl_type::uvec2_type);
403 src_reg tmp_src(tmp_dst);
404
405 #if 0
406 /* Verify the undocumented behavior on which the following instructions
407 * rely. If f32to16 fails to clear the upper word of the X and Y channels,
408 * then the result of the bit-or instruction below will be incorrect.
409 *
410 * You should inspect the disasm output in order to verify that the MOV is
411 * not optimized away.
412 */
413 emit(MOV(tmp_dst, src_reg(0x12345678u)));
414 #endif
415
416 /* Give tmp the form below, where "." means untouched.
417 *
418     *     w z          y          x w z          y          x
419     *   |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
420 *
421 * That the upper word of each write-channel be 0 is required for the
422 * following bit-shift and bit-or instructions to work. Note that this
423 * relies on the undocumented hardware behavior mentioned above.
424 */
425 tmp_dst.writemask = WRITEMASK_XY;
426 emit(F32TO16(tmp_dst, src0));
427
428 /* Give the write-channels of dst the form:
429 * 0xhhhh0000
430 */
431 tmp_src.swizzle = BRW_SWIZZLE_YYYY;
432 emit(SHL(dst, tmp_src, src_reg(16u)));
433
434 /* Finally, give the write-channels of dst the form of packHalf2x16's
435 * output:
436 * 0xhhhhllll
437 */
438 tmp_src.swizzle = BRW_SWIZZLE_XXXX;
439 emit(OR(dst, src_reg(dst), tmp_src));
440 }
441
442 void
443 vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
444 {
445 if (devinfo->gen < 7) {
446 unreachable("ir_unop_unpack_half_2x16 should be lowered");
447 }
448
449 assert(dst.type == BRW_REGISTER_TYPE_F);
450 assert(src0.type == BRW_REGISTER_TYPE_UD);
451
452 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
453 *
454 * Because this instruction does not have a 16-bit floating-point type,
455 * the source data type must be Word (W). The destination type must be
456 * F (Float).
457 *
458 * To use W as the source data type, we must adjust horizontal strides,
459 * which is only possible in align1 mode. All my [chadv] attempts at
460 * emitting align1 instructions for unpackHalf2x16 failed to pass the
461 * Piglit tests, so I gave up.
462 *
463 * I've verified that, on gen7 hardware and the simulator, it is safe to
464 * emit f16to32 in align16 mode with UD as source data type.
465 */
466
467 dst_reg tmp_dst(this, glsl_type::uvec2_type);
468 src_reg tmp_src(tmp_dst);
469
470 tmp_dst.writemask = WRITEMASK_X;
471 emit(AND(tmp_dst, src0, src_reg(0xffffu)));
472
473 tmp_dst.writemask = WRITEMASK_Y;
474 emit(SHR(tmp_dst, src0, src_reg(16u)));
475
476 dst.writemask = WRITEMASK_XY;
477 emit(F16TO32(dst, tmp_src));
478 }
479
480 void
481 vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
482 {
483 /* Instead of splitting the 32-bit integer, shifting, and ORing it back
484 * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
485 * is not suitable to generate the shift values, but we can use the packed
486 * vector float and a type-converting MOV.
487 */
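   /* The vector-float immediate bytes 0x00, 0x60, 0x70 and 0x78 encode the
    * floats 0.0, 8.0, 16.0 and 24.0, so after the type-converting MOV the
    * shift register holds the per-channel shift counts <0, 8, 16, 24>.
    */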
488 dst_reg shift(this, glsl_type::uvec4_type);
489 emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
490
491 dst_reg shifted(this, glsl_type::uvec4_type);
492 src0.swizzle = BRW_SWIZZLE_XXXX;
493 emit(SHR(shifted, src0, src_reg(shift)));
494
495 shifted.type = BRW_REGISTER_TYPE_UB;
496 dst_reg f(this, glsl_type::vec4_type);
497 emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
498
499 emit(MUL(dst, src_reg(f), src_reg(1.0f / 255.0f)));
500 }
501
502 void
503 vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
504 {
505 /* Instead of splitting the 32-bit integer, shifting, and ORing it back
506 * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
507 * is not suitable to generate the shift values, but we can use the packed
508 * vector float and a type-converting MOV.
509 */
510 dst_reg shift(this, glsl_type::uvec4_type);
511 emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
512
513 dst_reg shifted(this, glsl_type::uvec4_type);
514 src0.swizzle = BRW_SWIZZLE_XXXX;
515 emit(SHR(shifted, src0, src_reg(shift)));
516
517 shifted.type = BRW_REGISTER_TYPE_B;
518 dst_reg f(this, glsl_type::vec4_type);
519 emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
520
521 dst_reg scaled(this, glsl_type::vec4_type);
522 emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f)));
523
524 dst_reg max(this, glsl_type::vec4_type);
525 emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), src_reg(-1.0f));
526 emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), src_reg(1.0f));
527 }
528
529 void
530 vec4_visitor::emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0)
531 {
532 dst_reg saturated(this, glsl_type::vec4_type);
533 vec4_instruction *inst = emit(MOV(saturated, src0));
534 inst->saturate = true;
535
536 dst_reg scaled(this, glsl_type::vec4_type);
537 emit(MUL(scaled, src_reg(saturated), src_reg(255.0f)));
538
539 dst_reg rounded(this, glsl_type::vec4_type);
540 emit(RNDE(rounded, src_reg(scaled)));
541
542 dst_reg u(this, glsl_type::uvec4_type);
543 emit(MOV(u, src_reg(rounded)));
544
545 src_reg bytes(u);
546 emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
547 }
548
549 void
550 vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
551 {
552 dst_reg max(this, glsl_type::vec4_type);
553 emit_minmax(BRW_CONDITIONAL_GE, max, src0, src_reg(-1.0f));
554
555 dst_reg min(this, glsl_type::vec4_type);
556 emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), src_reg(1.0f));
557
558 dst_reg scaled(this, glsl_type::vec4_type);
559 emit(MUL(scaled, src_reg(min), src_reg(127.0f)));
560
561 dst_reg rounded(this, glsl_type::vec4_type);
562 emit(RNDE(rounded, src_reg(scaled)));
563
564 dst_reg i(this, glsl_type::ivec4_type);
565 emit(MOV(i, src_reg(rounded)));
566
567 src_reg bytes(i);
568 emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
569 }
570
571 /**
572 * Returns the minimum number of vec4 elements needed to pack a type.
573 *
574 * For simple types, it will return 1 (a single vec4); for matrices, the
575 * number of columns; for array and struct, the sum of the vec4_size of
576 * each of its elements; and for sampler and atomic, zero.
577 *
578 * This method is useful to calculate how much register space is needed to
579 * store a particular type.
580 */
581 extern "C" int
582 type_size_vec4(const struct glsl_type *type)
583 {
584 unsigned int i;
585 int size;
586
587 switch (type->base_type) {
588 case GLSL_TYPE_UINT:
589 case GLSL_TYPE_INT:
590 case GLSL_TYPE_FLOAT:
591 case GLSL_TYPE_BOOL:
592 if (type->is_matrix()) {
593 return type->matrix_columns;
594 } else {
595         /* Regardless of the size of the vector, it gets a vec4. This is bad
596 * packing for things like floats, but otherwise arrays become a
597 * mess. Hopefully a later pass over the code can pack scalars
598 * down if appropriate.
599 */
600 return 1;
601 }
602 case GLSL_TYPE_ARRAY:
603 assert(type->length > 0);
604 return type_size_vec4(type->fields.array) * type->length;
605 case GLSL_TYPE_STRUCT:
606 size = 0;
607 for (i = 0; i < type->length; i++) {
608 size += type_size_vec4(type->fields.structure[i].type);
609 }
610 return size;
611 case GLSL_TYPE_SUBROUTINE:
612 return 1;
613
614 case GLSL_TYPE_SAMPLER:
615 /* Samplers take up no register space, since they're baked in at
616 * link time.
617 */
618 return 0;
619 case GLSL_TYPE_ATOMIC_UINT:
620 return 0;
621 case GLSL_TYPE_IMAGE:
622 return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
623 case GLSL_TYPE_VOID:
624 case GLSL_TYPE_DOUBLE:
625 case GLSL_TYPE_ERROR:
626 case GLSL_TYPE_INTERFACE:
627 case GLSL_TYPE_FUNCTION:
628 unreachable("not reached");
629 }
630
631 return 0;
632 }
633
634 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
635 {
636 init();
637
638 this->file = GRF;
639 this->reg = v->alloc.allocate(type_size_vec4(type));
640
641 if (type->is_array() || type->is_record()) {
642 this->swizzle = BRW_SWIZZLE_NOOP;
643 } else {
644 this->swizzle = brw_swizzle_for_size(type->vector_elements);
645 }
646
647 this->type = brw_type_for_base_type(type);
648 }
649
650 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
651 {
652 assert(size > 0);
653
654 init();
655
656 this->file = GRF;
657 this->reg = v->alloc.allocate(type_size_vec4(type) * size);
658
659 this->swizzle = BRW_SWIZZLE_NOOP;
660
661 this->type = brw_type_for_base_type(type);
662 }
663
664 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
665 {
666 init();
667
668 this->file = GRF;
669 this->reg = v->alloc.allocate(type_size_vec4(type));
670
671 if (type->is_array() || type->is_record()) {
672 this->writemask = WRITEMASK_XYZW;
673 } else {
674 this->writemask = (1 << type->vector_elements) - 1;
675 }
676
677 this->type = brw_type_for_base_type(type);
678 }
679
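/**
 * Emit a MIN or MAX of \p src0 and \p src1: the conditional mod selects
 * which (callers pass BRW_CONDITIONAL_GE for a maximum and BRW_CONDITIONAL_L
 * for a minimum). Gen6+ folds the comparison into a conditional SEL; older
 * hardware needs an explicit CMP followed by a predicated SEL.
 */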
680 vec4_instruction *
681 vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
682 src_reg src0, src_reg src1)
683 {
684 vec4_instruction *inst;
685
686 if (devinfo->gen >= 6) {
687 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
688 inst->conditional_mod = conditionalmod;
689 } else {
690 emit(CMP(dst, src0, src1, conditionalmod));
691
692 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
693 inst->predicate = BRW_PREDICATE_NORMAL;
694 }
695
696 return inst;
697 }
698
699 vec4_instruction *
700 vec4_visitor::emit_lrp(const dst_reg &dst,
701 const src_reg &x, const src_reg &y, const src_reg &a)
702 {
703 if (devinfo->gen >= 6) {
704 /* Note that the instruction's argument order is reversed from GLSL
705 * and the IR.
706 */
707 return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y),
708 fix_3src_operand(x)));
709 } else {
710 /* Earlier generations don't support three source operations, so we
711 * need to emit x*(1-a) + y*a.
712 */
713 dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type);
714 dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
715 dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
716 y_times_a.writemask = dst.writemask;
717 one_minus_a.writemask = dst.writemask;
718 x_times_one_minus_a.writemask = dst.writemask;
719
720 emit(MUL(y_times_a, y, a));
721 emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
722 emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
723 return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
724 }
725 }
726
727 /**
728 * Emits the instructions needed to perform a pull constant load. before_block
729  * and before_inst can be NULL, in which case the instruction will be appended
730 * to the end of the instruction list.
731 */
732 void
733 vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
734 src_reg surf_index,
735 src_reg offset_reg,
736 bblock_t *before_block,
737 vec4_instruction *before_inst)
738 {
739 assert((before_inst == NULL && before_block == NULL) ||
740 (before_inst && before_block));
741
742 vec4_instruction *pull;
743
744 if (devinfo->gen >= 9) {
745 /* Gen9+ needs a message header in order to use SIMD4x2 mode */
746 src_reg header(this, glsl_type::uvec4_type, 2);
747
748 pull = new(mem_ctx)
749 vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
750 dst_reg(header));
751
752 if (before_inst)
753 emit_before(before_block, before_inst, pull);
754 else
755 emit(pull);
756
757 dst_reg index_reg = retype(offset(dst_reg(header), 1),
758 offset_reg.type);
759 pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
760
761 if (before_inst)
762 emit_before(before_block, before_inst, pull);
763 else
764 emit(pull);
765
766 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
767 dst,
768 surf_index,
769 header);
770 pull->mlen = 2;
771 pull->header_size = 1;
772 } else if (devinfo->gen >= 7) {
773 dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
774
775 grf_offset.type = offset_reg.type;
776
777 pull = MOV(grf_offset, offset_reg);
778
779 if (before_inst)
780 emit_before(before_block, before_inst, pull);
781 else
782 emit(pull);
783
784 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
785 dst,
786 surf_index,
787 src_reg(grf_offset));
788 pull->mlen = 1;
789 } else {
790 pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
791 dst,
792 surf_index,
793 offset_reg);
794 pull->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen) + 1;
795 pull->mlen = 1;
796 }
797
798 if (before_inst)
799 emit_before(before_block, before_inst, pull);
800 else
801 emit(pull);
802 }
803
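/**
 * Return a register holding the value of \p src taken from its first live
 * channel and broadcast to all channels, i.e. a copy of \p src that is
 * uniform across the SIMD4x2 group.
 */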
804 src_reg
805 vec4_visitor::emit_uniformize(const src_reg &src)
806 {
807 const src_reg chan_index(this, glsl_type::uint_type);
808 const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
809 src.type);
810
811 emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
812 ->force_writemask_all = true;
813 emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
814 ->force_writemask_all = true;
815
816 return src_reg(dst);
817 }
818
819 src_reg
820 vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
821 src_reg coordinate, src_reg sampler)
822 {
823 vec4_instruction *inst =
824 new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
825 dst_reg(this, glsl_type::uvec4_type));
826 inst->base_mrf = 2;
827 inst->src[1] = sampler;
828
829 int param_base;
830
831 if (devinfo->gen >= 9) {
832 /* Gen9+ needs a message header in order to use SIMD4x2 mode */
833 vec4_instruction *header_inst = new(mem_ctx)
834 vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
835 dst_reg(MRF, inst->base_mrf));
836
837 emit(header_inst);
838
839 inst->mlen = 2;
840 inst->header_size = 1;
841 param_base = inst->base_mrf + 1;
842 } else {
843 inst->mlen = 1;
844 param_base = inst->base_mrf;
845 }
846
847 /* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */
848 int coord_mask = (1 << coordinate_type->vector_elements) - 1;
849 int zero_mask = 0xf & ~coord_mask;
850
851 emit(MOV(dst_reg(MRF, param_base, coordinate_type, coord_mask),
852 coordinate));
853
854 emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask),
855 src_reg(0)));
856
857 emit(inst);
858 return src_reg(inst->dst);
859 }
860
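/**
 * A sampler is "high" if its index can't be placed directly in the 4-bit
 * sampler field of the message descriptor, either because it isn't known at
 * compile time or because it is 16 or larger; emit_texture() sets up a
 * message header for that case.
 */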
861 bool
862 vec4_visitor::is_high_sampler(src_reg sampler)
863 {
864 if (devinfo->gen < 8 && !devinfo->is_haswell)
865 return false;
866
867 return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
868 }
869
870 void
871 vec4_visitor::emit_texture(ir_texture_opcode op,
872 dst_reg dest,
873 const glsl_type *dest_type,
874 src_reg coordinate,
875 int coord_components,
876 src_reg shadow_comparitor,
877 src_reg lod, src_reg lod2,
878 src_reg sample_index,
879 uint32_t constant_offset,
880 src_reg offset_value,
881 src_reg mcs,
882 bool is_cube_array,
883 uint32_t sampler,
884 src_reg sampler_reg)
885 {
886 enum opcode opcode;
887 switch (op) {
888 case ir_tex: opcode = SHADER_OPCODE_TXL; break;
889 case ir_txl: opcode = SHADER_OPCODE_TXL; break;
890 case ir_txd: opcode = SHADER_OPCODE_TXD; break;
891 case ir_txf: opcode = SHADER_OPCODE_TXF; break;
892 case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
893 case ir_txs: opcode = SHADER_OPCODE_TXS; break;
894 case ir_tg4: opcode = offset_value.file != BAD_FILE
895 ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
896 case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
897 case ir_texture_samples: opcode = SHADER_OPCODE_SAMPLEINFO; break;
898 case ir_txb:
899 unreachable("TXB is not valid for vertex shaders.");
900 case ir_lod:
901 unreachable("LOD is not valid for vertex shaders.");
902 default:
903 unreachable("Unrecognized tex op");
904 }
905
906 vec4_instruction *inst = new(mem_ctx) vec4_instruction(
907 opcode, dst_reg(this, dest_type));
908
909 inst->offset = constant_offset;
910
911 /* The message header is necessary for:
912 * - Gen4 (always)
913 * - Gen9+ for selecting SIMD4x2
914 * - Texel offsets
915 * - Gather channel selection
916 * - Sampler indices too large to fit in a 4-bit value.
917 * - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
918 */
919 inst->header_size =
920 (devinfo->gen < 5 || devinfo->gen >= 9 ||
921 inst->offset != 0 || op == ir_tg4 ||
922 op == ir_texture_samples ||
923 is_high_sampler(sampler_reg)) ? 1 : 0;
924 inst->base_mrf = 2;
925 inst->mlen = inst->header_size;
926 inst->dst.writemask = WRITEMASK_XYZW;
927 inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
928
929 inst->src[1] = sampler_reg;
930
931 /* MRF for the first parameter */
932 int param_base = inst->base_mrf + inst->header_size;
933
934 if (op == ir_txs || op == ir_query_levels) {
935 int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
936 emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
937 inst->mlen++;
938 } else if (op == ir_texture_samples) {
939 inst->dst.writemask = WRITEMASK_X;
940 } else {
941 /* Load the coordinate */
942 /* FINISHME: gl_clamp_mask and saturate */
943 int coord_mask = (1 << coord_components) - 1;
944 int zero_mask = 0xf & ~coord_mask;
945
946 emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
947 coordinate));
948 inst->mlen++;
949
950 if (zero_mask != 0) {
951 emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
952 src_reg(0)));
953 }
954 /* Load the shadow comparitor */
955 if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) {
956 emit(MOV(dst_reg(MRF, param_base + 1, shadow_comparitor.type,
957 WRITEMASK_X),
958 shadow_comparitor));
959 inst->mlen++;
960 }
961
962 /* Load the LOD info */
963 if (op == ir_tex || op == ir_txl) {
964 int mrf, writemask;
965 if (devinfo->gen >= 5) {
966 mrf = param_base + 1;
967 if (shadow_comparitor.file != BAD_FILE) {
968 writemask = WRITEMASK_Y;
969 /* mlen already incremented */
970 } else {
971 writemask = WRITEMASK_X;
972 inst->mlen++;
973 }
974 } else /* devinfo->gen == 4 */ {
975 mrf = param_base;
976 writemask = WRITEMASK_W;
977 }
978 emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
979 } else if (op == ir_txf) {
980 emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
981 } else if (op == ir_txf_ms) {
982 emit(MOV(dst_reg(MRF, param_base + 1, sample_index.type, WRITEMASK_X),
983 sample_index));
984 if (devinfo->gen >= 7) {
985 /* MCS data is in the first channel of `mcs`, but we need to get it into
986 * the .y channel of the second vec4 of params, so replicate .x across
987 * the whole vec4 and then mask off everything except .y
988 */
989 mcs.swizzle = BRW_SWIZZLE_XXXX;
990 emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
991 mcs));
992 }
993 inst->mlen++;
994 } else if (op == ir_txd) {
995 const brw_reg_type type = lod.type;
996
997 if (devinfo->gen >= 5) {
998 lod.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
999 lod2.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1000 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), lod));
1001 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), lod2));
1002 inst->mlen++;
1003
1004 if (dest_type->vector_elements == 3 || shadow_comparitor.file != BAD_FILE) {
1005 lod.swizzle = BRW_SWIZZLE_ZZZZ;
1006 lod2.swizzle = BRW_SWIZZLE_ZZZZ;
1007 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), lod));
1008 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), lod2));
1009 inst->mlen++;
1010
1011 if (shadow_comparitor.file != BAD_FILE) {
1012 emit(MOV(dst_reg(MRF, param_base + 2,
1013 shadow_comparitor.type, WRITEMASK_Z),
1014 shadow_comparitor));
1015 }
1016 }
1017 } else /* devinfo->gen == 4 */ {
1018 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), lod));
1019 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), lod2));
1020 inst->mlen += 2;
1021 }
1022 } else if (op == ir_tg4 && offset_value.file != BAD_FILE) {
1023 if (shadow_comparitor.file != BAD_FILE) {
1024 emit(MOV(dst_reg(MRF, param_base, shadow_comparitor.type, WRITEMASK_W),
1025 shadow_comparitor));
1026 }
1027
1028 emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY),
1029 offset_value));
1030 inst->mlen++;
1031 }
1032 }
1033
1034 emit(inst);
1035
1036 /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
1037 * spec requires layers.
1038 */
1039 if (op == ir_txs && is_cube_array) {
1040 emit_math(SHADER_OPCODE_INT_QUOTIENT,
1041 writemask(inst->dst, WRITEMASK_Z),
1042 src_reg(inst->dst), src_reg(6));
1043 }
1044
1045 if (devinfo->gen == 6 && op == ir_tg4) {
1046 emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst);
1047 }
1048
1049 swizzle_result(op, dest,
1050 src_reg(inst->dst), sampler, dest_type);
1051 }
1052
1053 /**
1054 * Apply workarounds for Gen6 gather with UINT/SINT
1055 */
1056 void
1057 vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
1058 {
1059 if (!wa)
1060 return;
1061
1062 int width = (wa & WA_8BIT) ? 8 : 16;
1063 dst_reg dst_f = dst;
1064 dst_f.type = BRW_REGISTER_TYPE_F;
1065
1066 /* Convert from UNORM to UINT */
1067 emit(MUL(dst_f, src_reg(dst_f), src_reg((float)((1 << width) - 1))));
1068 emit(MOV(dst, src_reg(dst_f)));
1069
1070 if (wa & WA_SIGN) {
1071 /* Reinterpret the UINT value as a signed INT value by
1072 * shifting the sign bit into place, then shifting back
1073 * preserving sign.
1074 */
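      /* For example, with an 8-bit format a gathered value of 0xff becomes
       * 0xff000000 after the SHL and sign-extends back to 0xffffffff (-1)
       * after the ASR.
       */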
1075 emit(SHL(dst, src_reg(dst), src_reg(32 - width)));
1076 emit(ASR(dst, src_reg(dst), src_reg(32 - width)));
1077 }
1078 }
1079
1080 /**
1081 * Set up the gather channel based on the swizzle, for gather4.
1082 */
1083 uint32_t
1084 vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
1085 {
1086 int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
1087 switch (swiz) {
1088 case SWIZZLE_X: return 0;
1089 case SWIZZLE_Y:
1090 /* gather4 sampler is broken for green channel on RG32F --
1091 * we must ask for blue instead.
1092 */
1093 if (key_tex->gather_channel_quirk_mask & (1 << sampler))
1094 return 2;
1095 return 1;
1096 case SWIZZLE_Z: return 2;
1097 case SWIZZLE_W: return 3;
1098 default:
1099 unreachable("Not reached"); /* zero, one swizzles handled already */
1100 }
1101 }
1102
1103 void
1104 vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
1105 src_reg orig_val, uint32_t sampler,
1106 const glsl_type *dest_type)
1107 {
1108 int s = key_tex->swizzles[sampler];
1109
1110 dst_reg swizzled_result = dest;
1111
1112 if (op == ir_query_levels) {
1113 /* # levels is in .w */
1114 orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1115 emit(MOV(swizzled_result, orig_val));
1116 return;
1117 }
1118
1119 if (op == ir_txs || dest_type == glsl_type::float_type
1120 || s == SWIZZLE_NOOP || op == ir_tg4) {
1121 emit(MOV(swizzled_result, orig_val));
1122 return;
1123 }
1124
1125
1126 int zero_mask = 0, one_mask = 0, copy_mask = 0;
1127 int swizzle[4] = {0};
1128
1129 for (int i = 0; i < 4; i++) {
1130 switch (GET_SWZ(s, i)) {
1131 case SWIZZLE_ZERO:
1132 zero_mask |= (1 << i);
1133 break;
1134 case SWIZZLE_ONE:
1135 one_mask |= (1 << i);
1136 break;
1137 default:
1138 copy_mask |= (1 << i);
1139 swizzle[i] = GET_SWZ(s, i);
1140 break;
1141 }
1142 }
1143
1144 if (copy_mask) {
1145 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1146 swizzled_result.writemask = copy_mask;
1147 emit(MOV(swizzled_result, orig_val));
1148 }
1149
1150 if (zero_mask) {
1151 swizzled_result.writemask = zero_mask;
1152 emit(MOV(swizzled_result, src_reg(0.0f)));
1153 }
1154
1155 if (one_mask) {
1156 swizzled_result.writemask = one_mask;
1157 emit(MOV(swizzled_result, src_reg(1.0f)));
1158 }
1159 }
1160
1161 void
1162 vec4_visitor::gs_emit_vertex(int stream_id)
1163 {
1164 unreachable("not reached");
1165 }
1166
1167 void
1168 vec4_visitor::gs_end_primitive()
1169 {
1170 unreachable("not reached");
1171 }
1172
1173 void
1174 vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
1175 dst_reg dst, src_reg offset,
1176 src_reg src0, src_reg src1)
1177 {
1178 unsigned mlen = 0;
1179
1180 /* Set the atomic operation offset. */
1181 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
1182 mlen++;
1183
1184 /* Set the atomic operation arguments. */
1185 if (src0.file != BAD_FILE) {
1186 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
1187 mlen++;
1188 }
1189
1190 if (src1.file != BAD_FILE) {
1191 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
1192 mlen++;
1193 }
1194
1195 /* Emit the instruction. Note that this maps to the normal SIMD8
1196 * untyped atomic message on Ivy Bridge, but that's OK because
1197 * unused channels will be masked out.
1198 */
1199 vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
1200 brw_message_reg(0),
1201 src_reg(surf_index), src_reg(atomic_op));
1202 inst->mlen = mlen;
1203 }
1204
1205 void
1206 vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
1207 src_reg offset)
1208 {
1209 /* Set the surface read offset. */
1210 emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
1211
1212 /* Emit the instruction. Note that this maps to the normal SIMD8
1213 * untyped surface read message, but that's OK because unused
1214 * channels will be masked out.
1215 */
1216 vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
1217 brw_message_reg(0),
1218 src_reg(surf_index), src_reg(1));
1219 inst->mlen = 1;
1220 }
1221
1222 void
1223 vec4_visitor::emit_ndc_computation()
1224 {
1225 if (output_reg[VARYING_SLOT_POS].file == BAD_FILE)
1226 return;
1227
1228 /* Get the position */
1229 src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
1230
1231 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1232 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1233 output_reg[BRW_VARYING_SLOT_NDC] = ndc;
1234
1235 current_annotation = "NDC";
1236 dst_reg ndc_w = ndc;
1237 ndc_w.writemask = WRITEMASK_W;
1238 src_reg pos_w = pos;
1239 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1240 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1241
1242 dst_reg ndc_xyz = ndc;
1243 ndc_xyz.writemask = WRITEMASK_XYZ;
1244
1245 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
1246 }
1247
1248 void
1249 vec4_visitor::emit_psiz_and_flags(dst_reg reg)
1250 {
1251 if (devinfo->gen < 6 &&
1252 ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
1253 output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE ||
1254 devinfo->has_negative_rhw_bug)) {
1255 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1256 dst_reg header1_w = header1;
1257 header1_w.writemask = WRITEMASK_W;
1258
1259 emit(MOV(header1, 0u));
1260
1261 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
1262 src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
1263
1264 current_annotation = "Point size";
1265 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
1266 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
1267 }
1268
1269 if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
1270 current_annotation = "Clipping flags";
1271 dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
1272 dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
1273
1274 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L));
1275 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
1276 emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
1277
1278 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L));
1279 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
1280 emit(SHL(flags1, src_reg(flags1), src_reg(4)));
1281 emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
1282 }
1283
1284 /* i965 clipping workaround:
1285 * 1) Test for -ve rhw
1286 * 2) If set,
1287 * set ndc = (0,0,0,0)
1288 * set ucp[6] = 1
1289 *
1290 * Later, clipping will detect ucp[6] and ensure the primitive is
1291 * clipped against all fixed planes.
1292 */
1293 if (devinfo->has_negative_rhw_bug &&
1294 output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) {
1295 src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
1296 ndc_w.swizzle = BRW_SWIZZLE_WWWW;
1297 emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
1298 vec4_instruction *inst;
1299 inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
1300 inst->predicate = BRW_PREDICATE_NORMAL;
1301 output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F;
1302 inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
1303 inst->predicate = BRW_PREDICATE_NORMAL;
1304 }
1305
1306 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
1307 } else if (devinfo->gen < 6) {
1308 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
1309 } else {
1310 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
1311 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
1312 dst_reg reg_w = reg;
1313 reg_w.writemask = WRITEMASK_W;
1314 src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ]);
1315 reg_as_src.type = reg_w.type;
1316 reg_as_src.swizzle = brw_swizzle_for_size(1);
1317 emit(MOV(reg_w, reg_as_src));
1318 }
1319 if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
1320 dst_reg reg_y = reg;
1321 reg_y.writemask = WRITEMASK_Y;
1322 reg_y.type = BRW_REGISTER_TYPE_D;
1323 output_reg[VARYING_SLOT_LAYER].type = reg_y.type;
1324 emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER])));
1325 }
1326 if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
1327 dst_reg reg_z = reg;
1328 reg_z.writemask = WRITEMASK_Z;
1329 reg_z.type = BRW_REGISTER_TYPE_D;
1330 output_reg[VARYING_SLOT_VIEWPORT].type = reg_z.type;
1331 emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT])));
1332 }
1333 }
1334 }
1335
1336 vec4_instruction *
1337 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
1338 {
1339 assert(varying < VARYING_SLOT_MAX);
1340 assert(output_reg[varying].type == reg.type);
1341 current_annotation = output_reg_annotation[varying];
1342 if (output_reg[varying].file != BAD_FILE)
1343 return emit(MOV(reg, src_reg(output_reg[varying])));
1344 else
1345 return NULL;
1346 }
1347
1348 void
1349 vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
1350 {
1351 reg.type = BRW_REGISTER_TYPE_F;
1352 output_reg[varying].type = reg.type;
1353
1354 switch (varying) {
1355 case VARYING_SLOT_PSIZ:
1356 {
1357 /* PSIZ is always in slot 0, and is coupled with other flags. */
1358 current_annotation = "indices, point width, clip flags";
1359 emit_psiz_and_flags(reg);
1360 break;
1361 }
1362 case BRW_VARYING_SLOT_NDC:
1363 current_annotation = "NDC";
1364 if (output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE)
1365 emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
1366 break;
1367 case VARYING_SLOT_POS:
1368 current_annotation = "gl_Position";
1369 if (output_reg[VARYING_SLOT_POS].file != BAD_FILE)
1370 emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
1371 break;
1372 case VARYING_SLOT_EDGE:
1373 /* This is present when doing unfilled polygons. We're supposed to copy
1374 * the edge flag from the user-provided vertex array
1375 * (glEdgeFlagPointer), or otherwise we'll copy from the current value
1376 * of that attribute (starts as 1.0f). This is then used in clipping to
1377 * determine which edges should be drawn as wireframe.
1378 */
1379 current_annotation = "edge flag";
1380 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
1381 glsl_type::float_type, WRITEMASK_XYZW))));
1382 break;
1383 case BRW_VARYING_SLOT_PAD:
1384 /* No need to write to this slot */
1385 break;
1386 default:
1387 emit_generic_urb_slot(reg, varying);
1388 break;
1389 }
1390 }
1391
1392 static int
1393 align_interleaved_urb_mlen(const struct brw_device_info *devinfo, int mlen)
1394 {
1395 if (devinfo->gen >= 6) {
1396 /* URB data written (does not include the message header reg) must
1397 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
1398 * section 5.4.3.2.2: URB_INTERLEAVED.
1399 *
1400 * URB entries are allocated on a multiple of 1024 bits, so an
1401 * extra 128 bits written here to make the end align to 256 is
1402 * no problem.
1403 */
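      /* mlen includes the message header register, so the URB payload is
       * mlen - 1; rounding mlen up to the next odd value keeps that payload
       * an even number of registers.
       */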
1404 if ((mlen % 2) != 1)
1405 mlen++;
1406 }
1407
1408 return mlen;
1409 }
1410
1411
1412 /**
1413 * Generates the VUE payload plus the necessary URB write instructions to
1414 * output it.
1415 *
1416 * The VUE layout is documented in Volume 2a.
1417 */
1418 void
1419 vec4_visitor::emit_vertex()
1420 {
1421 /* MRF 0 is reserved for the debugger, so start with message header
1422 * in MRF 1.
1423 */
1424 int base_mrf = 1;
1425 int mrf = base_mrf;
1426 /* In the process of generating our URB write message contents, we
1427 * may need to unspill a register or load from an array. Those
1428 * reads would use MRFs 14-15.
1429 */
1430 int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
1431
1432 /* The following assertion verifies that max_usable_mrf causes an
1433 * even-numbered amount of URB write data, which will meet gen6's
1434 * requirements for length alignment.
1435 */
1436 assert ((max_usable_mrf - base_mrf) % 2 == 0);
1437
1438 /* First mrf is the g0-based message header containing URB handles and
1439 * such.
1440 */
1441 emit_urb_write_header(mrf++);
1442
1443 if (devinfo->gen < 6) {
1444 emit_ndc_computation();
1445 }
1446
1447 /* We may need to split this up into several URB writes, so do them in a
1448 * loop.
1449 */
1450 int slot = 0;
1451 bool complete = false;
1452 do {
1453 /* URB offset is in URB row increments, and each of our MRFs is half of
1454 * one of those, since we're doing interleaved writes.
1455 */
1456 int offset = slot / 2;
1457
1458 mrf = base_mrf + 1;
1459 for (; slot < prog_data->vue_map.num_slots; ++slot) {
1460 emit_urb_slot(dst_reg(MRF, mrf++),
1461 prog_data->vue_map.slot_to_varying[slot]);
1462
1463 /* If this was max_usable_mrf, we can't fit anything more into this
1464 * URB WRITE. Same thing if we reached the maximum length available.
1465 */
1466 if (mrf > max_usable_mrf ||
1467 align_interleaved_urb_mlen(devinfo, mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
1468 slot++;
1469 break;
1470 }
1471 }
1472
1473 complete = slot >= prog_data->vue_map.num_slots;
1474 current_annotation = "URB write";
1475 vec4_instruction *inst = emit_urb_write_opcode(complete);
1476 inst->base_mrf = base_mrf;
1477 inst->mlen = align_interleaved_urb_mlen(devinfo, mrf - base_mrf);
1478 inst->offset += offset;
1479 } while(!complete);
1480 }
1481
1482
1483 src_reg
1484 vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
1485 src_reg *reladdr, int reg_offset)
1486 {
1487 /* Because we store the values to scratch interleaved like our
1488 * vertex data, we need to scale the vec4 index by 2.
1489 */
1490 int message_header_scale = 2;
1491
1492 /* Pre-gen6, the message header uses byte offsets instead of vec4
1493 * (16-byte) offset units.
1494 */
1495 if (devinfo->gen < 6)
1496 message_header_scale *= 16;
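   /* The net effect is that consecutive logical vec4 registers end up two
    * scratch slots apart: a vec4-unit offset of 2 on gen6+, or a byte
    * offset of 2 * 16 = 32 on older parts.
    */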
1497
1498 if (reladdr) {
1499 src_reg index = src_reg(this, glsl_type::int_type);
1500
1501 emit_before(block, inst, ADD(dst_reg(index), *reladdr,
1502 src_reg(reg_offset)));
1503 emit_before(block, inst, MUL(dst_reg(index), index,
1504 src_reg(message_header_scale)));
1505
1506 return index;
1507 } else {
1508 return src_reg(reg_offset * message_header_scale);
1509 }
1510 }
1511
1512 src_reg
1513 vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
1514 src_reg *reladdr, int reg_offset)
1515 {
1516 if (reladdr) {
1517 src_reg index = src_reg(this, glsl_type::int_type);
1518
1519 emit_before(block, inst, ADD(dst_reg(index), *reladdr,
1520 src_reg(reg_offset)));
1521
1522 /* Pre-gen6, the message header uses byte offsets instead of vec4
1523 * (16-byte) offset units.
1524 */
1525 if (devinfo->gen < 6) {
1526 emit_before(block, inst, MUL(dst_reg(index), index, src_reg(16)));
1527 }
1528
1529 return index;
1530 } else if (devinfo->gen >= 8) {
1531 /* Store the offset in a GRF so we can send-from-GRF. */
1532 src_reg offset = src_reg(this, glsl_type::int_type);
1533 emit_before(block, inst, MOV(dst_reg(offset), src_reg(reg_offset)));
1534 return offset;
1535 } else {
1536 int message_header_scale = devinfo->gen < 6 ? 16 : 1;
1537 return src_reg(reg_offset * message_header_scale);
1538 }
1539 }
1540
1541 /**
1542 * Emits an instruction before @inst to load the value named by @orig_src
1543 * from scratch space at @base_offset to @temp.
1544 *
1545 * @base_offset is measured in 32-byte units (the size of a register).
1546 */
1547 void
1548 vec4_visitor::emit_scratch_read(bblock_t *block, vec4_instruction *inst,
1549 dst_reg temp, src_reg orig_src,
1550 int base_offset)
1551 {
1552 int reg_offset = base_offset + orig_src.reg_offset;
1553 src_reg index = get_scratch_offset(block, inst, orig_src.reladdr,
1554 reg_offset);
1555
1556 emit_before(block, inst, SCRATCH_READ(temp, index));
1557 }
1558
1559 /**
1560 * Emits an instruction after @inst to store the value to be written
1561 * to @orig_dst to scratch space at @base_offset, from @temp.
1562 *
1563 * @base_offset is measured in 32-byte units (the size of a register).
1564 */
1565 void
1566 vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
1567 int base_offset)
1568 {
1569 int reg_offset = base_offset + inst->dst.reg_offset;
1570 src_reg index = get_scratch_offset(block, inst, inst->dst.reladdr,
1571 reg_offset);
1572
1573 /* Create a temporary register to store *inst's result in.
1574 *
1575 * We have to be careful in MOVing from our temporary result register in
1576 * the scratch write. If we swizzle from channels of the temporary that
1577 * weren't initialized, it will confuse live interval analysis, which will
1578 * make spilling fail to make progress.
1579 */
1580 const src_reg temp = swizzle(retype(src_reg(this, glsl_type::vec4_type),
1581 inst->dst.type),
1582 brw_swizzle_for_mask(inst->dst.writemask));
1583 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
1584 inst->dst.writemask));
1585 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
1586 if (inst->opcode != BRW_OPCODE_SEL)
1587 write->predicate = inst->predicate;
1588 write->ir = inst->ir;
1589 write->annotation = inst->annotation;
1590 inst->insert_after(block, write);
1591
1592 inst->dst.file = temp.file;
1593 inst->dst.reg = temp.reg;
1594 inst->dst.reg_offset = temp.reg_offset;
1595 inst->dst.reladdr = NULL;
1596 }
1597
1598 /**
1599 * Checks if \p src and/or \p src.reladdr require a scratch read, and if so,
1600 * adds the scratch read(s) before \p inst. The function also checks for
1601 * recursive reladdr scratch accesses, issuing the corresponding scratch
1602 * loads and rewriting reladdr references accordingly.
1603 *
1604 * \return \p src if it did not require a scratch load, otherwise, the
1605 * register holding the result of the scratch load that the caller should
1606 * use to rewrite src.
1607 */
1608 src_reg
1609 vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
1610 vec4_instruction *inst, src_reg src)
1611 {
1612 /* Resolve recursive reladdr scratch access by calling ourselves
1613 * with src.reladdr
1614 */
1615 if (src.reladdr)
1616 *src.reladdr = emit_resolve_reladdr(scratch_loc, block, inst,
1617 *src.reladdr);
1618
1619 /* Now handle scratch access on src */
1620 if (src.file == GRF && scratch_loc[src.reg] != -1) {
1621 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
1622 emit_scratch_read(block, inst, temp, src, scratch_loc[src.reg]);
1623 src.reg = temp.reg;
1624 src.reg_offset = temp.reg_offset;
1625 src.reladdr = NULL;
1626 }
1627
1628 return src;
1629 }
1630
1631 /**
1632 * We can't generally support array access in GRF space, because a
1633 * single instruction's destination can only span 2 contiguous
1634 * registers. So, we send all GRF arrays that get variable index
1635 * access to scratch space.
1636 */
1637 void
1638 vec4_visitor::move_grf_array_access_to_scratch()
1639 {
1640 int scratch_loc[this->alloc.count];
1641 memset(scratch_loc, -1, sizeof(scratch_loc));
1642
1643 /* First, calculate the set of virtual GRFs that need to be punted
1644 * to scratch due to having any array access on them, and where in
1645 * scratch.
1646 */
1647 foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
1648 if (inst->dst.file == GRF && inst->dst.reladdr) {
1649 if (scratch_loc[inst->dst.reg] == -1) {
1650 scratch_loc[inst->dst.reg] = last_scratch;
1651 last_scratch += this->alloc.sizes[inst->dst.reg];
1652 }
1653
1654 for (src_reg *iter = inst->dst.reladdr;
1655 iter->reladdr;
1656 iter = iter->reladdr) {
1657 if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
1658 scratch_loc[iter->reg] = last_scratch;
1659 last_scratch += this->alloc.sizes[iter->reg];
1660 }
1661 }
1662 }
1663
1664 for (int i = 0 ; i < 3; i++) {
1665 for (src_reg *iter = &inst->src[i];
1666 iter->reladdr;
1667 iter = iter->reladdr) {
1668 if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
1669 scratch_loc[iter->reg] = last_scratch;
1670 last_scratch += this->alloc.sizes[iter->reg];
1671 }
1672 }
1673 }
1674 }
1675
1676 /* Now, for anything that will be accessed through scratch, rewrite
1677 * it to load/store. Note that this is a _safe list walk, because
1678 * we may generate a new scratch_write instruction after the one
1679 * we're processing.
1680 */
1681 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
1682 /* Set up the annotation tracking for new generated instructions. */
1683 base_ir = inst->ir;
1684 current_annotation = inst->annotation;
1685
1686 /* First handle scratch access on the dst. Notice we have to handle
1687 * the case where the dst's reladdr also points to scratch space.
1688 */
1689 if (inst->dst.reladdr)
1690 *inst->dst.reladdr = emit_resolve_reladdr(scratch_loc, block, inst,
1691 *inst->dst.reladdr);
1692
1693 /* Now that we have handled any (possibly recursive) reladdr scratch
1694 * accesses for dst we can safely do the scratch write for dst itself
1695 */
1696 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1)
1697 emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]);
1698
1699 /* Now handle scratch access on any src. In this case, since inst->src[i]
1700 * already is a src_reg, we can just call emit_resolve_reladdr with
1701 * inst->src[i] and it will take care of handling scratch loads for
1702 * both src and src.reladdr (recursively).
1703 */
1704 for (int i = 0 ; i < 3; i++) {
1705 inst->src[i] = emit_resolve_reladdr(scratch_loc, block, inst,
1706 inst->src[i]);
1707 }
1708 }
1709 }
1710
1711 /**
1712 * Emits an instruction before @inst to load the value named by @orig_src
1713 * from the pull constant buffer (surface) at @base_offset to @temp.
1714 */
1715 void
1716 vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
1717 dst_reg temp, src_reg orig_src,
1718 int base_offset)
1719 {
1720 int reg_offset = base_offset + orig_src.reg_offset;
1721 src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start);
1722 src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
1723 reg_offset);
1724
1725 emit_pull_constant_load_reg(temp,
1726 index,
1727 offset,
1728 block, inst);
1729 }
1730
1731 /**
1732 * Implements array access of uniforms by inserting a
1733 * PULL_CONSTANT_LOAD instruction.
1734 *
1735 * Unlike temporary GRF array access (where we don't support it due to
1736 * the difficulty of doing relative addressing on instruction
1737 * destinations), we could potentially do array access of uniforms
1738 * that were loaded in GRF space as push constants. In real-world
1739 * usage we've seen, though, the arrays being used are always larger
1740 * than we could load as push constants, so just always move all
1741 * uniform array access out to a pull constant buffer.
1742 */
1743 void
1744 vec4_visitor::move_uniform_array_access_to_pull_constants()
1745 {
1746 int pull_constant_loc[this->uniforms];
1747 memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
1748 bool nested_reladdr;
1749
1750 /* Walk through and find array access of uniforms. Put a copy of that
1751 * uniform in the pull constant buffer.
1752 *
1753 * Note that we don't move constant-indexed accesses to arrays. No
1754 * testing has been done of the performance impact of this choice.
1755 */
1756 do {
1757 nested_reladdr = false;
1758
1759 foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
1760 for (int i = 0 ; i < 3; i++) {
1761 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
1762 continue;
1763
1764 int uniform = inst->src[i].reg;
1765
1766 if (inst->src[i].reladdr->reladdr)
1767 nested_reladdr = true; /* will need another pass */
1768
1769 /* If this array isn't already present in the pull constant buffer,
1770 * add it.
1771 */
1772 if (pull_constant_loc[uniform] == -1) {
1773 const gl_constant_value **values =
1774 &stage_prog_data->param[uniform * 4];
1775
1776 pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4;
1777
1778 assert(uniform < uniform_array_size);
1779 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
1780 stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
1781 = values[j];
1782 }
1783 }
1784
1785 /* Set up the annotation tracking for new generated instructions. */
1786 base_ir = inst->ir;
1787 current_annotation = inst->annotation;
1788
1789 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
1790
1791 emit_pull_constant_load(block, inst, temp, inst->src[i],
1792 pull_constant_loc[uniform]);
1793
1794 inst->src[i].file = temp.file;
1795 inst->src[i].reg = temp.reg;
1796 inst->src[i].reg_offset = temp.reg_offset;
1797 inst->src[i].reladdr = NULL;
1798 }
1799 }
1800 } while (nested_reladdr);
1801
1802 /* Now there are no accesses of the UNIFORM file with a reladdr, so
1803 * no need to track them as larger-than-vec4 objects. This will be
1804 * relied on in cutting out unused uniform vectors from push
1805 * constants.
1806 */
1807 split_uniform_registers();
1808 }
1809
1810 void
1811 vec4_visitor::resolve_ud_negate(src_reg *reg)
1812 {
1813 if (reg->type != BRW_REGISTER_TYPE_UD ||
1814 !reg->negate)
1815 return;
1816
1817 src_reg temp = src_reg(this, glsl_type::uvec4_type);
1818 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
1819 *reg = temp;
1820 }
1821
1822 vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
1823 void *log_data,
1824 const struct brw_sampler_prog_key_data *key_tex,
1825 struct brw_vue_prog_data *prog_data,
1826 const nir_shader *shader,
1827 void *mem_ctx,
1828 bool no_spills,
1829 int shader_time_index)
1830 : backend_shader(compiler, log_data, mem_ctx, shader, &prog_data->base),
1831 key_tex(key_tex),
1832 prog_data(prog_data),
1833 fail_msg(NULL),
1834 first_non_payload_grf(0),
1835 need_all_constants_in_pull_buffer(false),
1836 no_spills(no_spills),
1837 shader_time_index(shader_time_index),
1838 last_scratch(0)
1839 {
1840 this->failed = false;
1841
1842 this->base_ir = NULL;
1843 this->current_annotation = NULL;
1844 memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
1845
1846 this->virtual_grf_start = NULL;
1847 this->virtual_grf_end = NULL;
1848 this->live_intervals = NULL;
1849
1850 this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
1851
1852 this->uniforms = 0;
1853
1854 /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
1855 * at least one. See setup_uniforms() in brw_vec4.cpp.
1856 */
1857 this->uniform_array_size = 1;
1858 if (prog_data) {
1859 this->uniform_array_size =
1860 MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1);
1861 }
1862
1863 this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
1864 }
1865
1866 vec4_visitor::~vec4_visitor()
1867 {
1868 }
1869
1870
1871 void
1872 vec4_visitor::fail(const char *format, ...)
1873 {
1874 va_list va;
1875 char *msg;
1876
1877 if (failed)
1878 return;
1879
1880 failed = true;
1881
1882 va_start(va, format);
1883 msg = ralloc_vasprintf(mem_ctx, format, va);
1884 va_end(va);
1885 msg = ralloc_asprintf(mem_ctx, "%s compile failed: %s\n", stage_abbrev, msg);
1886
1887 this->fail_msg = msg;
1888
1889 if (debug_enabled) {
1890 fprintf(stderr, "%s", msg);
1891 }
1892 }
1893
1894 } /* namespace brw */