i965: Emit MOVs for neg/abs.
[mesa.git] / src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 #include "glsl/ir_uniform.h"
26 extern "C" {
27 #include "main/context.h"
28 #include "main/macros.h"
29 #include "program/prog_parameter.h"
30 #include "program/sampler.h"
31 }
32
33 namespace brw {
34
35 vec4_instruction::vec4_instruction(vec4_visitor *v,
36 enum opcode opcode, dst_reg dst,
37 src_reg src0, src_reg src1, src_reg src2)
38 {
39 this->opcode = opcode;
40 this->dst = dst;
41 this->src[0] = src0;
42 this->src[1] = src1;
43 this->src[2] = src2;
44 this->ir = v->base_ir;
45 this->annotation = v->current_annotation;
46 }
47
48 vec4_instruction *
49 vec4_visitor::emit(vec4_instruction *inst)
50 {
51 this->instructions.push_tail(inst);
52
53 return inst;
54 }
55
56 vec4_instruction *
57 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
58 {
59 new_inst->ir = inst->ir;
60 new_inst->annotation = inst->annotation;
61
62 inst->insert_before(new_inst);
63
64 return inst;
65 }
66
67 vec4_instruction *
68 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
69 src_reg src0, src_reg src1, src_reg src2)
70 {
71 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
72 src0, src1, src2));
73 }
74
75
76 vec4_instruction *
77 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
78 {
79 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
80 }
81
82 vec4_instruction *
83 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
84 {
85 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
86 }
87
88 vec4_instruction *
89 vec4_visitor::emit(enum opcode opcode)
90 {
91 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
92 }
93
94 #define ALU1(op) \
95 vec4_instruction * \
96 vec4_visitor::op(dst_reg dst, src_reg src0) \
97 { \
98 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
99 src0); \
100 }
101
102 #define ALU2(op) \
103 vec4_instruction * \
104 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
105 { \
106 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
107 src0, src1); \
108 }
109
110 #define ALU3(op) \
111 vec4_instruction * \
112 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\
113 { \
114 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
115 src0, src1, src2); \
116 }
117
118 ALU1(NOT)
119 ALU1(MOV)
120 ALU1(FRC)
121 ALU1(RNDD)
122 ALU1(RNDE)
123 ALU1(RNDZ)
124 ALU1(F32TO16)
125 ALU1(F16TO32)
126 ALU2(ADD)
127 ALU2(MUL)
128 ALU2(MACH)
129 ALU2(AND)
130 ALU2(OR)
131 ALU2(XOR)
132 ALU2(DP3)
133 ALU2(DP4)
134 ALU2(DPH)
135 ALU2(SHL)
136 ALU2(SHR)
137 ALU2(ASR)
138 ALU3(LRP)
139 ALU1(BFREV)
140 ALU3(BFE)
141 ALU2(BFI1)
142 ALU3(BFI2)
143 ALU1(FBH)
144 ALU1(FBL)
145 ALU1(CBIT)
146
147 /** Gen4 predicated IF. */
148 vec4_instruction *
149 vec4_visitor::IF(uint32_t predicate)
150 {
151 vec4_instruction *inst;
152
153 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
154 inst->predicate = predicate;
155
156 return inst;
157 }
158
159 /** Gen6+ IF with embedded comparison. */
160 vec4_instruction *
161 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
162 {
163 assert(brw->gen >= 6);
164
165 vec4_instruction *inst;
166
167 resolve_ud_negate(&src0);
168 resolve_ud_negate(&src1);
169
170 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
171 src0, src1);
172 inst->conditional_mod = condition;
173
174 return inst;
175 }
176
177 /**
178 * CMP: Sets the low bit of the destination channels with the result
179 * of the comparison, while the upper bits are undefined, and updates
180 * the flag register with the packed 16 bits of the result.
181 */
182 vec4_instruction *
183 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
184 {
185 vec4_instruction *inst;
186
187 /* original gen4 does type conversion to the destination type
188 * before comparison, producing garbage results for floating
189 * point comparisons.
190 */
191 if (brw->gen == 4) {
192 dst.type = src0.type;
193 if (dst.file == HW_REG)
194 dst.fixed_hw_reg.type = dst.type;
195 }
196
197 resolve_ud_negate(&src0);
198 resolve_ud_negate(&src1);
199
200 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
201 inst->conditional_mod = condition;
202
203 return inst;
204 }
205
206 vec4_instruction *
207 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
208 {
209 vec4_instruction *inst;
210
211 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
212 dst, index);
213 inst->base_mrf = 14;
214 inst->mlen = 2;
215
216 return inst;
217 }
218
219 vec4_instruction *
220 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
221 {
222 vec4_instruction *inst;
223
224 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
225 dst, src, index);
226 inst->base_mrf = 13;
227 inst->mlen = 3;
228
229 return inst;
230 }
231
232 void
233 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
234 {
235 static enum opcode dot_opcodes[] = {
236 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
237 };
238
239 emit(dot_opcodes[elements - 2], dst, src0, src1);
240 }
241
242 src_reg
243 vec4_visitor::fix_3src_operand(src_reg src)
244 {
245 /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
246 * able to use vertical stride of zero to replicate the vec4 uniform, like
247 *
248 * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
249 *
250 * But you can't, since vertical stride is always four in three-source
251 * instructions. Instead, insert a MOV instruction to do the replication so
252 * that the three-source instruction can consume it.
253 */
254
255 /* The MOV is only needed if the source is a uniform or immediate. */
256 if (src.file != UNIFORM && src.file != IMM)
257 return src;
258
259 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
260 expanded.type = src.type;
261 emit(MOV(expanded, src));
262 return src_reg(expanded);
263 }
264
265 src_reg
266 vec4_visitor::fix_math_operand(src_reg src)
267 {
268 /* The gen6 math instruction ignores the source modifiers --
269 * swizzle, abs, negate, and at least some parts of the register
270 * region description.
271 *
272 * Rather than trying to enumerate all these cases, *always* expand the
273 * operand to a temp GRF for gen6.
274 *
275 * For gen7, keep the operand as-is, except if immediate, which gen7 still
276 * can't use.
277 */
278
279 if (brw->gen == 7 && src.file != IMM)
280 return src;
281
282 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
283 expanded.type = src.type;
284 emit(MOV(expanded, src));
285 return src_reg(expanded);
286 }
287
288 void
289 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
290 {
291 src = fix_math_operand(src);
292
293 if (dst.writemask != WRITEMASK_XYZW) {
294 /* The gen6 math instruction must be align1, so we can't do
295 * writemasks.
296 */
297 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
298
299 emit(opcode, temp_dst, src);
300
301 emit(MOV(dst, src_reg(temp_dst)));
302 } else {
303 emit(opcode, dst, src);
304 }
305 }
306
307 void
308 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
309 {
310 vec4_instruction *inst = emit(opcode, dst, src);
311 inst->base_mrf = 1;
312 inst->mlen = 1;
313 }
314
315 void
316 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
317 {
318 switch (opcode) {
319 case SHADER_OPCODE_RCP:
320 case SHADER_OPCODE_RSQ:
321 case SHADER_OPCODE_SQRT:
322 case SHADER_OPCODE_EXP2:
323 case SHADER_OPCODE_LOG2:
324 case SHADER_OPCODE_SIN:
325 case SHADER_OPCODE_COS:
326 break;
327 default:
328 assert(!"not reached: bad math opcode");
329 return;
330 }
331
332 if (brw->gen >= 6) {
333 return emit_math1_gen6(opcode, dst, src);
334 } else {
335 return emit_math1_gen4(opcode, dst, src);
336 }
337 }
338
339 void
340 vec4_visitor::emit_math2_gen6(enum opcode opcode,
341 dst_reg dst, src_reg src0, src_reg src1)
342 {
343 src0 = fix_math_operand(src0);
344 src1 = fix_math_operand(src1);
345
346 if (dst.writemask != WRITEMASK_XYZW) {
347 /* The gen6 math instruction must be align1, so we can't do
348 * writemasks.
349 */
350 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
351 temp_dst.type = dst.type;
352
353 emit(opcode, temp_dst, src0, src1);
354
355 emit(MOV(dst, src_reg(temp_dst)));
356 } else {
357 emit(opcode, dst, src0, src1);
358 }
359 }
360
361 void
362 vec4_visitor::emit_math2_gen4(enum opcode opcode,
363 dst_reg dst, src_reg src0, src_reg src1)
364 {
365 vec4_instruction *inst = emit(opcode, dst, src0, src1);
366 inst->base_mrf = 1;
367 inst->mlen = 2;
368 }
369
370 void
371 vec4_visitor::emit_math(enum opcode opcode,
372 dst_reg dst, src_reg src0, src_reg src1)
373 {
374 switch (opcode) {
375 case SHADER_OPCODE_POW:
376 case SHADER_OPCODE_INT_QUOTIENT:
377 case SHADER_OPCODE_INT_REMAINDER:
378 break;
379 default:
380 assert(!"not reached: unsupported binary math opcode");
381 return;
382 }
383
384 if (brw->gen >= 6) {
385 return emit_math2_gen6(opcode, dst, src0, src1);
386 } else {
387 return emit_math2_gen4(opcode, dst, src0, src1);
388 }
389 }
390
391 void
392 vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
393 {
394 if (brw->gen < 7)
395 assert(!"ir_unop_pack_half_2x16 should be lowered");
396
397 assert(dst.type == BRW_REGISTER_TYPE_UD);
398 assert(src0.type == BRW_REGISTER_TYPE_F);
399
400 /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
401 *
402 * Because this instruction does not have a 16-bit floating-point type,
403 * the destination data type must be Word (W).
404 *
405 * The destination must be DWord-aligned and specify a horizontal stride
406 * (HorzStride) of 2. The 16-bit result is stored in the lower word of
407 * each destination channel and the upper word is not modified.
408 *
409 * The above restriction implies that the f32to16 instruction must use
410 * align1 mode, because only in align1 mode is it possible to specify
411 * horizontal stride. We choose here to defy the hardware docs and emit
412 * align16 instructions.
413 *
414 * (I [chadv] did attempt to emit align1 instructions for VS f32to16
415 * instructions. I was partially successful in that the code passed all
416 * tests. However, the code was dubiously correct and fragile, and the
417 * tests were not harsh enough to probe that frailty. Not trusting the
418 * code, I chose instead to remain in align16 mode in defiance of the hw
419 * docs).
420 *
421 * I've [chadv] experimentally confirmed that, on gen7 hardware and the
422 * simulator, emitting a f32to16 in align16 mode with UD as destination
423 * data type is safe. The behavior differs from that specified in the PRM
424 * in that the upper word of each destination channel is cleared to 0.
425 */
426
427 dst_reg tmp_dst(this, glsl_type::uvec2_type);
428 src_reg tmp_src(tmp_dst);
429
430 #if 0
431 /* Verify the undocumented behavior on which the following instructions
432 * rely. If f32to16 fails to clear the upper word of the X and Y channels,
433 * then the result of the bit-or instruction below will be incorrect.
434 *
435 * You should inspect the disasm output in order to verify that the MOV is
436 * not optimized away.
437 */
438 emit(MOV(tmp_dst, src_reg(0x12345678u)));
439 #endif
440
441 /* Give tmp the form below, where "." means untouched.
442 *
443 * w z y x w z y x
444 * |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
445 *
446 * That the upper word of each write-channel be 0 is required for the
447 * following bit-shift and bit-or instructions to work. Note that this
448 * relies on the undocumented hardware behavior mentioned above.
449 */
450 tmp_dst.writemask = WRITEMASK_XY;
451 emit(F32TO16(tmp_dst, src0));
452
453 /* Give the write-channels of dst the form:
454 * 0xhhhh0000
455 */
456 tmp_src.swizzle = SWIZZLE_Y;
457 emit(SHL(dst, tmp_src, src_reg(16u)));
458
459 /* Finally, give the write-channels of dst the form of packHalf2x16's
460 * output:
461 * 0xhhhhllll
462 */
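   /* For example, with src0 = vec2(0.0, 1.0): the F32TO16 above leaves
    * 0x0000 in tmp.x and 0x3C00 in tmp.y, the SHL put 0x3C000000 in dst,
    * and the OR below yields the packHalf2x16() result 0x3C000000.
    */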
463 tmp_src.swizzle = SWIZZLE_X;
464 emit(OR(dst, src_reg(dst), tmp_src));
465 }
466
467 void
468 vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
469 {
470 if (brw->gen < 7)
471 assert(!"ir_unop_unpack_half_2x16 should be lowered");
472
473 assert(dst.type == BRW_REGISTER_TYPE_F);
474 assert(src0.type == BRW_REGISTER_TYPE_UD);
475
476 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
477 *
478 * Because this instruction does not have a 16-bit floating-point type,
479 * the source data type must be Word (W). The destination type must be
480 * F (Float).
481 *
482 * To use W as the source data type, we must adjust horizontal strides,
483 * which is only possible in align1 mode. All my [chadv] attempts at
484 * emitting align1 instructions for unpackHalf2x16 failed to pass the
485 * Piglit tests, so I gave up.
486 *
487 * I've verified that, on gen7 hardware and the simulator, it is safe to
488 * emit f16to32 in align16 mode with UD as source data type.
489 */
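   /* For example, unpacking 0x3C000000: the AND below extracts 0x0000 into
    * tmp.x and the SHR extracts 0x3C00 into tmp.y, so F16TO32 produces
    * vec2(0.0, 1.0), matching unpackHalf2x16().
    */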
490
491 dst_reg tmp_dst(this, glsl_type::uvec2_type);
492 src_reg tmp_src(tmp_dst);
493
494 tmp_dst.writemask = WRITEMASK_X;
495 emit(AND(tmp_dst, src0, src_reg(0xffffu)));
496
497 tmp_dst.writemask = WRITEMASK_Y;
498 emit(SHR(tmp_dst, src0, src_reg(16u)));
499
500 dst.writemask = WRITEMASK_XY;
501 emit(F16TO32(dst, tmp_src));
502 }
503
504 void
505 vec4_visitor::visit_instructions(const exec_list *list)
506 {
507 foreach_list(node, list) {
508 ir_instruction *ir = (ir_instruction *)node;
509
510 base_ir = ir;
511 ir->accept(this);
512 }
513 }
514
515
516 static int
517 type_size(const struct glsl_type *type)
518 {
519 unsigned int i;
520 int size;
521
522 switch (type->base_type) {
523 case GLSL_TYPE_UINT:
524 case GLSL_TYPE_INT:
525 case GLSL_TYPE_FLOAT:
526 case GLSL_TYPE_BOOL:
527 if (type->is_matrix()) {
528 return type->matrix_columns;
529 } else {
530 /* Regardless of size of vector, it gets a vec4. This is bad
531 * packing for things like floats, but otherwise arrays become a
532 * mess. Hopefully a later pass over the code can pack scalars
533 * down if appropriate.
534 */
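	 /* For example: a float, vec2 or vec4 each occupy one vec4 slot
	  * here, a mat4 occupies four (one per column), and float[8]
	  * occupies eight.
	  */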
535 return 1;
536 }
537 case GLSL_TYPE_ARRAY:
538 assert(type->length > 0);
539 return type_size(type->fields.array) * type->length;
540 case GLSL_TYPE_STRUCT:
541 size = 0;
542 for (i = 0; i < type->length; i++) {
543 size += type_size(type->fields.structure[i].type);
544 }
545 return size;
546 case GLSL_TYPE_SAMPLER:
547 /* Samplers take up one slot in UNIFORMS[], but they're baked in
548 * at link time.
549 */
550 return 1;
551 case GLSL_TYPE_VOID:
552 case GLSL_TYPE_ERROR:
553 case GLSL_TYPE_INTERFACE:
554 assert(0);
555 break;
556 }
557
558 return 0;
559 }
560
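/**
 * Allocate a new virtual GRF of "size" vec4 slots, growing the size and
 * reg-map bookkeeping arrays as needed, and return the new register's index.
 */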
561 int
562 vec4_visitor::virtual_grf_alloc(int size)
563 {
564 if (virtual_grf_array_size <= virtual_grf_count) {
565 if (virtual_grf_array_size == 0)
566 virtual_grf_array_size = 16;
567 else
568 virtual_grf_array_size *= 2;
569 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
570 virtual_grf_array_size);
571 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
572 virtual_grf_array_size);
573 }
574 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
575 virtual_grf_reg_count += size;
576 virtual_grf_sizes[virtual_grf_count] = size;
577 return virtual_grf_count++;
578 }
579
580 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
581 {
582 init();
583
584 this->file = GRF;
585 this->reg = v->virtual_grf_alloc(type_size(type));
586
587 if (type->is_array() || type->is_record()) {
588 this->swizzle = BRW_SWIZZLE_NOOP;
589 } else {
590 this->swizzle = swizzle_for_size(type->vector_elements);
591 }
592
593 this->type = brw_type_for_base_type(type);
594 }
595
596 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
597 {
598 init();
599
600 this->file = GRF;
601 this->reg = v->virtual_grf_alloc(type_size(type));
602
603 if (type->is_array() || type->is_record()) {
604 this->writemask = WRITEMASK_XYZW;
605 } else {
606 this->writemask = (1 << type->vector_elements) - 1;
607 }
608
609 this->type = brw_type_for_base_type(type);
610 }
611
612 /* Our support for uniforms is piggy-backed on the struct
613 * gl_fragment_program, because that's where the values actually
614 * get stored, rather than in some global gl_shader_program uniform
615 * store.
616 */
617 void
618 vec4_visitor::setup_uniform_values(ir_variable *ir)
619 {
620 int namelen = strlen(ir->name);
621
622 /* The data for our (non-builtin) uniforms is stored in a series of
623 * gl_uniform_driver_storage structs for each subcomponent that
624 * glGetUniformLocation() could name. We know it's been set up in the same
625 * order we'd walk the type, so walk the list of storage and find anything
626 * with our name, or the prefix of a component that starts with our name.
627 */
628 for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
629 struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
630
631 if (strncmp(ir->name, storage->name, namelen) != 0 ||
632 (storage->name[namelen] != 0 &&
633 storage->name[namelen] != '.' &&
634 storage->name[namelen] != '[')) {
635 continue;
636 }
637
638 gl_constant_value *components = storage->storage;
639 unsigned vector_count = (MAX2(storage->array_elements, 1) *
640 storage->type->matrix_columns);
641
642 for (unsigned s = 0; s < vector_count; s++) {
643 uniform_vector_size[uniforms] = storage->type->vector_elements;
644
645 int i;
646 for (i = 0; i < uniform_vector_size[uniforms]; i++) {
647 prog_data->param[uniforms * 4 + i] = &components->f;
648 components++;
649 }
650 for (; i < 4; i++) {
651 static float zero = 0;
652 prog_data->param[uniforms * 4 + i] = &zero;
653 }
654
655 uniforms++;
656 }
657 }
658 }
659
660 void
661 vec4_visitor::setup_uniform_clipplane_values()
662 {
663 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
664
665 for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
666 this->uniform_vector_size[this->uniforms] = 4;
667 this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
668 this->userplane[i].type = BRW_REGISTER_TYPE_F;
669 for (int j = 0; j < 4; ++j) {
670 prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j];
671 }
672 ++this->uniforms;
673 }
674 }
675
676 /* Our support for builtin uniforms is even scarier than non-builtin.
677 * It sits on top of the PROG_STATE_VAR parameters that are
678 * automatically updated from GL context state.
679 */
680 void
681 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
682 {
683 const ir_state_slot *const slots = ir->state_slots;
684 assert(ir->state_slots != NULL);
685
686 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
687 /* This state reference has already been setup by ir_to_mesa,
688 * but we'll get the same index back here. We can reference
689 * ParameterValues directly, since unlike brw_fs.cpp, we never
690 * add new state references during compile.
691 */
692 int index = _mesa_add_state_reference(this->prog->Parameters,
693 (gl_state_index *)slots[i].tokens);
694 float *values = &this->prog->Parameters->ParameterValues[index][0].f;
695
696 this->uniform_vector_size[this->uniforms] = 0;
697 /* Add each of the unique swizzled channels of the element.
698 * This will end up matching the size of the glsl_type of this field.
699 */
700 int last_swiz = -1;
701 for (unsigned int j = 0; j < 4; j++) {
702 int swiz = GET_SWZ(slots[i].swizzle, j);
703 last_swiz = swiz;
704
705 prog_data->param[this->uniforms * 4 + j] = &values[swiz];
706 if (swiz <= last_swiz)
707 this->uniform_vector_size[this->uniforms]++;
708 }
709 this->uniforms++;
710 }
711 }
712
713 dst_reg *
714 vec4_visitor::variable_storage(ir_variable *var)
715 {
716 return (dst_reg *)hash_table_find(this->variable_ht, var);
717 }
718
719 void
720 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
721 {
722 ir_expression *expr = ir->as_expression();
723
724 *predicate = BRW_PREDICATE_NORMAL;
725
726 if (expr) {
727 src_reg op[2];
728 vec4_instruction *inst;
729
730 assert(expr->get_num_operands() <= 2);
731 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
732 expr->operands[i]->accept(this);
733 op[i] = this->result;
734
735 resolve_ud_negate(&op[i]);
736 }
737
738 switch (expr->operation) {
739 case ir_unop_logic_not:
740 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
741 inst->conditional_mod = BRW_CONDITIONAL_Z;
742 break;
743
744 case ir_binop_logic_xor:
745 inst = emit(XOR(dst_null_d(), op[0], op[1]));
746 inst->conditional_mod = BRW_CONDITIONAL_NZ;
747 break;
748
749 case ir_binop_logic_or:
750 inst = emit(OR(dst_null_d(), op[0], op[1]));
751 inst->conditional_mod = BRW_CONDITIONAL_NZ;
752 break;
753
754 case ir_binop_logic_and:
755 inst = emit(AND(dst_null_d(), op[0], op[1]));
756 inst->conditional_mod = BRW_CONDITIONAL_NZ;
757 break;
758
759 case ir_unop_f2b:
760 if (brw->gen >= 6) {
761 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
762 } else {
763 inst = emit(MOV(dst_null_f(), op[0]));
764 inst->conditional_mod = BRW_CONDITIONAL_NZ;
765 }
766 break;
767
768 case ir_unop_i2b:
769 if (brw->gen >= 6) {
770 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
771 } else {
772 inst = emit(MOV(dst_null_d(), op[0]));
773 inst->conditional_mod = BRW_CONDITIONAL_NZ;
774 }
775 break;
776
777 case ir_binop_all_equal:
778 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
779 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
780 break;
781
782 case ir_binop_any_nequal:
783 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
784 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
785 break;
786
787 case ir_unop_any:
788 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
789 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
790 break;
791
792 case ir_binop_greater:
793 case ir_binop_gequal:
794 case ir_binop_less:
795 case ir_binop_lequal:
796 case ir_binop_equal:
797 case ir_binop_nequal:
798 emit(CMP(dst_null_d(), op[0], op[1],
799 brw_conditional_for_comparison(expr->operation)));
800 break;
801
802 default:
803 assert(!"not reached");
804 break;
805 }
806 return;
807 }
808
809 ir->accept(this);
810
811 resolve_ud_negate(&this->result);
812
813 if (brw->gen >= 6) {
814 vec4_instruction *inst = emit(AND(dst_null_d(),
815 this->result, src_reg(1)));
816 inst->conditional_mod = BRW_CONDITIONAL_NZ;
817 } else {
818 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
819 inst->conditional_mod = BRW_CONDITIONAL_NZ;
820 }
821 }
822
823 /**
824 * Emit a gen6 IF statement with the comparison folded into the IF
825 * instruction.
826 */
827 void
828 vec4_visitor::emit_if_gen6(ir_if *ir)
829 {
830 ir_expression *expr = ir->condition->as_expression();
831
832 if (expr) {
833 src_reg op[2];
834 dst_reg temp;
835
836 assert(expr->get_num_operands() <= 2);
837 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
838 expr->operands[i]->accept(this);
839 op[i] = this->result;
840 }
841
842 switch (expr->operation) {
843 case ir_unop_logic_not:
844 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
845 return;
846
847 case ir_binop_logic_xor:
848 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
849 return;
850
851 case ir_binop_logic_or:
852 temp = dst_reg(this, glsl_type::bool_type);
853 emit(OR(temp, op[0], op[1]));
854 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
855 return;
856
857 case ir_binop_logic_and:
858 temp = dst_reg(this, glsl_type::bool_type);
859 emit(AND(temp, op[0], op[1]));
860 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
861 return;
862
863 case ir_unop_f2b:
864 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
865 return;
866
867 case ir_unop_i2b:
868 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
869 return;
870
871 case ir_binop_greater:
872 case ir_binop_gequal:
873 case ir_binop_less:
874 case ir_binop_lequal:
875 case ir_binop_equal:
876 case ir_binop_nequal:
877 emit(IF(op[0], op[1],
878 brw_conditional_for_comparison(expr->operation)));
879 return;
880
881 case ir_binop_all_equal:
882 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
883 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
884 return;
885
886 case ir_binop_any_nequal:
887 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
888 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
889 return;
890
891 case ir_unop_any:
892 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
893 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
894 return;
895
896 default:
897 assert(!"not reached");
898 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
899 return;
900 }
901 return;
902 }
903
904 ir->condition->accept(this);
905
906 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
907 }
908
909 static dst_reg
910 with_writemask(dst_reg const & r, int mask)
911 {
912 dst_reg result = r;
913 result.writemask = mask;
914 return result;
915 }
916
917 void
918 vec4_vs_visitor::emit_prolog()
919 {
920 dst_reg sign_recovery_shift;
921 dst_reg normalize_factor;
922 dst_reg es3_normalize_factor;
923
924 for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
925 if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) {
926 uint8_t wa_flags = vs_compile->key.gl_attrib_wa_flags[i];
927 dst_reg reg(ATTR, i);
928 dst_reg reg_d = reg;
929 reg_d.type = BRW_REGISTER_TYPE_D;
930 dst_reg reg_ud = reg;
931 reg_ud.type = BRW_REGISTER_TYPE_UD;
932
933 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
934 * come in as floating point conversions of the integer values.
935 */
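	    /* GL_FIXED is a 16.16 fixed-point format, so the multiply by
	     * 1.0f / 65536.0f below recovers the intended value; the
	     * component count encoded in wa_flags selects how many channels
	     * need the fixup.
	     */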
936 if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) {
937 dst_reg dst = reg;
938 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
939 dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
940 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
941 }
942
943 /* Do sign recovery for 2101010 formats if required. */
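	    /* The 10-bit X/Y/Z components (2-bit W) arrive without sign
	     * extension, so shift left until the component's sign bit reaches
	     * bit 31 and arithmetic-shift back down: hence the <22,22,22,30>
	     * shift counts below.
	     */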
944 if (wa_flags & BRW_ATTRIB_WA_SIGN) {
945 if (sign_recovery_shift.file == BAD_FILE) {
946 /* shift constant: <22,22,22,30> */
947 sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
948 emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
949 emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
950 }
951
952 emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
953 emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift)));
954 }
955
956 /* Apply BGRA swizzle if required. */
957 if (wa_flags & BRW_ATTRIB_WA_BGRA) {
958 src_reg temp = src_reg(reg);
959 temp.swizzle = BRW_SWIZZLE4(2,1,0,3);
960 emit(MOV(reg, temp));
961 }
962
963 if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
964 /* ES 3.0 has different rules for converting signed normalized
965 * fixed-point numbers than desktop GL.
966 */
967 if (_mesa_is_gles3(ctx) && (wa_flags & BRW_ATTRIB_WA_SIGN)) {
968 /* According to equation 2.2 of the ES 3.0 specification,
969 * signed normalization conversion is done by:
970 *
971 * f = c / (2^(b-1)-1)
972 */
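		  /* With b = 10 for XYZ and b = 2 for W, the divisors below
		   * are 2^9 - 1 = 511 and 2^1 - 1 = 1.
		   */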
973 if (es3_normalize_factor.file == BAD_FILE) {
974 /* mul constant: 1 / (2^(b-1) - 1) */
975 es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
976 emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_XYZ),
977 src_reg(1.0f / ((1<<9) - 1))));
978 emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_W),
979 src_reg(1.0f / ((1<<1) - 1))));
980 }
981
982 dst_reg dst = reg;
983 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
984 emit(MOV(dst, src_reg(reg_d)));
985 emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
986 emit_minmax(BRW_CONDITIONAL_G, dst, src_reg(dst), src_reg(-1.0f));
987 } else {
988 /* The following equations are from the OpenGL 3.2 specification:
989 *
990 * 2.1 unsigned normalization
991 * f = c/(2^n-1)
992 *
993 * 2.2 signed normalization
994 * f = (2c+1)/(2^n-1)
995 *
996 * Both of these share a common divisor, which is represented by
997 * "normalize_factor" in the code below.
998 */
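		  /* For the 10-bit components, for example, this gives
		   * f = c/1023 for unsigned data and f = (2c+1)/1023 for
		   * signed data, so c = 511 maps to exactly 1.0.
		   */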
999 if (normalize_factor.file == BAD_FILE) {
1000 /* 1 / (2^b - 1) for b=<10,10,10,2> */
1001 normalize_factor = dst_reg(this, glsl_type::vec4_type);
1002 emit(MOV(with_writemask(normalize_factor, WRITEMASK_XYZ),
1003 src_reg(1.0f / ((1<<10) - 1))));
1004 emit(MOV(with_writemask(normalize_factor, WRITEMASK_W),
1005 src_reg(1.0f / ((1<<2) - 1))));
1006 }
1007
1008 dst_reg dst = reg;
1009 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
1010 emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
1011
1012 /* For signed normalization, we want the numerator to be 2c+1. */
1013 if (wa_flags & BRW_ATTRIB_WA_SIGN) {
1014 emit(MUL(dst, src_reg(dst), src_reg(2.0f)));
1015 emit(ADD(dst, src_reg(dst), src_reg(1.0f)));
1016 }
1017
1018 emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
1019 }
1020 }
1021
1022 if (wa_flags & BRW_ATTRIB_WA_SCALE) {
1023 dst_reg dst = reg;
1024 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
1025 emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
1026 }
1027 }
1028 }
1029 }
1030
1031
1032 dst_reg *
1033 vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir)
1034 {
1035 /* VertexID is stored by the VF as the last vertex element, but
1036 * we don't represent it with a flag in inputs_read, so we call
1037 * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
1038 */
1039 dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
1040 vs_prog_data->uses_vertexid = true;
1041
1042 switch (ir->location) {
1043 case SYSTEM_VALUE_VERTEX_ID:
1044 reg->writemask = WRITEMASK_X;
1045 break;
1046 case SYSTEM_VALUE_INSTANCE_ID:
1047 reg->writemask = WRITEMASK_Y;
1048 break;
1049 default:
1050 assert(!"not reached");
1051 break;
1052 }
1053
1054 return reg;
1055 }
1056
1057
1058 void
1059 vec4_visitor::visit(ir_variable *ir)
1060 {
1061 dst_reg *reg = NULL;
1062
1063 if (variable_storage(ir))
1064 return;
1065
1066 switch (ir->mode) {
1067 case ir_var_shader_in:
1068 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
1069 break;
1070
1071 case ir_var_shader_out:
1072 reg = new(mem_ctx) dst_reg(this, ir->type);
1073
1074 for (int i = 0; i < type_size(ir->type); i++) {
1075 output_reg[ir->location + i] = *reg;
1076 output_reg[ir->location + i].reg_offset = i;
1077 output_reg[ir->location + i].type =
1078 brw_type_for_base_type(ir->type->get_scalar_type());
1079 output_reg_annotation[ir->location + i] = ir->name;
1080 }
1081 break;
1082
1083 case ir_var_auto:
1084 case ir_var_temporary:
1085 reg = new(mem_ctx) dst_reg(this, ir->type);
1086 break;
1087
1088 case ir_var_uniform:
1089 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
1090
1091 /* Thanks to the lower_ubo_reference pass, we will see only
1092 * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
1093 * variables, so no need for them to be in variable_ht.
1094 */
1095 if (ir->is_in_uniform_block())
1096 return;
1097
1098 /* Track how big the whole uniform variable is, in case we need to put a
1099 * copy of its data into pull constants for array access.
1100 */
1101 this->uniform_size[this->uniforms] = type_size(ir->type);
1102
1103 if (!strncmp(ir->name, "gl_", 3)) {
1104 setup_builtin_uniform_values(ir);
1105 } else {
1106 setup_uniform_values(ir);
1107 }
1108 break;
1109
1110 case ir_var_system_value:
1111 reg = make_reg_for_system_value(ir);
1112 break;
1113
1114 default:
1115 assert(!"not reached");
1116 }
1117
1118 reg->type = brw_type_for_base_type(ir->type);
1119 hash_table_insert(this->variable_ht, reg, ir);
1120 }
1121
1122 void
1123 vec4_visitor::visit(ir_loop *ir)
1124 {
1125 dst_reg counter;
1126
1127 /* We don't want debugging output to print the whole body of the
1128 * loop as the annotation.
1129 */
1130 this->base_ir = NULL;
1131
1132 if (ir->counter != NULL) {
1133 this->base_ir = ir->counter;
1134 ir->counter->accept(this);
1135 counter = *(variable_storage(ir->counter));
1136
1137 if (ir->from != NULL) {
1138 this->base_ir = ir->from;
1139 ir->from->accept(this);
1140
1141 emit(MOV(counter, this->result));
1142 }
1143 }
1144
1145 emit(BRW_OPCODE_DO);
1146
1147 if (ir->to) {
1148 this->base_ir = ir->to;
1149 ir->to->accept(this);
1150
1151 emit(CMP(dst_null_d(), src_reg(counter), this->result,
1152 brw_conditional_for_comparison(ir->cmp)));
1153
1154 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
1155 inst->predicate = BRW_PREDICATE_NORMAL;
1156 }
1157
1158 visit_instructions(&ir->body_instructions);
1159
1160
1161 if (ir->increment) {
1162 this->base_ir = ir->increment;
1163 ir->increment->accept(this);
1164 emit(ADD(counter, src_reg(counter), this->result));
1165 }
1166
1167 emit(BRW_OPCODE_WHILE);
1168 }
1169
1170 void
1171 vec4_visitor::visit(ir_loop_jump *ir)
1172 {
1173 switch (ir->mode) {
1174 case ir_loop_jump::jump_break:
1175 emit(BRW_OPCODE_BREAK);
1176 break;
1177 case ir_loop_jump::jump_continue:
1178 emit(BRW_OPCODE_CONTINUE);
1179 break;
1180 }
1181 }
1182
1183
1184 void
1185 vec4_visitor::visit(ir_function_signature *ir)
1186 {
1187 assert(0);
1188 (void)ir;
1189 }
1190
1191 void
1192 vec4_visitor::visit(ir_function *ir)
1193 {
1194 /* Ignore function bodies other than main() -- we shouldn't see calls to
1195 * them since they should all be inlined.
1196 */
1197 if (strcmp(ir->name, "main") == 0) {
1198 const ir_function_signature *sig;
1199 exec_list empty;
1200
1201 sig = ir->matching_signature(&empty);
1202
1203 assert(sig);
1204
1205 visit_instructions(&sig->body);
1206 }
1207 }
1208
1209 bool
1210 vec4_visitor::try_emit_sat(ir_expression *ir)
1211 {
1212 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1213 if (!sat_src)
1214 return false;
1215
1216 sat_src->accept(this);
1217 src_reg src = this->result;
1218
1219 this->result = src_reg(this, ir->type);
1220 vec4_instruction *inst;
1221 inst = emit(MOV(dst_reg(this->result), src));
1222 inst->saturate = true;
1223
1224 return true;
1225 }
1226
1227 bool
1228 vec4_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
1229 {
1230 /* 3-src instructions were introduced in gen6. */
1231 if (brw->gen < 6)
1232 return false;
1233
1234 /* MAD can only handle floating-point data. */
1235 if (ir->type->base_type != GLSL_TYPE_FLOAT)
1236 return false;
1237
1238 ir_rvalue *nonmul = ir->operands[1 - mul_arg];
1239 ir_expression *mul = ir->operands[mul_arg]->as_expression();
1240
1241 if (!mul || mul->operation != ir_binop_mul)
1242 return false;
1243
1244 nonmul->accept(this);
1245 src_reg src0 = fix_3src_operand(this->result);
1246
1247 mul->operands[0]->accept(this);
1248 src_reg src1 = fix_3src_operand(this->result);
1249
1250 mul->operands[1]->accept(this);
1251 src_reg src2 = fix_3src_operand(this->result);
1252
1253 this->result = src_reg(this, ir->type);
1254 emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2);
1255
1256 return true;
1257 }
1258
1259 void
1260 vec4_visitor::emit_bool_comparison(unsigned int op,
1261 dst_reg dst, src_reg src0, src_reg src1)
1262 {
1263 /* original gen4 does destination conversion before comparison. */
1264 if (brw->gen < 5)
1265 dst.type = src0.type;
1266
1267 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
1268
1269 dst.type = BRW_REGISTER_TYPE_D;
1270 emit(AND(dst, src_reg(dst), src_reg(0x1)));
1271 }
1272
1273 void
1274 vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
1275 src_reg src0, src_reg src1)
1276 {
1277 vec4_instruction *inst;
1278
1279 if (brw->gen >= 6) {
1280 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1281 inst->conditional_mod = conditionalmod;
1282 } else {
1283 emit(CMP(dst, src0, src1, conditionalmod));
1284
1285 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1286 inst->predicate = BRW_PREDICATE_NORMAL;
1287 }
1288 }
1289
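/**
 * Return true if the rvalue is an int/uint constant whose value fits in 16
 * bits, so ir_binop_mul below can use a single MUL instead of MUL+MACH+MOV.
 */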
1290 static bool
1291 is_16bit_constant(ir_rvalue *rvalue)
1292 {
1293 ir_constant *constant = rvalue->as_constant();
1294 if (!constant)
1295 return false;
1296
1297 if (constant->type != glsl_type::int_type &&
1298 constant->type != glsl_type::uint_type)
1299 return false;
1300
1301 return constant->value.u[0] < (1 << 16);
1302 }
1303
1304 void
1305 vec4_visitor::visit(ir_expression *ir)
1306 {
1307 unsigned int operand;
1308 src_reg op[Elements(ir->operands)];
1309 src_reg result_src;
1310 dst_reg result_dst;
1311 vec4_instruction *inst;
1312
1313 if (try_emit_sat(ir))
1314 return;
1315
1316 if (ir->operation == ir_binop_add) {
1317 if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
1318 return;
1319 }
1320
1321 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1322 this->result.file = BAD_FILE;
1323 ir->operands[operand]->accept(this);
1324 if (this->result.file == BAD_FILE) {
1325 printf("Failed to get tree for expression operand:\n");
1326 ir->operands[operand]->print();
1327 exit(1);
1328 }
1329 op[operand] = this->result;
1330
1331 /* Matrix expression operands should have been broken down to vector
1332 * operations already.
1333 */
1334 assert(!ir->operands[operand]->type->is_matrix());
1335 }
1336
1337 int vector_elements = ir->operands[0]->type->vector_elements;
1338 if (ir->operands[1]) {
1339 vector_elements = MAX2(vector_elements,
1340 ir->operands[1]->type->vector_elements);
1341 }
1342
1343 this->result.file = BAD_FILE;
1344
1345 /* Storage for our result. Ideally for an assignment we'd be using
1346 * the actual storage for the result here, instead.
1347 */
1348 result_src = src_reg(this, ir->type);
1349 /* convenience for the emit functions below. */
1350 result_dst = dst_reg(result_src);
1351 /* If nothing special happens, this is the result. */
1352 this->result = result_src;
1353 /* Limit writes to the channels that will be used by result_src later.
1354 * This does limit this temp's use as a temporary for multi-instruction
1355 * sequences.
1356 */
1357 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1358
1359 switch (ir->operation) {
1360 case ir_unop_logic_not:
1361 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
1362 * the one's complement of the whole register, not just bit 0.
1363 */
1364 emit(XOR(result_dst, op[0], src_reg(1)));
1365 break;
1366 case ir_unop_neg:
1367 op[0].negate = !op[0].negate;
1368 emit(MOV(result_dst, op[0]));
1369 break;
1370 case ir_unop_abs:
1371 op[0].abs = true;
1372 op[0].negate = false;
1373 emit(MOV(result_dst, op[0]));
1374 break;
1375
1376 case ir_unop_sign:
1377 emit(MOV(result_dst, src_reg(0.0f)));
1378
1379 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1380 inst = emit(MOV(result_dst, src_reg(1.0f)));
1381 inst->predicate = BRW_PREDICATE_NORMAL;
1382
1383 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1384 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1385 inst->predicate = BRW_PREDICATE_NORMAL;
1386
1387 break;
1388
1389 case ir_unop_rcp:
1390 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1391 break;
1392
1393 case ir_unop_exp2:
1394 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1395 break;
1396 case ir_unop_log2:
1397 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1398 break;
1399 case ir_unop_exp:
1400 case ir_unop_log:
1401 assert(!"not reached: should be handled by ir_explog_to_explog2");
1402 break;
1403 case ir_unop_sin:
1404 case ir_unop_sin_reduced:
1405 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1406 break;
1407 case ir_unop_cos:
1408 case ir_unop_cos_reduced:
1409 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1410 break;
1411
1412 case ir_unop_dFdx:
1413 case ir_unop_dFdy:
1414 assert(!"derivatives not valid in vertex shader");
1415 break;
1416
1417 case ir_unop_bitfield_reverse:
1418 emit(BFREV(result_dst, op[0]));
1419 break;
1420 case ir_unop_bit_count:
1421 emit(CBIT(result_dst, op[0]));
1422 break;
1423 case ir_unop_find_msb: {
1424 src_reg temp = src_reg(this, glsl_type::uint_type);
1425
1426 inst = emit(FBH(dst_reg(temp), op[0]));
1427 inst->dst.writemask = WRITEMASK_XYZW;
1428
1429 /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
1430 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
1431 * subtract the result from 31 to convert the MSB count into an LSB count.
1432 */
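      /* For example, for op[0] = 0x00000100, FBH returns 23 (counted from
       * the MSB side) and 31 - 23 = 8, the value findMSB() expects.
       */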
1433
1434 /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
1435 temp.swizzle = BRW_SWIZZLE_NOOP;
1436 emit(MOV(result_dst, temp));
1437
1438 src_reg src_tmp = src_reg(result_dst);
1439 emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
1440
1441 src_tmp.negate = true;
1442 inst = emit(ADD(result_dst, src_tmp, src_reg(31)));
1443 inst->predicate = BRW_PREDICATE_NORMAL;
1444 break;
1445 }
1446 case ir_unop_find_lsb:
1447 emit(FBL(result_dst, op[0]));
1448 break;
1449
1450 case ir_unop_noise:
1451 assert(!"not reached: should be handled by lower_noise");
1452 break;
1453
1454 case ir_binop_add:
1455 emit(ADD(result_dst, op[0], op[1]));
1456 break;
1457 case ir_binop_sub:
1458 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1459 break;
1460
1461 case ir_binop_mul:
1462 if (ir->type->is_integer()) {
1463 /* For integer multiplication, the MUL uses the low 16 bits of one of
1464 * the operands (src0 through SNB, src1 on IVB and later). The MACH
1465 * adds in the contribution of the upper 16 bits of that
1466 * operand. If we can determine that one of the args is in the low
1467 * 16 bits, though, we can just emit a single MUL.
1468 */
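	 /* For example, multiplying by an immediate like 7 fits this case:
	  * a single MUL suffices, with the constant placed in src0 before
	  * IVB and in src1 on IVB and later, matching where the hardware
	  * reads the low 16 bits.
	  */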
1469 if (is_16bit_constant(ir->operands[0])) {
1470 if (brw->gen < 7)
1471 emit(MUL(result_dst, op[0], op[1]));
1472 else
1473 emit(MUL(result_dst, op[1], op[0]));
1474 } else if (is_16bit_constant(ir->operands[1])) {
1475 if (brw->gen < 7)
1476 emit(MUL(result_dst, op[1], op[0]));
1477 else
1478 emit(MUL(result_dst, op[0], op[1]));
1479 } else {
1480 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1481
1482 emit(MUL(acc, op[0], op[1]));
1483 emit(MACH(dst_null_d(), op[0], op[1]));
1484 emit(MOV(result_dst, src_reg(acc)));
1485 }
1486 } else {
1487 emit(MUL(result_dst, op[0], op[1]));
1488 }
1489 break;
1490 case ir_binop_div:
1491 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1492 assert(ir->type->is_integer());
1493 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1494 break;
1495 case ir_binop_mod:
1496 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1497 assert(ir->type->is_integer());
1498 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1499 break;
1500
1501 case ir_binop_less:
1502 case ir_binop_greater:
1503 case ir_binop_lequal:
1504 case ir_binop_gequal:
1505 case ir_binop_equal:
1506 case ir_binop_nequal: {
1507 emit(CMP(result_dst, op[0], op[1],
1508 brw_conditional_for_comparison(ir->operation)));
1509 emit(AND(result_dst, result_src, src_reg(0x1)));
1510 break;
1511 }
1512
1513 case ir_binop_all_equal:
1514 /* "==" operator producing a scalar boolean. */
1515 if (ir->operands[0]->type->is_vector() ||
1516 ir->operands[1]->type->is_vector()) {
1517 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1518 emit(MOV(result_dst, src_reg(0)));
1519 inst = emit(MOV(result_dst, src_reg(1)));
1520 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1521 } else {
1522 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1523 emit(AND(result_dst, result_src, src_reg(0x1)));
1524 }
1525 break;
1526 case ir_binop_any_nequal:
1527 /* "!=" operator producing a scalar boolean. */
1528 if (ir->operands[0]->type->is_vector() ||
1529 ir->operands[1]->type->is_vector()) {
1530 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1531
1532 emit(MOV(result_dst, src_reg(0)));
1533 inst = emit(MOV(result_dst, src_reg(1)));
1534 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1535 } else {
1536 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1537 emit(AND(result_dst, result_src, src_reg(0x1)));
1538 }
1539 break;
1540
1541 case ir_unop_any:
1542 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1543 emit(MOV(result_dst, src_reg(0)));
1544
1545 inst = emit(MOV(result_dst, src_reg(1)));
1546 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1547 break;
1548
1549 case ir_binop_logic_xor:
1550 emit(XOR(result_dst, op[0], op[1]));
1551 break;
1552
1553 case ir_binop_logic_or:
1554 emit(OR(result_dst, op[0], op[1]));
1555 break;
1556
1557 case ir_binop_logic_and:
1558 emit(AND(result_dst, op[0], op[1]));
1559 break;
1560
1561 case ir_binop_dot:
1562 assert(ir->operands[0]->type->is_vector());
1563 assert(ir->operands[0]->type == ir->operands[1]->type);
1564 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1565 break;
1566
1567 case ir_unop_sqrt:
1568 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1569 break;
1570 case ir_unop_rsq:
1571 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1572 break;
1573
1574 case ir_unop_bitcast_i2f:
1575 case ir_unop_bitcast_u2f:
1576 this->result = op[0];
1577 this->result.type = BRW_REGISTER_TYPE_F;
1578 break;
1579
1580 case ir_unop_bitcast_f2i:
1581 this->result = op[0];
1582 this->result.type = BRW_REGISTER_TYPE_D;
1583 break;
1584
1585 case ir_unop_bitcast_f2u:
1586 this->result = op[0];
1587 this->result.type = BRW_REGISTER_TYPE_UD;
1588 break;
1589
1590 case ir_unop_i2f:
1591 case ir_unop_i2u:
1592 case ir_unop_u2i:
1593 case ir_unop_u2f:
1594 case ir_unop_b2f:
1595 case ir_unop_b2i:
1596 case ir_unop_f2i:
1597 case ir_unop_f2u:
1598 emit(MOV(result_dst, op[0]));
1599 break;
1600 case ir_unop_f2b:
1601 case ir_unop_i2b: {
1602 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1603 emit(AND(result_dst, result_src, src_reg(1)));
1604 break;
1605 }
1606
1607 case ir_unop_trunc:
1608 emit(RNDZ(result_dst, op[0]));
1609 break;
1610 case ir_unop_ceil:
1611 op[0].negate = !op[0].negate;
1612 inst = emit(RNDD(result_dst, op[0]));
1613 this->result.negate = true;
1614 break;
1615 case ir_unop_floor:
1616 inst = emit(RNDD(result_dst, op[0]));
1617 break;
1618 case ir_unop_fract:
1619 inst = emit(FRC(result_dst, op[0]));
1620 break;
1621 case ir_unop_round_even:
1622 emit(RNDE(result_dst, op[0]));
1623 break;
1624
1625 case ir_binop_min:
1626 emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
1627 break;
1628 case ir_binop_max:
1629 emit_minmax(BRW_CONDITIONAL_G, result_dst, op[0], op[1]);
1630 break;
1631
1632 case ir_binop_pow:
1633 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1634 break;
1635
1636 case ir_unop_bit_not:
1637 inst = emit(NOT(result_dst, op[0]));
1638 break;
1639 case ir_binop_bit_and:
1640 inst = emit(AND(result_dst, op[0], op[1]));
1641 break;
1642 case ir_binop_bit_xor:
1643 inst = emit(XOR(result_dst, op[0], op[1]));
1644 break;
1645 case ir_binop_bit_or:
1646 inst = emit(OR(result_dst, op[0], op[1]));
1647 break;
1648
1649 case ir_binop_lshift:
1650 inst = emit(SHL(result_dst, op[0], op[1]));
1651 break;
1652
1653 case ir_binop_rshift:
1654 if (ir->type->base_type == GLSL_TYPE_INT)
1655 inst = emit(ASR(result_dst, op[0], op[1]));
1656 else
1657 inst = emit(SHR(result_dst, op[0], op[1]));
1658 break;
1659
1660 case ir_binop_bfm:
1661 emit(BFI1(result_dst, op[0], op[1]));
1662 break;
1663
1664 case ir_binop_ubo_load: {
1665 ir_constant *uniform_block = ir->operands[0]->as_constant();
1666 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1667 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
1668 src_reg offset = op[1];
1669
1670 /* Now, load the vector from that offset. */
1671 assert(ir->type->is_vector() || ir->type->is_scalar());
1672
1673 src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
1674 packed_consts.type = result.type;
1675 src_reg surf_index =
1676 src_reg(SURF_INDEX_VS_UBO(uniform_block->value.u[0]));
1677 if (const_offset_ir) {
1678 offset = src_reg(const_offset / 16);
1679 } else {
1680 emit(SHR(dst_reg(offset), offset, src_reg(4)));
1681 }
1682
1683 vec4_instruction *pull =
1684 emit(new(mem_ctx) vec4_instruction(this,
1685 VS_OPCODE_PULL_CONSTANT_LOAD,
1686 dst_reg(packed_consts),
1687 surf_index,
1688 offset));
1689 pull->base_mrf = 14;
1690 pull->mlen = 1;
1691
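      /* const_offset is in bytes: dividing by 16 (or the SHR above in the
       * dynamic case) gave the vec4 index for the pull-constant load, and
       * (const_offset % 16) / 4 below selects the starting component within
       * the fetched vec4.
       */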
1692 packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements);
1693 packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
1694 const_offset % 16 / 4,
1695 const_offset % 16 / 4,
1696 const_offset % 16 / 4);
1697
1698 /* UBO bools are any nonzero int. We store bools as either 0 or 1. */
1699 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1700 emit(CMP(result_dst, packed_consts, src_reg(0u),
1701 BRW_CONDITIONAL_NZ));
1702 emit(AND(result_dst, result, src_reg(0x1)));
1703 } else {
1704 emit(MOV(result_dst, packed_consts));
1705 }
1706 break;
1707 }
1708
1709 case ir_binop_vector_extract:
1710 assert(!"should have been lowered by vec_index_to_cond_assign");
1711 break;
1712
1713 case ir_triop_lrp:
1714 op[0] = fix_3src_operand(op[0]);
1715 op[1] = fix_3src_operand(op[1]);
1716 op[2] = fix_3src_operand(op[2]);
1717 /* Note that the instruction's argument order is reversed from GLSL
1718 * and the IR.
1719 */
1720 emit(LRP(result_dst, op[2], op[1], op[0]));
1721 break;
1722
1723 case ir_triop_bfi:
1724 op[0] = fix_3src_operand(op[0]);
1725 op[1] = fix_3src_operand(op[1]);
1726 op[2] = fix_3src_operand(op[2]);
1727 emit(BFI2(result_dst, op[0], op[1], op[2]));
1728 break;
1729
1730 case ir_triop_bitfield_extract:
1731 op[0] = fix_3src_operand(op[0]);
1732 op[1] = fix_3src_operand(op[1]);
1733 op[2] = fix_3src_operand(op[2]);
1734 /* Note that the instruction's argument order is reversed from GLSL
1735 * and the IR.
1736 */
1737 emit(BFE(result_dst, op[2], op[1], op[0]));
1738 break;
1739
1740 case ir_triop_vector_insert:
1741 assert(!"should have been lowered by lower_vector_insert");
1742 break;
1743
1744 case ir_quadop_bitfield_insert:
1745 assert(!"not reached: should be handled by "
1746 "bitfield_insert_to_bfm_bfi\n");
1747 break;
1748
1749 case ir_quadop_vector:
1750 assert(!"not reached: should be handled by lower_quadop_vector");
1751 break;
1752
1753 case ir_unop_pack_half_2x16:
1754 emit_pack_half_2x16(result_dst, op[0]);
1755 break;
1756 case ir_unop_unpack_half_2x16:
1757 emit_unpack_half_2x16(result_dst, op[0]);
1758 break;
1759 case ir_unop_pack_snorm_2x16:
1760 case ir_unop_pack_snorm_4x8:
1761 case ir_unop_pack_unorm_2x16:
1762 case ir_unop_pack_unorm_4x8:
1763 case ir_unop_unpack_snorm_2x16:
1764 case ir_unop_unpack_snorm_4x8:
1765 case ir_unop_unpack_unorm_2x16:
1766 case ir_unop_unpack_unorm_4x8:
1767 assert(!"not reached: should be handled by lower_packing_builtins");
1768 break;
1769 case ir_unop_unpack_half_2x16_split_x:
1770 case ir_unop_unpack_half_2x16_split_y:
1771 case ir_binop_pack_half_2x16_split:
1772 assert(!"not reached: should not occur in vertex shader");
1773 break;
1774 }
1775 }
1776
1777
1778 void
1779 vec4_visitor::visit(ir_swizzle *ir)
1780 {
1781 src_reg src;
1782 int i = 0;
1783 int swizzle[4];
1784
1785 /* Note that this is only swizzles in expressions, not those on the left
1786 * hand side of an assignment, which do write masking. See ir_assignment
1787 * for that.
1788 */
1789
1790 ir->val->accept(this);
1791 src = this->result;
1792 assert(src.file != BAD_FILE);
1793
1794 for (i = 0; i < ir->type->vector_elements; i++) {
1795 switch (i) {
1796 case 0:
1797 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1798 break;
1799 case 1:
1800 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1801 break;
1802 case 2:
1803 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1804 break;
1805 case 3:
1806 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1807 break;
1808 }
1809 }
1810 for (; i < 4; i++) {
1811 /* Replicate the last channel out. */
1812 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1813 }
1814
1815 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1816
1817 this->result = src;
1818 }
1819
1820 void
1821 vec4_visitor::visit(ir_dereference_variable *ir)
1822 {
1823 const struct glsl_type *type = ir->type;
1824 dst_reg *reg = variable_storage(ir->var);
1825
1826 if (!reg) {
1827 fail("Failed to find variable storage for %s\n", ir->var->name);
1828 this->result = src_reg(brw_null_reg());
1829 return;
1830 }
1831
1832 this->result = src_reg(*reg);
1833
1834 /* System values get their swizzle from the dst_reg writemask */
1835 if (ir->var->mode == ir_var_system_value)
1836 return;
1837
1838 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1839 this->result.swizzle = swizzle_for_size(type->vector_elements);
1840 }
1841
1842
1843 int
1844 vec4_visitor::compute_array_stride(ir_dereference_array *ir)
1845 {
1846 /* Under normal circumstances array elements are stored consecutively, so
1847 * the stride is equal to the size of the array element.
1848 */
1849 return type_size(ir->type);
1850 }
1851
1852
1853 void
1854 vec4_visitor::visit(ir_dereference_array *ir)
1855 {
1856 ir_constant *constant_index;
1857 src_reg src;
1858 int array_stride = compute_array_stride(ir);
1859
1860 constant_index = ir->array_index->constant_expression_value();
1861
1862 ir->array->accept(this);
1863 src = this->result;
1864
1865 if (constant_index) {
1866 src.reg_offset += constant_index->value.i[0] * array_stride;
1867 } else {
1868 /* Variable index array dereference. It eats the "vec4" of the
1869 * base of the array and an index that offsets the Mesa register
1870 * index.
1871 */
1872 ir->array_index->accept(this);
1873
1874 src_reg index_reg;
1875
1876 if (array_stride == 1) {
1877 index_reg = this->result;
1878 } else {
1879 index_reg = src_reg(this, glsl_type::int_type);
1880
1881 emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride)));
1882 }
1883
1884 if (src.reladdr) {
1885 src_reg temp = src_reg(this, glsl_type::int_type);
1886
1887 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1888
1889 index_reg = temp;
1890 }
1891
1892 src.reladdr = ralloc(mem_ctx, src_reg);
1893 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1894 }
1895
1896 /* If the type is smaller than a vec4, replicate the last channel out. */
1897 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1898 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1899 else
1900 src.swizzle = BRW_SWIZZLE_NOOP;
1901 src.type = brw_type_for_base_type(ir->type);
1902
1903 this->result = src;
1904 }
1905
1906 void
1907 vec4_visitor::visit(ir_dereference_record *ir)
1908 {
1909 unsigned int i;
1910 const glsl_type *struct_type = ir->record->type;
1911 int offset = 0;
1912
1913 ir->record->accept(this);
1914
1915 for (i = 0; i < struct_type->length; i++) {
1916 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1917 break;
1918 offset += type_size(struct_type->fields.structure[i].type);
1919 }
1920
1921 /* If the type is smaller than a vec4, replicate the last channel out. */
1922 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1923 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1924 else
1925 this->result.swizzle = BRW_SWIZZLE_NOOP;
1926 this->result.type = brw_type_for_base_type(ir->type);
1927
1928 this->result.reg_offset += offset;
1929 }
1930
1931 /**
1932 * We want to be careful in assignment setup to hit the actual storage
1933 * instead of potentially using a temporary like we might with the
1934 * ir_dereference handler.
1935 */
1936 static dst_reg
1937 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1938 {
1939 /* The LHS must be a dereference. If the LHS is a variable indexed array
1940 * access of a vector, it must be separated into a series of conditional moves
1941 * before reaching this point (see ir_vec_index_to_cond_assign).
1942 */
1943 assert(ir->as_dereference());
1944 ir_dereference_array *deref_array = ir->as_dereference_array();
1945 if (deref_array) {
1946 assert(!deref_array->array->type->is_vector());
1947 }
1948
1949 /* Use the rvalue deref handler for the most part. We'll ignore
1950 * swizzles in it and write swizzles using writemask, though.
1951 */
1952 ir->accept(v);
1953 return dst_reg(v->result);
1954 }
1955
1956 void
1957 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1958 const struct glsl_type *type, uint32_t predicate)
1959 {
1960 if (type->base_type == GLSL_TYPE_STRUCT) {
1961 for (unsigned int i = 0; i < type->length; i++) {
1962 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1963 }
1964 return;
1965 }
1966
1967 if (type->is_array()) {
1968 for (unsigned int i = 0; i < type->length; i++) {
1969 emit_block_move(dst, src, type->fields.array, predicate);
1970 }
1971 return;
1972 }
1973
1974 if (type->is_matrix()) {
1975 const struct glsl_type *vec_type;
1976
1977 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1978 type->vector_elements, 1);
1979
1980 for (int i = 0; i < type->matrix_columns; i++) {
1981 emit_block_move(dst, src, vec_type, predicate);
1982 }
1983 return;
1984 }
1985
1986 assert(type->is_scalar() || type->is_vector());
1987
1988 dst->type = brw_type_for_base_type(type);
1989 src->type = dst->type;
1990
1991 dst->writemask = (1 << type->vector_elements) - 1;
1992
1993 src->swizzle = swizzle_for_size(type->vector_elements);
1994
1995 vec4_instruction *inst = emit(MOV(*dst, *src));
1996 inst->predicate = predicate;
1997
1998 dst->reg_offset++;
1999 src->reg_offset++;
2000 }
2001
2002
2003 /* If the RHS processing resulted in an instruction generating a
2004 * temporary value, and it would be easy to rewrite the instruction to
2005 * generate its result right into the LHS instead, do so. This ends
2006 * up reliably removing instructions where it can be tricky to do so
2007 * later without real use-def (UD) chain information.
2008 */
2009 bool
2010 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
2011 dst_reg dst,
2012 src_reg src,
2013 vec4_instruction *pre_rhs_inst,
2014 vec4_instruction *last_rhs_inst)
2015 {
2016 /* This could be supported, but it would take more smarts. */
2017 if (ir->condition)
2018 return false;
2019
2020 if (pre_rhs_inst == last_rhs_inst)
2021 return false; /* No instructions generated to work with. */
2022
2023 /* Make sure the last instruction generated our source reg. */
2024 if (src.file != GRF ||
2025 src.file != last_rhs_inst->dst.file ||
2026 src.reg != last_rhs_inst->dst.reg ||
2027 src.reg_offset != last_rhs_inst->dst.reg_offset ||
2028 src.reladdr ||
2029 src.abs ||
2030 src.negate ||
2031 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
2032 return false;
2033
2034 /* Check that the last instruction fully initialized the channels
2035 * we want to use, in the order we want to use them. We could
2036 * potentially reswizzle the operands of many instructions so that
2037 * we could handle out of order channels, but don't yet.
2038 */
2039
2040 for (unsigned i = 0; i < 4; i++) {
2041 if (dst.writemask & (1 << i)) {
2042 if (!(last_rhs_inst->dst.writemask & (1 << i)))
2043 return false;
2044
2045 if (BRW_GET_SWZ(src.swizzle, i) != i)
2046 return false;
2047 }
2048 }
2049
2050 /* Success! Rewrite the instruction. */
2051 last_rhs_inst->dst.file = dst.file;
2052 last_rhs_inst->dst.reg = dst.reg;
2053 last_rhs_inst->dst.reg_offset = dst.reg_offset;
2054 last_rhs_inst->dst.reladdr = dst.reladdr;
2055 last_rhs_inst->dst.writemask &= dst.writemask;
2056
2057 return true;
2058 }
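/* Illustrative sketch of the rewrite: if the RHS generated
 *
 *    ADD tmp, a, b
 *
 * and the assignment would otherwise emit MOV dst.xyz, tmp, the ADD is
 * retargeted in place to
 *
 *    ADD dst.xyz, a, b
 *
 * and visit(ir_assignment) skips emitting the copy entirely.
 */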
2059
2060 void
2061 vec4_visitor::visit(ir_assignment *ir)
2062 {
2063 dst_reg dst = get_assignment_lhs(ir->lhs, this);
2064 uint32_t predicate = BRW_PREDICATE_NONE;
2065
2066 if (!ir->lhs->type->is_scalar() &&
2067 !ir->lhs->type->is_vector()) {
2068 ir->rhs->accept(this);
2069 src_reg src = this->result;
2070
2071 if (ir->condition) {
2072 emit_bool_to_cond_code(ir->condition, &predicate);
2073 }
2074
2075 /* emit_block_move doesn't account for swizzles in the source register.
2076 * This should be ok, since the source register is a structure or an
2077 * array, and those can't be swizzled. But double-check to be sure.
2078 */
2079 assert(src.swizzle ==
2080 (ir->rhs->type->is_matrix()
2081 ? swizzle_for_size(ir->rhs->type->vector_elements)
2082 : BRW_SWIZZLE_NOOP));
2083
2084 emit_block_move(&dst, &src, ir->rhs->type, predicate);
2085 return;
2086 }
2087
2088 /* Now we're down to just a scalar/vector with writemasks. */
2089 int i;
2090
2091 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
2092 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2093
2094 ir->rhs->accept(this);
2095
2096 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2097
2098 src_reg src = this->result;
2099
2100 int swizzles[4];
2101 int first_enabled_chan = 0;
2102 int src_chan = 0;
2103
2104 assert(ir->lhs->type->is_vector() ||
2105 ir->lhs->type->is_scalar());
2106 dst.writemask = ir->write_mask;
2107
2108 for (int i = 0; i < 4; i++) {
2109 if (dst.writemask & (1 << i)) {
2110 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
2111 break;
2112 }
2113 }
2114
2115 /* Swizzle a small RHS vector into the channels being written.
2116 *
2117 * GLSL IR treats write_mask as dictating how many channels are
2118 * present on the RHS, while in our instructions we need to make
2119 * those channels appear in the slots of the vec4 they're written to.
2120 */
2121 for (int i = 0; i < 4; i++) {
2122 if (dst.writemask & (1 << i))
2123 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
2124 else
2125 swizzles[i] = first_enabled_chan;
2126 }
2127 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2128 swizzles[2], swizzles[3]);
2129
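   /* Worked example (illustrative): for an assignment whose RHS is a vec2
    * expression and whose write_mask is YW, the RHS arrives swizzled .xyyy
    * and the loop above builds swizzles[] = {Y, X, Y, Y}; dst.y then reads
    * the first RHS channel, dst.w the second, and the unwritten slots just
    * repeat the channel feeding the first enabled one.
    */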
2130 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
2131 return;
2132 }
2133
2134 if (ir->condition) {
2135 emit_bool_to_cond_code(ir->condition, &predicate);
2136 }
2137
2138 for (i = 0; i < type_size(ir->lhs->type); i++) {
2139 vec4_instruction *inst = emit(MOV(dst, src));
2140 inst->predicate = predicate;
2141
2142 dst.reg_offset++;
2143 src.reg_offset++;
2144 }
2145 }
2146
2147 void
2148 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
2149 {
2150 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2151 foreach_list(node, &ir->components) {
2152 ir_constant *field_value = (ir_constant *)node;
2153
2154 emit_constant_values(dst, field_value);
2155 }
2156 return;
2157 }
2158
2159 if (ir->type->is_array()) {
2160 for (unsigned int i = 0; i < ir->type->length; i++) {
2161 emit_constant_values(dst, ir->array_elements[i]);
2162 }
2163 return;
2164 }
2165
2166 if (ir->type->is_matrix()) {
2167 for (int i = 0; i < ir->type->matrix_columns; i++) {
2168 float *vec = &ir->value.f[i * ir->type->vector_elements];
2169
2170 for (int j = 0; j < ir->type->vector_elements; j++) {
2171 dst->writemask = 1 << j;
2172 dst->type = BRW_REGISTER_TYPE_F;
2173
2174 emit(MOV(*dst, src_reg(vec[j])));
2175 }
2176 dst->reg_offset++;
2177 }
2178 return;
2179 }
2180
2181 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
2182
2183 for (int i = 0; i < ir->type->vector_elements; i++) {
2184 if (!(remaining_writemask & (1 << i)))
2185 continue;
2186
2187 dst->writemask = 1 << i;
2188 dst->type = brw_type_for_base_type(ir->type);
2189
2190 /* Find other components that match the one we're about to
2191 * write. Emits fewer instructions for things like vec4(0.5,
2192 * 1.5, 1.5, 1.5).
2193 */
2194 for (int j = i + 1; j < ir->type->vector_elements; j++) {
2195 if (ir->type->base_type == GLSL_TYPE_BOOL) {
2196 if (ir->value.b[i] == ir->value.b[j])
2197 dst->writemask |= (1 << j);
2198 } else {
2199 /* u, i, and f storage all line up, so no need for a
2200 * switch case for comparing each type.
2201 */
2202 if (ir->value.u[i] == ir->value.u[j])
2203 dst->writemask |= (1 << j);
2204 }
2205 }
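      /* Illustrative outcome: vec4(0.5, 1.5, 1.5, 1.5) takes two trips
       * through this loop, emitting MOV dst.x, 0.5f and MOV dst.yzw, 1.5f
       * instead of four separate MOVs.
       */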
2206
2207 switch (ir->type->base_type) {
2208 case GLSL_TYPE_FLOAT:
2209 emit(MOV(*dst, src_reg(ir->value.f[i])));
2210 break;
2211 case GLSL_TYPE_INT:
2212 emit(MOV(*dst, src_reg(ir->value.i[i])));
2213 break;
2214 case GLSL_TYPE_UINT:
2215 emit(MOV(*dst, src_reg(ir->value.u[i])));
2216 break;
2217 case GLSL_TYPE_BOOL:
2218 emit(MOV(*dst, src_reg(ir->value.b[i])));
2219 break;
2220 default:
2221 assert(!"Non-float/uint/int/bool constant");
2222 break;
2223 }
2224
2225 remaining_writemask &= ~dst->writemask;
2226 }
2227 dst->reg_offset++;
2228 }
2229
2230 void
2231 vec4_visitor::visit(ir_constant *ir)
2232 {
2233 dst_reg dst = dst_reg(this, ir->type);
2234 this->result = src_reg(dst);
2235
2236 emit_constant_values(&dst, ir);
2237 }
2238
2239 void
2240 vec4_visitor::visit(ir_call *ir)
2241 {
2242 assert(!"not reached");
2243 }
2244
2245 void
2246 vec4_visitor::visit(ir_texture *ir)
2247 {
2248 int sampler =
2249 _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
2250
2251 /* Should be lowered by do_lower_texture_projection */
2252 assert(!ir->projector);
2253
2254 /* Generate code to compute all the subexpression trees. This has to be
2255 * done before loading any values into MRFs for the sampler message since
2256 * generating these values may involve SEND messages that need the MRFs.
2257 */
2258 src_reg coordinate;
2259 if (ir->coordinate) {
2260 ir->coordinate->accept(this);
2261 coordinate = this->result;
2262 }
2263
2264 src_reg shadow_comparitor;
2265 if (ir->shadow_comparitor) {
2266 ir->shadow_comparitor->accept(this);
2267 shadow_comparitor = this->result;
2268 }
2269
2270 const glsl_type *lod_type = NULL, *sample_index_type = NULL;
2271 src_reg lod, dPdx, dPdy, sample_index;
2272 switch (ir->op) {
2273 case ir_tex:
2274 lod = src_reg(0.0f);
2275 lod_type = glsl_type::float_type;
2276 break;
2277 case ir_txf:
2278 case ir_txl:
2279 case ir_txs:
2280 ir->lod_info.lod->accept(this);
2281 lod = this->result;
2282 lod_type = ir->lod_info.lod->type;
2283 break;
2284 case ir_txf_ms:
2285 ir->lod_info.sample_index->accept(this);
2286 sample_index = this->result;
2287 sample_index_type = ir->lod_info.sample_index->type;
2288 break;
2289 case ir_txd:
2290 ir->lod_info.grad.dPdx->accept(this);
2291 dPdx = this->result;
2292
2293 ir->lod_info.grad.dPdy->accept(this);
2294 dPdy = this->result;
2295
2296 lod_type = ir->lod_info.grad.dPdx->type;
2297 break;
2298 case ir_txb:
2299 case ir_lod:
2300 break;
2301 }
2302
2303 vec4_instruction *inst = NULL;
2304 switch (ir->op) {
2305 case ir_tex:
2306 case ir_txl:
2307 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
2308 break;
2309 case ir_txd:
2310 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
2311 break;
2312 case ir_txf:
2313 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
2314 break;
2315 case ir_txf_ms:
2316 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF_MS);
2317 break;
2318 case ir_txs:
2319 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
2320 break;
2321 case ir_txb:
2322 assert(!"TXB is not valid for vertex shaders.");
2323 break;
2324 case ir_lod:
2325 assert(!"LOD is not valid for vertex shaders.");
2326 break;
2327 }
2328
2329 bool use_texture_offset = ir->offset != NULL && ir->op != ir_txf;
2330
2331 /* Texel offsets go in the message header; Gen4 also requires headers. */
2332 inst->header_present = use_texture_offset || brw->gen < 5;
2333 inst->base_mrf = 2;
2334 inst->mlen = inst->header_present + 1; /* always at least one */
2335 inst->sampler = sampler;
2336 inst->dst = dst_reg(this, ir->type);
2337 inst->dst.writemask = WRITEMASK_XYZW;
2338 inst->shadow_compare = ir->shadow_comparitor != NULL;
2339
2340 if (use_texture_offset)
2341 inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
2342
2343 /* MRF for the first parameter */
2344 int param_base = inst->base_mrf + inst->header_present;
2345
2346 if (ir->op == ir_txs) {
2347 int writemask = brw->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
2348 emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod));
2349 } else {
2350 int i, coord_mask = 0, zero_mask = 0;
2351 /* Load the coordinate */
2352 /* FINISHME: gl_clamp_mask and saturate */
2353 for (i = 0; i < ir->coordinate->type->vector_elements; i++)
2354 coord_mask |= (1 << i);
2355 for (; i < 4; i++)
2356 zero_mask |= (1 << i);
2357
2358 if (ir->offset && ir->op == ir_txf) {
2359 /* It appears that the ld instruction used for txf does its
2360 * address bounds check before adding in the offset. To work
2361 * around this, just add the integer offset to the integer
2362 * texel coordinate, and don't put the offset in the header.
2363 */
2364 ir_constant *offset = ir->offset->as_constant();
2365 assert(offset);
2366
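         /* For example, a texelFetch() with an offset of (1, 2) adds 1 to
          * the x texel coordinate and 2 to the y coordinate below, rather
          * than placing the offset in the message header.
          */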
2367 for (int j = 0; j < ir->coordinate->type->vector_elements; j++) {
2368 src_reg src = coordinate;
2369 src.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(src.swizzle, j),
2370 BRW_GET_SWZ(src.swizzle, j),
2371 BRW_GET_SWZ(src.swizzle, j),
2372 BRW_GET_SWZ(src.swizzle, j));
2373 emit(ADD(dst_reg(MRF, param_base, ir->coordinate->type, 1 << j),
2374 src, offset->value.i[j]));
2375 }
2376 } else {
2377 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
2378 coordinate));
2379 }
2380 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
2381 src_reg(0)));
2382 /* Load the shadow comparitor */
2383 if (ir->shadow_comparitor && ir->op != ir_txd) {
2384 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
2385 WRITEMASK_X),
2386 shadow_comparitor));
2387 inst->mlen++;
2388 }
2389
2390 /* Load the LOD info */
2391 if (ir->op == ir_tex || ir->op == ir_txl) {
2392 int mrf, writemask;
2393 if (brw->gen >= 5) {
2394 mrf = param_base + 1;
2395 if (ir->shadow_comparitor) {
2396 writemask = WRITEMASK_Y;
2397 /* mlen already incremented */
2398 } else {
2399 writemask = WRITEMASK_X;
2400 inst->mlen++;
2401 }
2402 } else /* brw->gen == 4 */ {
2403 mrf = param_base;
2404 writemask = WRITEMASK_W;
2405 }
2406 emit(MOV(dst_reg(MRF, mrf, lod_type, writemask), lod));
2407 } else if (ir->op == ir_txf) {
2408 emit(MOV(dst_reg(MRF, param_base, lod_type, WRITEMASK_W), lod));
2409 } else if (ir->op == ir_txf_ms) {
2410 emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X),
2411 sample_index));
2412 inst->mlen++;
2413
2414 /* On Gen7, there is an additional MCS parameter here after the sample
2415 * index (SI), but we don't bother to emit it since it's always zero.
2416 * If we start supporting texturing from CMS surfaces, this will have
2417 * to change.
2418 */
2419 } else if (ir->op == ir_txd) {
2420 const glsl_type *type = lod_type;
2421
2422 if (brw->gen >= 5) {
2423 dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2424 dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2425 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
2426 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
2427 inst->mlen++;
2428
2429 if (ir->type->vector_elements == 3 || ir->shadow_comparitor) {
2430 dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
2431 dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
2432 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
2433 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
2434 inst->mlen++;
2435
2436 if (ir->shadow_comparitor) {
2437 emit(MOV(dst_reg(MRF, param_base + 2,
2438 ir->shadow_comparitor->type, WRITEMASK_Z),
2439 shadow_comparitor));
2440 }
2441 }
2442 } else /* brw->gen == 4 */ {
2443 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
2444 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
2445 inst->mlen += 2;
2446 }
2447 }
2448 }
2449
2450 emit(inst);
2451
2452 /* Fix up the number of layers (z) for cube arrays: the hardware returns
2453 * faces * layers, but the spec requires just layers.
2454 */
2455 if (ir->op == ir_txs) {
2456 glsl_type const *type = ir->sampler->type;
2457 if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
2458 type->sampler_array) {
2459 emit_math(SHADER_OPCODE_INT_QUOTIENT,
2460 with_writemask(inst->dst, WRITEMASK_Z),
2461 src_reg(inst->dst), src_reg(6));
2462 }
2463 }
2464
2465 swizzle_result(ir, src_reg(inst->dst), sampler);
2466 }
2467
2468 void
2469 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
2470 {
2471 int s = key->tex.swizzles[sampler];
2472
2473 this->result = src_reg(this, ir->type);
2474 dst_reg swizzled_result(this->result);
2475
2476 if (ir->op == ir_txs || ir->type == glsl_type::float_type
2477 || s == SWIZZLE_NOOP) {
2478 emit(MOV(swizzled_result, orig_val));
2479 return;
2480 }
2481
2482 int zero_mask = 0, one_mask = 0, copy_mask = 0;
2483 int swizzle[4] = {0};
2484
2485 for (int i = 0; i < 4; i++) {
2486 switch (GET_SWZ(s, i)) {
2487 case SWIZZLE_ZERO:
2488 zero_mask |= (1 << i);
2489 break;
2490 case SWIZZLE_ONE:
2491 one_mask |= (1 << i);
2492 break;
2493 default:
2494 copy_mask |= (1 << i);
2495 swizzle[i] = GET_SWZ(s, i);
2496 break;
2497 }
2498 }
2499
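   /* Illustrative example: a texture swizzle of (X, X, X, ONE) puts XYZ in
    * copy_mask (all sourcing channel X) and W in one_mask, so the code below
    * emits one swizzled MOV plus a MOV of 1.0f into W.
    */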
2500 if (copy_mask) {
2501 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2502 swizzled_result.writemask = copy_mask;
2503 emit(MOV(swizzled_result, orig_val));
2504 }
2505
2506 if (zero_mask) {
2507 swizzled_result.writemask = zero_mask;
2508 emit(MOV(swizzled_result, src_reg(0.0f)));
2509 }
2510
2511 if (one_mask) {
2512 swizzled_result.writemask = one_mask;
2513 emit(MOV(swizzled_result, src_reg(1.0f)));
2514 }
2515 }
2516
2517 void
2518 vec4_visitor::visit(ir_return *ir)
2519 {
2520 assert(!"not reached");
2521 }
2522
2523 void
2524 vec4_visitor::visit(ir_discard *ir)
2525 {
2526 assert(!"not reached");
2527 }
2528
2529 void
2530 vec4_visitor::visit(ir_if *ir)
2531 {
2532 /* Don't point the annotation at the if statement, because then the
2533 * annotation gets printed for the condition plus the then and else blocks.
2534 */
2535 this->base_ir = ir->condition;
2536
2537 if (brw->gen == 6) {
2538 emit_if_gen6(ir);
2539 } else {
2540 uint32_t predicate;
2541 emit_bool_to_cond_code(ir->condition, &predicate);
2542 emit(IF(predicate));
2543 }
2544
2545 visit_instructions(&ir->then_instructions);
2546
2547 if (!ir->else_instructions.is_empty()) {
2548 this->base_ir = ir->condition;
2549 emit(BRW_OPCODE_ELSE);
2550
2551 visit_instructions(&ir->else_instructions);
2552 }
2553
2554 this->base_ir = ir->condition;
2555 emit(BRW_OPCODE_ENDIF);
2556 }
2557
2558 void
2559 vec4_visitor::visit(ir_emit_vertex *)
2560 {
2561 assert(!"not reached");
2562 }
2563
2564 void
2565 vec4_visitor::visit(ir_end_primitive *)
2566 {
2567 assert(!"not reached");
2568 }
2569
2570 void
2571 vec4_visitor::emit_ndc_computation()
2572 {
2573 /* Get the position */
2574 src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
2575
2576 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
2577 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2578 output_reg[BRW_VARYING_SLOT_NDC] = ndc;
2579
2580 current_annotation = "NDC";
2581 dst_reg ndc_w = ndc;
2582 ndc_w.writemask = WRITEMASK_W;
2583 src_reg pos_w = pos;
2584 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2585 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2586
2587 dst_reg ndc_xyz = ndc;
2588 ndc_xyz.writemask = WRITEMASK_XYZ;
2589
2590 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
2591 }
2592
2593 void
2594 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2595 {
2596 if (brw->gen < 6 &&
2597 ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
2598 key->userclip_active || brw->has_negative_rhw_bug)) {
2599 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2600 dst_reg header1_w = header1;
2601 header1_w.writemask = WRITEMASK_W;
2602
2603 emit(MOV(header1, 0u));
2604
2605 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
2606 src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
2607
2608 current_annotation = "Point size";
2609 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2610 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
2611 }
2612
2613 if (key->userclip_active) {
2614 current_annotation = "Clipping flags";
2615 dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
2616 dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
2617
2618 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L));
2619 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
2620 emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
2621
2622 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L));
2623 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
2624 emit(SHL(flags1, src_reg(flags1), src_reg(4)));
2625 emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
2626 }
2627
2628 /* i965 clipping workaround:
2629 * 1) Test for negative rhw
2630 * 2) If set,
2631 * set ndc = (0,0,0,0)
2632 * set ucp[6] = 1
2633 *
2634 * Later, clipping will detect ucp[6] and ensure the primitive is
2635 * clipped against all fixed planes.
2636 */
2637 if (brw->has_negative_rhw_bug) {
2638 src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
2639 ndc_w.swizzle = BRW_SWIZZLE_WWWW;
2640 emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
2641 vec4_instruction *inst;
2642 inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
2643 inst->predicate = BRW_PREDICATE_NORMAL;
2644 inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
2645 inst->predicate = BRW_PREDICATE_NORMAL;
2646 }
2647
2648 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2649 } else if (brw->gen < 6) {
2650 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2651 } else {
2652 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2653 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
2654 emit(MOV(brw_writemask(reg, WRITEMASK_W),
2655 src_reg(output_reg[VARYING_SLOT_PSIZ])));
2656 }
2657 if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
2658 emit(MOV(retype(brw_writemask(reg, WRITEMASK_Y), BRW_REGISTER_TYPE_D),
2659 src_reg(output_reg[VARYING_SLOT_LAYER])));
2660 }
2661 }
2662 }
2663
2664 void
2665 vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
2666 {
2667 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2668 *
2669 * "If a linked set of shaders forming the vertex stage contains no
2670 * static write to gl_ClipVertex or gl_ClipDistance, but the
2671 * application has requested clipping against user clip planes through
2672 * the API, then the coordinate written to gl_Position is used for
2673 * comparison against the user clip planes."
2674 *
2675 * This function is only called if the shader didn't write to
2676 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
2677 * if the user wrote to it; otherwise we use gl_Position.
2678 */
2679 gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
2680 if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
2681 clip_vertex = VARYING_SLOT_POS;
2682 }
2683
2684 for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
2685 ++i) {
2686 reg.writemask = 1 << i;
2687 emit(DP4(reg,
2688 src_reg(output_reg[clip_vertex]),
2689 src_reg(this->userplane[i + offset])));
2690 }
2691 }
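/* Note (illustrative): each enabled user clip plane gets one channel of the
 * destination, computed as a DP4 of the clip vertex (or gl_Position) against
 * that plane; emit_vertex() calls this twice, with offsets 0 and 4, to fill
 * CLIP_DIST0 and CLIP_DIST1.
 */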
2692
2693 void
2694 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
2695 {
2696 assert (varying < VARYING_SLOT_MAX);
2697 reg.type = output_reg[varying].type;
2698 current_annotation = output_reg_annotation[varying];
2699 /* Copy the register, saturating if necessary */
2700 vec4_instruction *inst = emit(MOV(reg,
2701 src_reg(output_reg[varying])));
2702 if ((varying == VARYING_SLOT_COL0 ||
2703 varying == VARYING_SLOT_COL1 ||
2704 varying == VARYING_SLOT_BFC0 ||
2705 varying == VARYING_SLOT_BFC1) &&
2706 key->clamp_vertex_color) {
2707 inst->saturate = true;
2708 }
2709 }
2710
2711 void
2712 vec4_visitor::emit_urb_slot(int mrf, int varying)
2713 {
2714 struct brw_reg hw_reg = brw_message_reg(mrf);
2715 dst_reg reg = dst_reg(MRF, mrf);
2716 reg.type = BRW_REGISTER_TYPE_F;
2717
2718 switch (varying) {
2719 case VARYING_SLOT_PSIZ:
2720 /* PSIZ is always in slot 0, and is coupled with other flags. */
2721 current_annotation = "indices, point width, clip flags";
2722 emit_psiz_and_flags(hw_reg);
2723 break;
2724 case BRW_VARYING_SLOT_NDC:
2725 current_annotation = "NDC";
2726 emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
2727 break;
2728 case VARYING_SLOT_POS:
2729 current_annotation = "gl_Position";
2730 emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
2731 break;
2732 case VARYING_SLOT_EDGE:
2733 /* This is present when doing unfilled polygons. We're supposed to copy
2734 * the edge flag from the user-provided vertex array
2735 * (glEdgeFlagPointer), or otherwise we'll copy from the current value
2736 * of that attribute (starts as 1.0f). This is then used in clipping to
2737 * determine which edges should be drawn as wireframe.
2738 */
2739 current_annotation = "edge flag";
2740 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
2741 glsl_type::float_type, WRITEMASK_XYZW))));
2742 break;
2743 case BRW_VARYING_SLOT_PAD:
2744 /* No need to write to this slot */
2745 break;
2746 default:
2747 emit_generic_urb_slot(reg, varying);
2748 break;
2749 }
2750 }
2751
2752 static int
2753 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2754 {
2755 if (brw->gen >= 6) {
2756 /* URB data written (does not include the message header reg) must
2757 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
2758 * section 5.4.3.2.2: URB_INTERLEAVED.
2759 *
2760 * URB entries are allocated on a multiple of 1024 bits, so an
2761 * extra 128 bits written here to make the end align to 256 is
2762 * no problem.
2763 */
2764 if ((mlen % 2) != 1)
2765 mlen++;
2766 }
2767
2768 return mlen;
2769 }
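/* Illustrative example: mlen counts the header plus the data registers, so a
 * write of 6 data regs (mlen 7) already satisfies the 256-bit rule and is
 * left alone, while 7 data regs (mlen 8) gets padded to mlen 9 so the data
 * portion stays a multiple of two registers.
 */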
2770
2771 void
2772 vec4_vs_visitor::emit_urb_write_header(int mrf)
2773 {
2774 /* No need to do anything for VS; an implied write to this MRF will be
2775 * performed by VS_OPCODE_URB_WRITE.
2776 */
2777 (void) mrf;
2778 }
2779
2780 vec4_instruction *
2781 vec4_vs_visitor::emit_urb_write_opcode(bool complete)
2782 {
2783 /* For VS, the URB writes end the thread. */
2784 if (complete) {
2785 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
2786 emit_shader_time_end();
2787 }
2788
2789 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
2790 inst->eot = complete;
2791
2792 return inst;
2793 }
2794
2795 /**
2796 * Generates the VUE payload plus the necessary URB write instructions to
2797 * output it.
2798 *
2799 * The VUE layout is documented in Volume 2a.
2800 */
2801 void
2802 vec4_visitor::emit_vertex()
2803 {
2804 /* MRF 0 is reserved for the debugger, so start with message header
2805 * in MRF 1.
2806 */
2807 int base_mrf = 1;
2808 int mrf = base_mrf;
2809 /* In the process of generating our URB write message contents, we
2810 * may need to unspill a register or load from an array. Those
2811 * reads would use MRFs 14-15.
2812 */
2813 int max_usable_mrf = 13;
2814
2815 /* The following assertion verifies that max_usable_mrf causes an
2816 * even-numbered amount of URB write data, which will meet gen6's
2817 * requirements for length alignment.
2818 */
2819 assert ((max_usable_mrf - base_mrf) % 2 == 0);
2820
2821 /* First mrf is the g0-based message header containing URB handles and
2822 * such.
2823 */
2824 emit_urb_write_header(mrf++);
2825
2826 if (brw->gen < 6) {
2827 emit_ndc_computation();
2828 }
2829
2830 /* Lower legacy ff and ClipVertex clipping to clip distances */
2831 if (key->userclip_active && !key->uses_clip_distance) {
2832 current_annotation = "user clip distances";
2833
2834 output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
2835 output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
2836
2837 emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
2838 emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
2839 }
2840
2841 /* Set up the VUE data for the first URB write */
2842 int slot;
2843 for (slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
2844 emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
2845
2846 /* If this was max_usable_mrf, we can't fit anything more into this URB
2847 * WRITE.
2848 */
2849 if (mrf > max_usable_mrf) {
2850 slot++;
2851 break;
2852 }
2853 }
2854
2855 bool complete = slot >= prog_data->vue_map.num_slots;
2856 current_annotation = "URB write";
2857 vec4_instruction *inst = emit_urb_write_opcode(complete);
2858 inst->base_mrf = base_mrf;
2859 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2860
2861 /* Optional second URB write */
2862 if (!complete) {
2863 mrf = base_mrf + 1;
2864
2865 for (; slot < prog_data->vue_map.num_slots; ++slot) {
2866 assert(mrf < max_usable_mrf);
2867
2868 emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
2869 }
2870
2871 current_annotation = "URB write";
2872 inst = emit_urb_write_opcode(true /* complete */);
2873 inst->base_mrf = base_mrf;
2874 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2875 /* URB destination offset. In the previous write, we got MRFs
2876 * 2-13 minus the one header MRF, so 12 regs. URB offset is in
2877 * URB row increments, and each of our MRFs is half of one of
2878 * those, since we're doing interleaved writes.
2879 */
2880 inst->offset = (max_usable_mrf - base_mrf) / 2;
2881 }
2882 }
2883
2884 void
2885 vec4_vs_visitor::emit_thread_end()
2886 {
2887 /* For VS, we always end the thread by emitting a single vertex.
2888 * emit_urb_write_opcode() will take care of setting the eot flag on the
2889 * SEND instruction.
2890 */
2891 emit_vertex();
2892 }
2893
2894 src_reg
2895 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2896 src_reg *reladdr, int reg_offset)
2897 {
2898 /* Because we store the values to scratch interleaved like our
2899 * vertex data, we need to scale the vec4 index by 2.
2900 */
2901 int message_header_scale = 2;
2902
2903 /* Pre-gen6, the message header uses byte offsets instead of vec4
2904 * (16-byte) offset units.
2905 */
2906 if (brw->gen < 6)
2907 message_header_scale *= 16;
2908
2909 if (reladdr) {
2910 src_reg index = src_reg(this, glsl_type::int_type);
2911
2912 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2913 emit_before(inst, MUL(dst_reg(index),
2914 index, src_reg(message_header_scale)));
2915
2916 return index;
2917 } else {
2918 return src_reg(reg_offset * message_header_scale);
2919 }
2920 }
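/* Illustrative example: with no reladdr, a reg_offset of 3 becomes
 * src_reg(6) on Gen6+ (two interleaved scratch rows per vec4) and
 * src_reg(96) before Gen6, where the header wants byte offsets
 * (3 * 2 * 16).
 */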
2921
2922 src_reg
2923 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2924 src_reg *reladdr, int reg_offset)
2925 {
2926 if (reladdr) {
2927 src_reg index = src_reg(this, glsl_type::int_type);
2928
2929 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2930
2931 /* Pre-gen6, the message header uses byte offsets instead of vec4
2932 * (16-byte) offset units.
2933 */
2934 if (brw->gen < 6) {
2935 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2936 }
2937
2938 return index;
2939 } else {
2940 int message_header_scale = brw->gen < 6 ? 16 : 1;
2941 return src_reg(reg_offset * message_header_scale);
2942 }
2943 }
2944
2945 /**
2946 * Emits an instruction before @inst to load the value named by @orig_src
2947 * from scratch space at @base_offset to @temp.
2948 *
2949 * @base_offset is measured in 32-byte units (the size of a register).
2950 */
2951 void
2952 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2953 dst_reg temp, src_reg orig_src,
2954 int base_offset)
2955 {
2956 int reg_offset = base_offset + orig_src.reg_offset;
2957 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2958
2959 emit_before(inst, SCRATCH_READ(temp, index));
2960 }
2961
2962 /**
2963 * Emits an instruction after @inst to store the value to be written
2964 * to @orig_dst to scratch space at @base_offset, from @temp.
2965 *
2966 * @base_offset is measured in 32-byte units (the size of a register).
2967 */
2968 void
2969 vec4_visitor::emit_scratch_write(vec4_instruction *inst, int base_offset)
2970 {
2971 int reg_offset = base_offset + inst->dst.reg_offset;
2972 src_reg index = get_scratch_offset(inst, inst->dst.reladdr, reg_offset);
2973
2974 /* Create a temporary register to store *inst's result in.
2975 *
2976 * We have to be careful in MOVing from our temporary result register in
2977 * the scratch write. If we swizzle from channels of the temporary that
2978 * weren't initialized, it will confuse live interval analysis, which will
2979 * make spilling fail to make progress.
2980 */
2981 src_reg temp = src_reg(this, glsl_type::vec4_type);
2982 temp.type = inst->dst.type;
2983 int first_writemask_chan = ffs(inst->dst.writemask) - 1;
2984 int swizzles[4];
2985 for (int i = 0; i < 4; i++)
2986 if (inst->dst.writemask & (1 << i))
2987 swizzles[i] = i;
2988 else
2989 swizzles[i] = first_writemask_chan;
2990 temp.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2991 swizzles[2], swizzles[3]);
2992
2993 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2994 inst->dst.writemask));
2995 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
2996 write->predicate = inst->predicate;
2997 write->ir = inst->ir;
2998 write->annotation = inst->annotation;
2999 inst->insert_after(write);
3000
3001 inst->dst.file = temp.file;
3002 inst->dst.reg = temp.reg;
3003 inst->dst.reg_offset = temp.reg_offset;
3004 inst->dst.reladdr = NULL;
3005 }
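/* Illustrative example: if the original instruction only wrote YZ, the
 * temporary above is read back with a .yyzy swizzle, so the scratch write
 * never sources the X or W channels that were left uninitialized.
 */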
3006
3007 /**
3008 * We can't generally support array access in GRF space, because a
3009 * single instruction's destination can only span 2 contiguous
3010 * registers. So, we send all GRF arrays that get variable index
3011 * access to scratch space.
3012 */
3013 void
3014 vec4_visitor::move_grf_array_access_to_scratch()
3015 {
3016 int scratch_loc[this->virtual_grf_count];
3017
3018 for (int i = 0; i < this->virtual_grf_count; i++) {
3019 scratch_loc[i] = -1;
3020 }
3021
3022 /* First, calculate the set of virtual GRFs that need to be punted
3023 * to scratch due to having any array access on them, and where in
3024 * scratch.
3025 */
3026 foreach_list(node, &this->instructions) {
3027 vec4_instruction *inst = (vec4_instruction *)node;
3028
3029 if (inst->dst.file == GRF && inst->dst.reladdr &&
3030 scratch_loc[inst->dst.reg] == -1) {
3031 scratch_loc[inst->dst.reg] = c->last_scratch;
3032 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
3033 }
3034
3035 for (int i = 0 ; i < 3; i++) {
3036 src_reg *src = &inst->src[i];
3037
3038 if (src->file == GRF && src->reladdr &&
3039 scratch_loc[src->reg] == -1) {
3040 scratch_loc[src->reg] = c->last_scratch;
3041 c->last_scratch += this->virtual_grf_sizes[src->reg];
3042 }
3043 }
3044 }
3045
3046 /* Now, for anything that will be accessed through scratch, rewrite
3047 * it to load/store. Note that this is a _safe list walk, because
3048 * we may generate a new scratch_write instruction after the one
3049 * we're processing.
3050 */
3051 foreach_list_safe(node, &this->instructions) {
3052 vec4_instruction *inst = (vec4_instruction *)node;
3053
3054 /* Set up the annotation tracking for new generated instructions. */
3055 base_ir = inst->ir;
3056 current_annotation = inst->annotation;
3057
3058 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
3059 emit_scratch_write(inst, scratch_loc[inst->dst.reg]);
3060 }
3061
3062 for (int i = 0 ; i < 3; i++) {
3063 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
3064 continue;
3065
3066 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3067
3068 emit_scratch_read(inst, temp, inst->src[i],
3069 scratch_loc[inst->src[i].reg]);
3070
3071 inst->src[i].file = temp.file;
3072 inst->src[i].reg = temp.reg;
3073 inst->src[i].reg_offset = temp.reg_offset;
3074 inst->src[i].reladdr = NULL;
3075 }
3076 }
3077 }
3078
3079 /**
3080 * Emits an instruction before @inst to load the value named by @orig_src
3081 * from the pull constant buffer (surface) at @base_offset to @temp.
3082 */
3083 void
3084 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
3085 dst_reg temp, src_reg orig_src,
3086 int base_offset)
3087 {
3088 int reg_offset = base_offset + orig_src.reg_offset;
3089 src_reg index = src_reg((unsigned)SURF_INDEX_VERT_CONST_BUFFER);
3090 src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
3091 vec4_instruction *load;
3092
3093 if (brw->gen >= 7) {
3094 dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
3095 grf_offset.type = offset.type;
3096 emit_before(inst, MOV(grf_offset, offset));
3097
3098 load = new(mem_ctx) vec4_instruction(this,
3099 VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
3100 temp, index, src_reg(grf_offset));
3101 } else {
3102 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
3103 temp, index, offset);
3104 load->base_mrf = 14;
3105 load->mlen = 1;
3106 }
3107 emit_before(inst, load);
3108 }
3109
3110 /**
3111 * Implements array access of uniforms by inserting a
3112 * PULL_CONSTANT_LOAD instruction.
3113 *
3114 * Unlike temporary GRF array access (where we don't support it due to
3115 * the difficulty of doing relative addressing on instruction
3116 * destinations), we could potentially do array access of uniforms
3117 * that were loaded in GRF space as push constants. In real-world
3118 * usage we've seen, though, the arrays being used are always larger
3119 * than we could load as push constants, so just always move all
3120 * uniform array access out to a pull constant buffer.
3121 */
3122 void
3123 vec4_visitor::move_uniform_array_access_to_pull_constants()
3124 {
3125 int pull_constant_loc[this->uniforms];
3126
3127 for (int i = 0; i < this->uniforms; i++) {
3128 pull_constant_loc[i] = -1;
3129 }
3130
3131 /* Walk through and find array access of uniforms. Put a copy of that
3132 * uniform in the pull constant buffer.
3133 *
3134 * Note that we don't move constant-indexed accesses to arrays. No
3135 * testing has been done of the performance impact of this choice.
3136 */
3137 foreach_list_safe(node, &this->instructions) {
3138 vec4_instruction *inst = (vec4_instruction *)node;
3139
3140 for (int i = 0 ; i < 3; i++) {
3141 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
3142 continue;
3143
3144 int uniform = inst->src[i].reg;
3145
3146 /* If this array isn't already present in the pull constant buffer,
3147 * add it.
3148 */
3149 if (pull_constant_loc[uniform] == -1) {
3150 const float **values = &prog_data->param[uniform * 4];
3151
3152 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
3153
3154 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
3155 prog_data->pull_param[prog_data->nr_pull_params++]
3156 = values[j];
3157 }
3158 }
3159
3160 /* Set up the annotation tracking for new generated instructions. */
3161 base_ir = inst->ir;
3162 current_annotation = inst->annotation;
3163
3164 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3165
3166 emit_pull_constant_load(inst, temp, inst->src[i],
3167 pull_constant_loc[uniform]);
3168
3169 inst->src[i].file = temp.file;
3170 inst->src[i].reg = temp.reg;
3171 inst->src[i].reg_offset = temp.reg_offset;
3172 inst->src[i].reladdr = NULL;
3173 }
3174 }
3175
3176 /* Now there are no accesses of the UNIFORM file with a reladdr, so
3177 * no need to track them as larger-than-vec4 objects. This will be
3178 * relied on in cutting out unused uniform vectors from push
3179 * constants.
3180 */
3181 split_uniform_registers();
3182 }
3183
3184 void
3185 vec4_visitor::resolve_ud_negate(src_reg *reg)
3186 {
3187 if (reg->type != BRW_REGISTER_TYPE_UD ||
3188 !reg->negate)
3189 return;
3190
3191 src_reg temp = src_reg(this, glsl_type::uvec4_type);
3192 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
3193 *reg = temp;
3194 }
3195
3196 vec4_visitor::vec4_visitor(struct brw_context *brw,
3197 struct brw_vec4_compile *c,
3198 struct gl_program *prog,
3199 const struct brw_vec4_prog_key *key,
3200 struct brw_vec4_prog_data *prog_data,
3201 struct gl_shader_program *shader_prog,
3202 struct brw_shader *shader,
3203 void *mem_ctx,
3204 bool debug_flag)
3205 : debug_flag(debug_flag)
3206 {
3207 this->brw = brw;
3208 this->ctx = &brw->ctx;
3209 this->shader_prog = shader_prog;
3210 this->shader = shader;
3211
3212 this->mem_ctx = mem_ctx;
3213 this->failed = false;
3214
3215 this->base_ir = NULL;
3216 this->current_annotation = NULL;
3217 memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
3218
3219 this->c = c;
3220 this->prog = prog;
3221 this->key = key;
3222 this->prog_data = prog_data;
3223
3224 this->variable_ht = hash_table_ctor(0,
3225 hash_table_pointer_hash,
3226 hash_table_pointer_compare);
3227
3228 this->virtual_grf_start = NULL;
3229 this->virtual_grf_end = NULL;
3230 this->virtual_grf_sizes = NULL;
3231 this->virtual_grf_count = 0;
3232 this->virtual_grf_reg_map = NULL;
3233 this->virtual_grf_reg_count = 0;
3234 this->virtual_grf_array_size = 0;
3235 this->live_intervals_valid = false;
3236
3237 this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
3238
3239 this->uniforms = 0;
3240 }
3241
3242 vec4_visitor::~vec4_visitor()
3243 {
3244 hash_table_dtor(this->variable_ht);
3245 }
3246
3247
3248 vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw,
3249 struct brw_vs_compile *vs_compile,
3250 struct brw_vs_prog_data *vs_prog_data,
3251 struct gl_shader_program *prog,
3252 struct brw_shader *shader,
3253 void *mem_ctx)
3254 : vec4_visitor(brw, &vs_compile->base, &vs_compile->vp->program.Base,
3255 &vs_compile->key.base, &vs_prog_data->base, prog, shader,
3256 mem_ctx, INTEL_DEBUG & DEBUG_VS),
3257 vs_compile(vs_compile),
3258 vs_prog_data(vs_prog_data)
3259 {
3260 }
3261
3262
3263 void
3264 vec4_visitor::fail(const char *format, ...)
3265 {
3266 va_list va;
3267 char *msg;
3268
3269 if (failed)
3270 return;
3271
3272 failed = true;
3273
3274 va_start(va, format);
3275 msg = ralloc_vasprintf(mem_ctx, format, va);
3276 va_end(va);
3277 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
3278
3279 this->fail_msg = msg;
3280
3281 if (debug_flag) {
3282 fprintf(stderr, "%s", msg);
3283 }
3284 }
3285
3286 } /* namespace brw */