i965/fs: Add support for translating ir_triop_fma into MAD.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 #include "brw_vs.h"
26 #include "glsl/ir_uniform.h"
27 extern "C" {
28 #include "main/context.h"
29 #include "main/macros.h"
30 #include "program/prog_parameter.h"
31 #include "program/sampler.h"
32 }
33
34 namespace brw {
35
36 vec4_instruction::vec4_instruction(vec4_visitor *v,
37 enum opcode opcode, dst_reg dst,
38 src_reg src0, src_reg src1, src_reg src2)
39 {
40 this->opcode = opcode;
41 this->dst = dst;
42 this->src[0] = src0;
43 this->src[1] = src1;
44 this->src[2] = src2;
45 this->ir = v->base_ir;
46 this->annotation = v->current_annotation;
47 }
48
49 vec4_instruction *
50 vec4_visitor::emit(vec4_instruction *inst)
51 {
52 this->instructions.push_tail(inst);
53
54 return inst;
55 }
56
57 vec4_instruction *
58 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
59 {
60 new_inst->ir = inst->ir;
61 new_inst->annotation = inst->annotation;
62
63 inst->insert_before(new_inst);
64
65 return inst;
66 }
67
68 vec4_instruction *
69 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
70 src_reg src0, src_reg src1, src_reg src2)
71 {
72 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
73 src0, src1, src2));
74 }
75
76
77 vec4_instruction *
78 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
79 {
80 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
81 }
82
83 vec4_instruction *
84 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
85 {
86 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
87 }
88
89 vec4_instruction *
90 vec4_visitor::emit(enum opcode opcode)
91 {
92 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
93 }
94
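/* The ALU[123] macros below define builders (NOT, MOV, ADD, MUL, LRP, BFE,
 * ...) that construct a vec4_instruction with the corresponding opcode but
 * do not emit it; callers hand the result to emit() themselves.
 */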
95 #define ALU1(op) \
96 vec4_instruction * \
97 vec4_visitor::op(dst_reg dst, src_reg src0) \
98 { \
99 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
100 src0); \
101 }
102
103 #define ALU2(op) \
104 vec4_instruction * \
105 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
106 { \
107 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
108 src0, src1); \
109 }
110
111 #define ALU3(op) \
112 vec4_instruction * \
113 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\
114 { \
115 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
116 src0, src1, src2); \
117 }
118
119 ALU1(NOT)
120 ALU1(MOV)
121 ALU1(FRC)
122 ALU1(RNDD)
123 ALU1(RNDE)
124 ALU1(RNDZ)
125 ALU1(F32TO16)
126 ALU1(F16TO32)
127 ALU2(ADD)
128 ALU2(MUL)
129 ALU2(MACH)
130 ALU2(AND)
131 ALU2(OR)
132 ALU2(XOR)
133 ALU2(DP3)
134 ALU2(DP4)
135 ALU2(DPH)
136 ALU2(SHL)
137 ALU2(SHR)
138 ALU2(ASR)
139 ALU3(LRP)
140 ALU1(BFREV)
141 ALU3(BFE)
142 ALU2(BFI1)
143 ALU3(BFI2)
144 ALU1(FBH)
145 ALU1(FBL)
146 ALU1(CBIT)
147
148 /** Gen4 predicated IF. */
149 vec4_instruction *
150 vec4_visitor::IF(uint32_t predicate)
151 {
152 vec4_instruction *inst;
153
154 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
155 inst->predicate = predicate;
156
157 return inst;
158 }
159
160 /** Gen6+ IF with embedded comparison. */
161 vec4_instruction *
162 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
163 {
164 assert(brw->gen >= 6);
165
166 vec4_instruction *inst;
167
168 resolve_ud_negate(&src0);
169 resolve_ud_negate(&src1);
170
171 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
172 src0, src1);
173 inst->conditional_mod = condition;
174
175 return inst;
176 }
177
178 /**
179 * CMP: Sets the low bit of the destination channels with the result
180 * of the comparison, while the upper bits are undefined, and updates
181 * the flag register with the packed 16 bits of the result.
182 */
183 vec4_instruction *
184 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
185 {
186 vec4_instruction *inst;
187
188 /* original gen4 does type conversion to the destination type
189 * before comparison, producing garbage results for floating
190 * point comparisons.
191 */
192 if (brw->gen == 4) {
193 dst.type = src0.type;
194 if (dst.file == HW_REG)
195 dst.fixed_hw_reg.type = dst.type;
196 }
197
198 resolve_ud_negate(&src0);
199 resolve_ud_negate(&src1);
200
201 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
202 inst->conditional_mod = condition;
203
204 return inst;
205 }
206
207 vec4_instruction *
208 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
209 {
210 vec4_instruction *inst;
211
212 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
213 dst, index);
214 inst->base_mrf = 14;
215 inst->mlen = 2;
216
217 return inst;
218 }
219
220 vec4_instruction *
221 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
222 {
223 vec4_instruction *inst;
224
225 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
226 dst, src, index);
227 inst->base_mrf = 13;
228 inst->mlen = 3;
229
230 return inst;
231 }
232
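/* Emit a dot product of 2, 3 or 4 components by picking DP2/DP3/DP4. */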
233 void
234 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
235 {
236 static enum opcode dot_opcodes[] = {
237 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
238 };
239
240 emit(dot_opcodes[elements - 2], dst, src0, src1);
241 }
242
243 src_reg
244 vec4_visitor::fix_3src_operand(src_reg src)
245 {
246 /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
247 * able to use vertical stride of zero to replicate the vec4 uniform, like
248 *
249 * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
250 *
251 * But you can't, since vertical stride is always four in three-source
252 * instructions. Instead, insert a MOV instruction to do the replication so
253 * that the three-source instruction can consume it.
254 */
255
256 /* The MOV is only needed if the source is a uniform or immediate. */
257 if (src.file != UNIFORM && src.file != IMM)
258 return src;
259
260 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
261 expanded.type = src.type;
262 emit(MOV(expanded, src));
263 return src_reg(expanded);
264 }
265
266 src_reg
267 vec4_visitor::fix_math_operand(src_reg src)
268 {
269 /* The gen6 math instruction ignores the source modifiers --
270 * swizzle, abs, negate, and at least some parts of the register
271 * region description.
272 *
273 * Rather than trying to enumerate all these cases, *always* expand the
274 * operand to a temp GRF for gen6.
275 *
276 * For gen7, keep the operand as-is, except if immediate, which gen7 still
277 * can't use.
278 */
279
280 if (brw->gen == 7 && src.file != IMM)
281 return src;
282
283 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
284 expanded.type = src.type;
285 emit(MOV(expanded, src));
286 return src_reg(expanded);
287 }
288
289 void
290 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
291 {
292 src = fix_math_operand(src);
293
294 if (dst.writemask != WRITEMASK_XYZW) {
295 /* The gen6 math instruction must be align1, so we can't do
296 * writemasks.
297 */
298 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
299
300 emit(opcode, temp_dst, src);
301
302 emit(MOV(dst, src_reg(temp_dst)));
303 } else {
304 emit(opcode, dst, src);
305 }
306 }
307
308 void
309 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
310 {
311 vec4_instruction *inst = emit(opcode, dst, src);
312 inst->base_mrf = 1;
313 inst->mlen = 1;
314 }
315
316 void
317 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
318 {
319 switch (opcode) {
320 case SHADER_OPCODE_RCP:
321 case SHADER_OPCODE_RSQ:
322 case SHADER_OPCODE_SQRT:
323 case SHADER_OPCODE_EXP2:
324 case SHADER_OPCODE_LOG2:
325 case SHADER_OPCODE_SIN:
326 case SHADER_OPCODE_COS:
327 break;
328 default:
329 assert(!"not reached: bad math opcode");
330 return;
331 }
332
333 if (brw->gen >= 6) {
334 return emit_math1_gen6(opcode, dst, src);
335 } else {
336 return emit_math1_gen4(opcode, dst, src);
337 }
338 }
339
340 void
341 vec4_visitor::emit_math2_gen6(enum opcode opcode,
342 dst_reg dst, src_reg src0, src_reg src1)
343 {
344 src0 = fix_math_operand(src0);
345 src1 = fix_math_operand(src1);
346
347 if (dst.writemask != WRITEMASK_XYZW) {
348 /* The gen6 math instruction must be align1, so we can't do
349 * writemasks.
350 */
351 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
352 temp_dst.type = dst.type;
353
354 emit(opcode, temp_dst, src0, src1);
355
356 emit(MOV(dst, src_reg(temp_dst)));
357 } else {
358 emit(opcode, dst, src0, src1);
359 }
360 }
361
362 void
363 vec4_visitor::emit_math2_gen4(enum opcode opcode,
364 dst_reg dst, src_reg src0, src_reg src1)
365 {
366 vec4_instruction *inst = emit(opcode, dst, src0, src1);
367 inst->base_mrf = 1;
368 inst->mlen = 2;
369 }
370
371 void
372 vec4_visitor::emit_math(enum opcode opcode,
373 dst_reg dst, src_reg src0, src_reg src1)
374 {
375 switch (opcode) {
376 case SHADER_OPCODE_POW:
377 case SHADER_OPCODE_INT_QUOTIENT:
378 case SHADER_OPCODE_INT_REMAINDER:
379 break;
380 default:
381 assert(!"not reached: unsupported binary math opcode");
382 return;
383 }
384
385 if (brw->gen >= 6) {
386 return emit_math2_gen6(opcode, dst, src0, src1);
387 } else {
388 return emit_math2_gen4(opcode, dst, src0, src1);
389 }
390 }
391
392 void
393 vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
394 {
395 if (brw->gen < 7)
396 assert(!"ir_unop_pack_half_2x16 should be lowered");
397
398 assert(dst.type == BRW_REGISTER_TYPE_UD);
399 assert(src0.type == BRW_REGISTER_TYPE_F);
400
401 /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
402 *
403 * Because this instruction does not have a 16-bit floating-point type,
404 * the destination data type must be Word (W).
405 *
406 * The destination must be DWord-aligned and specify a horizontal stride
407 * (HorzStride) of 2. The 16-bit result is stored in the lower word of
408 * each destination channel and the upper word is not modified.
409 *
410 * The above restriction implies that the f32to16 instruction must use
411 * align1 mode, because only in align1 mode is it possible to specify
412 * horizontal stride. We choose here to defy the hardware docs and emit
413 * align16 instructions.
414 *
415 * (I [chadv] did attempt to emit align1 instructions for VS f32to16
416 * instructions. I was partially successful in that the code passed all
417 * tests. However, the code was dubiously correct and fragile, and the
418 * tests were not harsh enough to probe that frailty. Not trusting the
419 * code, I chose instead to remain in align16 mode in defiance of the hw
420 * docs).
421 *
422 * I've [chadv] experimentally confirmed that, on gen7 hardware and the
423 * simulator, emitting a f32to16 in align16 mode with UD as destination
424 * data type is safe. The behavior differs from that specified in the PRM
425 * in that the upper word of each destination channel is cleared to 0.
426 */
427
428 dst_reg tmp_dst(this, glsl_type::uvec2_type);
429 src_reg tmp_src(tmp_dst);
430
431 #if 0
432 /* Verify the undocumented behavior on which the following instructions
433 * rely. If f32to16 fails to clear the upper word of the X and Y channels,
434 * then the result of the bit-or instruction below will be incorrect.
435 *
436 * You should inspect the disasm output in order to verify that the MOV is
437 * not optimized away.
438 */
439 emit(MOV(tmp_dst, src_reg(0x12345678u)));
440 #endif
441
442 /* Give tmp the form below, where "." means untouched.
443 *
444 *  w z     y          x      w z     y          x
445 * |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
446 *
447 * That the upper word of each write-channel be 0 is required for the
448 * following bit-shift and bit-or instructions to work. Note that this
449 * relies on the undocumented hardware behavior mentioned above.
450 */
451 tmp_dst.writemask = WRITEMASK_XY;
452 emit(F32TO16(tmp_dst, src0));
453
454 /* Give the write-channels of dst the form:
455 * 0xhhhh0000
456 */
457 tmp_src.swizzle = SWIZZLE_Y;
458 emit(SHL(dst, tmp_src, src_reg(16u)));
459
460 /* Finally, give the write-channels of dst the form of packHalf2x16's
461 * output:
462 * 0xhhhhllll
463 */
464 tmp_src.swizzle = SWIZZLE_X;
465 emit(OR(dst, src_reg(dst), tmp_src));
466 }
467
468 void
469 vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
470 {
471 if (brw->gen < 7)
472 assert(!"ir_unop_unpack_half_2x16 should be lowered");
473
474 assert(dst.type == BRW_REGISTER_TYPE_F);
475 assert(src0.type == BRW_REGISTER_TYPE_UD);
476
477 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
478 *
479 * Because this instruction does not have a 16-bit floating-point type,
480 * the source data type must be Word (W). The destination type must be
481 * F (Float).
482 *
483 * To use W as the source data type, we must adjust horizontal strides,
484 * which is only possible in align1 mode. All my [chadv] attempts at
485 * emitting align1 instructions for unpackHalf2x16 failed to pass the
486 * Piglit tests, so I gave up.
487 *
488 * I've verified that, on gen7 hardware and the simulator, it is safe to
489 * emit f16to32 in align16 mode with UD as source data type.
490 */
491
492 dst_reg tmp_dst(this, glsl_type::uvec2_type);
493 src_reg tmp_src(tmp_dst);
494
495 tmp_dst.writemask = WRITEMASK_X;
496 emit(AND(tmp_dst, src0, src_reg(0xffffu)));
497
498 tmp_dst.writemask = WRITEMASK_Y;
499 emit(SHR(tmp_dst, src0, src_reg(16u)));
500
501 dst.writemask = WRITEMASK_XY;
502 emit(F16TO32(dst, tmp_src));
503 }
504
505 void
506 vec4_visitor::visit_instructions(const exec_list *list)
507 {
508 foreach_list(node, list) {
509 ir_instruction *ir = (ir_instruction *)node;
510
511 base_ir = ir;
512 ir->accept(this);
513 }
514 }
515
516
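/* Size of a GLSL type, in units of vec4 slots (one register per vec4). */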
517 static int
518 type_size(const struct glsl_type *type)
519 {
520 unsigned int i;
521 int size;
522
523 switch (type->base_type) {
524 case GLSL_TYPE_UINT:
525 case GLSL_TYPE_INT:
526 case GLSL_TYPE_FLOAT:
527 case GLSL_TYPE_BOOL:
528 if (type->is_matrix()) {
529 return type->matrix_columns;
530 } else {
531 /* Regardless of size of vector, it gets a vec4. This is bad
532 * packing for things like floats, but otherwise arrays become a
533 * mess. Hopefully a later pass over the code can pack scalars
534 * down if appropriate.
535 */
536 return 1;
537 }
538 case GLSL_TYPE_ARRAY:
539 assert(type->length > 0);
540 return type_size(type->fields.array) * type->length;
541 case GLSL_TYPE_STRUCT:
542 size = 0;
543 for (i = 0; i < type->length; i++) {
544 size += type_size(type->fields.structure[i].type);
545 }
546 return size;
547 case GLSL_TYPE_SAMPLER:
548 /* Samplers take up one slot in UNIFORMS[], but they're baked in
549 * at link time.
550 */
551 return 1;
552 case GLSL_TYPE_VOID:
553 case GLSL_TYPE_ERROR:
554 case GLSL_TYPE_INTERFACE:
555 assert(0);
556 break;
557 }
558
559 return 0;
560 }
561
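/* Allocate a virtual GRF of the given size (in vec4 registers), growing the
 * bookkeeping arrays on demand, and return the new register's index.
 */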
562 int
563 vec4_visitor::virtual_grf_alloc(int size)
564 {
565 if (virtual_grf_array_size <= virtual_grf_count) {
566 if (virtual_grf_array_size == 0)
567 virtual_grf_array_size = 16;
568 else
569 virtual_grf_array_size *= 2;
570 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
571 virtual_grf_array_size);
572 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
573 virtual_grf_array_size);
574 }
575 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
576 virtual_grf_reg_count += size;
577 virtual_grf_sizes[virtual_grf_count] = size;
578 return virtual_grf_count++;
579 }
580
581 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
582 {
583 init();
584
585 this->file = GRF;
586 this->reg = v->virtual_grf_alloc(type_size(type));
587
588 if (type->is_array() || type->is_record()) {
589 this->swizzle = BRW_SWIZZLE_NOOP;
590 } else {
591 this->swizzle = swizzle_for_size(type->vector_elements);
592 }
593
594 this->type = brw_type_for_base_type(type);
595 }
596
597 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
598 {
599 init();
600
601 this->file = GRF;
602 this->reg = v->virtual_grf_alloc(type_size(type));
603
604 if (type->is_array() || type->is_record()) {
605 this->writemask = WRITEMASK_XYZW;
606 } else {
607 this->writemask = (1 << type->vector_elements) - 1;
608 }
609
610 this->type = brw_type_for_base_type(type);
611 }
612
613 /* Our support for uniforms is piggy-backed on the struct
614 * gl_fragment_program, because that's where the values actually
615 * get stored, rather than in some global gl_shader_program uniform
616 * store.
617 */
618 void
619 vec4_visitor::setup_uniform_values(ir_variable *ir)
620 {
621 int namelen = strlen(ir->name);
622
623 /* The data for our (non-builtin) uniforms is stored in a series of
624 * gl_uniform_driver_storage structs for each subcomponent that
625 * glGetUniformLocation() could name. We know it's been set up in the same
626 * order we'd walk the type, so walk the list of storage and find anything
627 * with our name, or the prefix of a component that starts with our name.
628 */
629 for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
630 struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
631
632 if (strncmp(ir->name, storage->name, namelen) != 0 ||
633 (storage->name[namelen] != 0 &&
634 storage->name[namelen] != '.' &&
635 storage->name[namelen] != '[')) {
636 continue;
637 }
638
639 gl_constant_value *components = storage->storage;
640 unsigned vector_count = (MAX2(storage->array_elements, 1) *
641 storage->type->matrix_columns);
642
643 for (unsigned s = 0; s < vector_count; s++) {
644 uniform_vector_size[uniforms] = storage->type->vector_elements;
645
646 int i;
647 for (i = 0; i < uniform_vector_size[uniforms]; i++) {
648 prog_data->param[uniforms * 4 + i] = &components->f;
649 components++;
650 }
651 for (; i < 4; i++) {
652 static float zero = 0;
653 prog_data->param[uniforms * 4 + i] = &zero;
654 }
655
656 uniforms++;
657 }
658 }
659 }
660
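/* Upload the user clip planes selected by the compile key as push-constant
 * uniforms, one vec4 per enabled plane.
 */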
661 void
662 vec4_visitor::setup_uniform_clipplane_values()
663 {
664 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
665
666 for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
667 this->uniform_vector_size[this->uniforms] = 4;
668 this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
669 this->userplane[i].type = BRW_REGISTER_TYPE_F;
670 for (int j = 0; j < 4; ++j) {
671 prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j];
672 }
673 ++this->uniforms;
674 }
675 }
676
677 /* Our support for builtin uniforms is even scarier than non-builtin.
678 * It sits on top of the PROG_STATE_VAR parameters that are
679 * automatically updated from GL context state.
680 */
681 void
682 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
683 {
684 const ir_state_slot *const slots = ir->state_slots;
685 assert(ir->state_slots != NULL);
686
687 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
688 /* This state reference has already been setup by ir_to_mesa,
689 * but we'll get the same index back here. We can reference
690 * ParameterValues directly, since unlike brw_fs.cpp, we never
691 * add new state references during compile.
692 */
693 int index = _mesa_add_state_reference(this->prog->Parameters,
694 (gl_state_index *)slots[i].tokens);
695 float *values = &this->prog->Parameters->ParameterValues[index][0].f;
696
697 this->uniform_vector_size[this->uniforms] = 0;
698 /* Add each of the unique swizzled channels of the element.
699 * This will end up matching the size of the glsl_type of this field.
700 */
701 int last_swiz = -1;
702 for (unsigned int j = 0; j < 4; j++) {
703 int swiz = GET_SWZ(slots[i].swizzle, j);
704 if (swiz == last_swiz)
705 break;
706 last_swiz = swiz;
707 prog_data->param[this->uniforms * 4 + j] = &values[swiz];
708 this->uniform_vector_size[this->uniforms]++;
709 }
710 this->uniforms++;
711 }
712 }
713
714 dst_reg *
715 vec4_visitor::variable_storage(ir_variable *var)
716 {
717 return (dst_reg *)hash_table_find(this->variable_ht, var);
718 }
719
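/* Evaluate a boolean rvalue into the flag register and report which
 * predicate (normal, or ALL4H/ANY4H for vector comparisons) a following
 * predicated instruction should use.
 */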
720 void
721 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
722 {
723 ir_expression *expr = ir->as_expression();
724
725 *predicate = BRW_PREDICATE_NORMAL;
726
727 if (expr) {
728 src_reg op[2];
729 vec4_instruction *inst;
730
731 assert(expr->get_num_operands() <= 2);
732 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
733 expr->operands[i]->accept(this);
734 op[i] = this->result;
735
736 resolve_ud_negate(&op[i]);
737 }
738
739 switch (expr->operation) {
740 case ir_unop_logic_not:
741 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
742 inst->conditional_mod = BRW_CONDITIONAL_Z;
743 break;
744
745 case ir_binop_logic_xor:
746 inst = emit(XOR(dst_null_d(), op[0], op[1]));
747 inst->conditional_mod = BRW_CONDITIONAL_NZ;
748 break;
749
750 case ir_binop_logic_or:
751 inst = emit(OR(dst_null_d(), op[0], op[1]));
752 inst->conditional_mod = BRW_CONDITIONAL_NZ;
753 break;
754
755 case ir_binop_logic_and:
756 inst = emit(AND(dst_null_d(), op[0], op[1]));
757 inst->conditional_mod = BRW_CONDITIONAL_NZ;
758 break;
759
760 case ir_unop_f2b:
761 if (brw->gen >= 6) {
762 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
763 } else {
764 inst = emit(MOV(dst_null_f(), op[0]));
765 inst->conditional_mod = BRW_CONDITIONAL_NZ;
766 }
767 break;
768
769 case ir_unop_i2b:
770 if (brw->gen >= 6) {
771 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
772 } else {
773 inst = emit(MOV(dst_null_d(), op[0]));
774 inst->conditional_mod = BRW_CONDITIONAL_NZ;
775 }
776 break;
777
778 case ir_binop_all_equal:
779 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
780 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
781 break;
782
783 case ir_binop_any_nequal:
784 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
785 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
786 break;
787
788 case ir_unop_any:
789 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
790 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
791 break;
792
793 case ir_binop_greater:
794 case ir_binop_gequal:
795 case ir_binop_less:
796 case ir_binop_lequal:
797 case ir_binop_equal:
798 case ir_binop_nequal:
799 emit(CMP(dst_null_d(), op[0], op[1],
800 brw_conditional_for_comparison(expr->operation)));
801 break;
802
803 default:
804 assert(!"not reached");
805 break;
806 }
807 return;
808 }
809
810 ir->accept(this);
811
812 resolve_ud_negate(&this->result);
813
814 if (brw->gen >= 6) {
815 vec4_instruction *inst = emit(AND(dst_null_d(),
816 this->result, src_reg(1)));
817 inst->conditional_mod = BRW_CONDITIONAL_NZ;
818 } else {
819 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
820 inst->conditional_mod = BRW_CONDITIONAL_NZ;
821 }
822 }
823
824 /**
825 * Emit a gen6 IF statement with the comparison folded into the IF
826 * instruction.
827 */
828 void
829 vec4_visitor::emit_if_gen6(ir_if *ir)
830 {
831 ir_expression *expr = ir->condition->as_expression();
832
833 if (expr) {
834 src_reg op[2];
835 dst_reg temp;
836
837 assert(expr->get_num_operands() <= 2);
838 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
839 expr->operands[i]->accept(this);
840 op[i] = this->result;
841 }
842
843 switch (expr->operation) {
844 case ir_unop_logic_not:
845 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
846 return;
847
848 case ir_binop_logic_xor:
849 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
850 return;
851
852 case ir_binop_logic_or:
853 temp = dst_reg(this, glsl_type::bool_type);
854 emit(OR(temp, op[0], op[1]));
855 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
856 return;
857
858 case ir_binop_logic_and:
859 temp = dst_reg(this, glsl_type::bool_type);
860 emit(AND(temp, op[0], op[1]));
861 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
862 return;
863
864 case ir_unop_f2b:
865 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
866 return;
867
868 case ir_unop_i2b:
869 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
870 return;
871
872 case ir_binop_greater:
873 case ir_binop_gequal:
874 case ir_binop_less:
875 case ir_binop_lequal:
876 case ir_binop_equal:
877 case ir_binop_nequal:
878 emit(IF(op[0], op[1],
879 brw_conditional_for_comparison(expr->operation)));
880 return;
881
882 case ir_binop_all_equal:
883 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
884 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
885 return;
886
887 case ir_binop_any_nequal:
888 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
889 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
890 return;
891
892 case ir_unop_any:
893 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
894 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
895 return;
896
897 default:
898 assert(!"not reached");
899 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
900 return;
901 }
902 return;
903 }
904
905 ir->condition->accept(this);
906
907 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
908 }
909
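/* Return a copy of the destination register with its writemask replaced. */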
910 static dst_reg
911 with_writemask(dst_reg const & r, int mask)
912 {
913 dst_reg result = r;
914 result.writemask = mask;
915 return result;
916 }
917
918 void
919 vec4_vs_visitor::emit_prolog()
920 {
921 dst_reg sign_recovery_shift;
922 dst_reg normalize_factor;
923 dst_reg es3_normalize_factor;
924
925 for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
926 if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) {
927 uint8_t wa_flags = vs_compile->key.gl_attrib_wa_flags[i];
928 dst_reg reg(ATTR, i);
929 dst_reg reg_d = reg;
930 reg_d.type = BRW_REGISTER_TYPE_D;
931 dst_reg reg_ud = reg;
932 reg_ud.type = BRW_REGISTER_TYPE_UD;
933
934 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
935 * come in as floating point conversions of the integer values.
936 */
937 if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) {
938 dst_reg dst = reg;
939 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
940 dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
941 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
942 }
943
944 /* Do sign recovery for 2101010 formats if required. */
945 if (wa_flags & BRW_ATTRIB_WA_SIGN) {
946 if (sign_recovery_shift.file == BAD_FILE) {
947 /* shift constant: <22,22,22,30> */
948 sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
949 emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
950 emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
951 }
952
953 emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
954 emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift)));
955 }
956
957 /* Apply BGRA swizzle if required. */
958 if (wa_flags & BRW_ATTRIB_WA_BGRA) {
959 src_reg temp = src_reg(reg);
960 temp.swizzle = BRW_SWIZZLE4(2,1,0,3);
961 emit(MOV(reg, temp));
962 }
963
964 if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
965 /* ES 3.0 has different rules for converting signed normalized
966 * fixed-point numbers than desktop GL.
967 */
968 if (_mesa_is_gles3(ctx) && (wa_flags & BRW_ATTRIB_WA_SIGN)) {
969 /* According to equation 2.2 of the ES 3.0 specification,
970 * signed normalization conversion is done by:
971 *
972 * f = c / (2^(b-1)-1)
973 */
974 if (es3_normalize_factor.file == BAD_FILE) {
975 /* mul constant: 1 / (2^(b-1) - 1) */
976 es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
977 emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_XYZ),
978 src_reg(1.0f / ((1<<9) - 1))));
979 emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_W),
980 src_reg(1.0f / ((1<<1) - 1))));
981 }
982
983 dst_reg dst = reg;
984 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
985 emit(MOV(dst, src_reg(reg_d)));
986 emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
987 emit_minmax(BRW_CONDITIONAL_G, dst, src_reg(dst), src_reg(-1.0f));
988 } else {
989 /* The following equations are from the OpenGL 3.2 specification:
990 *
991 * 2.1 unsigned normalization
992 * f = c/(2^n-1)
993 *
994 * 2.2 signed normalization
995 * f = (2c+1)/(2^n-1)
996 *
997 * Both of these share a common divisor, which is represented by
998 * "normalize_factor" in the code below.
999 */
1000 if (normalize_factor.file == BAD_FILE) {
1001 /* 1 / (2^b - 1) for b=<10,10,10,2> */
1002 normalize_factor = dst_reg(this, glsl_type::vec4_type);
1003 emit(MOV(with_writemask(normalize_factor, WRITEMASK_XYZ),
1004 src_reg(1.0f / ((1<<10) - 1))));
1005 emit(MOV(with_writemask(normalize_factor, WRITEMASK_W),
1006 src_reg(1.0f / ((1<<2) - 1))));
1007 }
1008
1009 dst_reg dst = reg;
1010 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
1011 emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
1012
1013 /* For signed normalization, we want the numerator to be 2c+1. */
1014 if (wa_flags & BRW_ATTRIB_WA_SIGN) {
1015 emit(MUL(dst, src_reg(dst), src_reg(2.0f)));
1016 emit(ADD(dst, src_reg(dst), src_reg(1.0f)));
1017 }
1018
1019 emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
1020 }
1021 }
1022
1023 if (wa_flags & BRW_ATTRIB_WA_SCALE) {
1024 dst_reg dst = reg;
1025 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
1026 emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
1027 }
1028 }
1029 }
1030 }
1031
1032
1033 dst_reg *
1034 vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir)
1035 {
1036 /* VertexID is stored by the VF as the last vertex element, but
1037 * we don't represent it with a flag in inputs_read, so we call
1038 * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
1039 */
1040 dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
1041 vs_prog_data->uses_vertexid = true;
1042
1043 switch (ir->location) {
1044 case SYSTEM_VALUE_VERTEX_ID:
1045 reg->writemask = WRITEMASK_X;
1046 break;
1047 case SYSTEM_VALUE_INSTANCE_ID:
1048 reg->writemask = WRITEMASK_Y;
1049 break;
1050 default:
1051 assert(!"not reached");
1052 break;
1053 }
1054
1055 return reg;
1056 }
1057
1058
1059 void
1060 vec4_visitor::visit(ir_variable *ir)
1061 {
1062 dst_reg *reg = NULL;
1063
1064 if (variable_storage(ir))
1065 return;
1066
1067 switch (ir->mode) {
1068 case ir_var_shader_in:
1069 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
1070 break;
1071
1072 case ir_var_shader_out:
1073 reg = new(mem_ctx) dst_reg(this, ir->type);
1074
1075 for (int i = 0; i < type_size(ir->type); i++) {
1076 output_reg[ir->location + i] = *reg;
1077 output_reg[ir->location + i].reg_offset = i;
1078 output_reg[ir->location + i].type =
1079 brw_type_for_base_type(ir->type->get_scalar_type());
1080 output_reg_annotation[ir->location + i] = ir->name;
1081 }
1082 break;
1083
1084 case ir_var_auto:
1085 case ir_var_temporary:
1086 reg = new(mem_ctx) dst_reg(this, ir->type);
1087 break;
1088
1089 case ir_var_uniform:
1090 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
1091
1092 /* Thanks to the lower_ubo_reference pass, we will see only
1093 * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
1094 * variables, so no need for them to be in variable_ht.
1095 */
1096 if (ir->is_in_uniform_block())
1097 return;
1098
1099 /* Track how big the whole uniform variable is, in case we need to put a
1100 * copy of its data into pull constants for array access.
1101 */
1102 this->uniform_size[this->uniforms] = type_size(ir->type);
1103
1104 if (!strncmp(ir->name, "gl_", 3)) {
1105 setup_builtin_uniform_values(ir);
1106 } else {
1107 setup_uniform_values(ir);
1108 }
1109 break;
1110
1111 case ir_var_system_value:
1112 reg = make_reg_for_system_value(ir);
1113 break;
1114
1115 default:
1116 assert(!"not reached");
1117 }
1118
1119 reg->type = brw_type_for_base_type(ir->type);
1120 hash_table_insert(this->variable_ht, reg, ir);
1121 }
1122
1123 void
1124 vec4_visitor::visit(ir_loop *ir)
1125 {
1126 dst_reg counter;
1127
1128 /* We don't want debugging output to print the whole body of the
1129 * loop as the annotation.
1130 */
1131 this->base_ir = NULL;
1132
1133 if (ir->counter != NULL) {
1134 this->base_ir = ir->counter;
1135 ir->counter->accept(this);
1136 counter = *(variable_storage(ir->counter));
1137
1138 if (ir->from != NULL) {
1139 this->base_ir = ir->from;
1140 ir->from->accept(this);
1141
1142 emit(MOV(counter, this->result));
1143 }
1144 }
1145
1146 emit(BRW_OPCODE_DO);
1147
1148 if (ir->to) {
1149 this->base_ir = ir->to;
1150 ir->to->accept(this);
1151
1152 emit(CMP(dst_null_d(), src_reg(counter), this->result,
1153 brw_conditional_for_comparison(ir->cmp)));
1154
1155 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
1156 inst->predicate = BRW_PREDICATE_NORMAL;
1157 }
1158
1159 visit_instructions(&ir->body_instructions);
1160
1161
1162 if (ir->increment) {
1163 this->base_ir = ir->increment;
1164 ir->increment->accept(this);
1165 emit(ADD(counter, src_reg(counter), this->result));
1166 }
1167
1168 emit(BRW_OPCODE_WHILE);
1169 }
1170
1171 void
1172 vec4_visitor::visit(ir_loop_jump *ir)
1173 {
1174 switch (ir->mode) {
1175 case ir_loop_jump::jump_break:
1176 emit(BRW_OPCODE_BREAK);
1177 break;
1178 case ir_loop_jump::jump_continue:
1179 emit(BRW_OPCODE_CONTINUE);
1180 break;
1181 }
1182 }
1183
1184
1185 void
1186 vec4_visitor::visit(ir_function_signature *ir)
1187 {
1188 assert(0);
1189 (void)ir;
1190 }
1191
1192 void
1193 vec4_visitor::visit(ir_function *ir)
1194 {
1195 /* Ignore function bodies other than main() -- we shouldn't see calls to
1196 * them since they should all be inlined.
1197 */
1198 if (strcmp(ir->name, "main") == 0) {
1199 const ir_function_signature *sig;
1200 exec_list empty;
1201
1202 sig = ir->matching_signature(&empty);
1203
1204 assert(sig);
1205
1206 visit_instructions(&sig->body);
1207 }
1208 }
1209
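/* If this expression is marked for saturation, emit its value through a
 * saturating MOV and return true so the caller skips normal emission.
 */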
1210 bool
1211 vec4_visitor::try_emit_sat(ir_expression *ir)
1212 {
1213 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1214 if (!sat_src)
1215 return false;
1216
1217 sat_src->accept(this);
1218 src_reg src = this->result;
1219
1220 this->result = src_reg(this, ir->type);
1221 vec4_instruction *inst;
1222 inst = emit(MOV(dst_reg(this->result), src));
1223 inst->saturate = true;
1224
1225 return true;
1226 }
1227
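/* Try to fuse an add whose operand mul_arg is a multiply into a single MAD.
 * Only possible on gen6+ and only for float types; returns true on success.
 */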
1228 bool
1229 vec4_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
1230 {
1231 /* 3-src instructions were introduced in gen6. */
1232 if (brw->gen < 6)
1233 return false;
1234
1235 /* MAD can only handle floating-point data. */
1236 if (ir->type->base_type != GLSL_TYPE_FLOAT)
1237 return false;
1238
1239 ir_rvalue *nonmul = ir->operands[1 - mul_arg];
1240 ir_expression *mul = ir->operands[mul_arg]->as_expression();
1241
1242 if (!mul || mul->operation != ir_binop_mul)
1243 return false;
1244
1245 nonmul->accept(this);
1246 src_reg src0 = fix_3src_operand(this->result);
1247
1248 mul->operands[0]->accept(this);
1249 src_reg src1 = fix_3src_operand(this->result);
1250
1251 mul->operands[1]->accept(this);
1252 src_reg src2 = fix_3src_operand(this->result);
1253
1254 this->result = src_reg(this, ir->type);
1255 emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2);
1256
1257 return true;
1258 }
1259
1260 void
1261 vec4_visitor::emit_bool_comparison(unsigned int op,
1262 dst_reg dst, src_reg src0, src_reg src1)
1263 {
1264 /* original gen4 does destination conversion before comparison. */
1265 if (brw->gen < 5)
1266 dst.type = src0.type;
1267
1268 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
1269
1270 dst.type = BRW_REGISTER_TYPE_D;
1271 emit(AND(dst, src_reg(dst), src_reg(0x1)));
1272 }
1273
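/* Emit a min/max: a SEL with a conditional mod on gen6+, or a CMP followed
 * by a predicated SEL on earlier generations.
 */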
1274 void
1275 vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
1276 src_reg src0, src_reg src1)
1277 {
1278 vec4_instruction *inst;
1279
1280 if (brw->gen >= 6) {
1281 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1282 inst->conditional_mod = conditionalmod;
1283 } else {
1284 emit(CMP(dst, src0, src1, conditionalmod));
1285
1286 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1287 inst->predicate = BRW_PREDICATE_NORMAL;
1288 }
1289 }
1290
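/* True if the rvalue is an int/uint constant that fits in 16 bits, in which
 * case an integer multiply needs no MACH for the high half.
 */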
1291 static bool
1292 is_16bit_constant(ir_rvalue *rvalue)
1293 {
1294 ir_constant *constant = rvalue->as_constant();
1295 if (!constant)
1296 return false;
1297
1298 if (constant->type != glsl_type::int_type &&
1299 constant->type != glsl_type::uint_type)
1300 return false;
1301
1302 return constant->value.u[0] < (1 << 16);
1303 }
1304
1305 void
1306 vec4_visitor::visit(ir_expression *ir)
1307 {
1308 unsigned int operand;
1309 src_reg op[Elements(ir->operands)];
1310 src_reg result_src;
1311 dst_reg result_dst;
1312 vec4_instruction *inst;
1313
1314 if (try_emit_sat(ir))
1315 return;
1316
1317 if (ir->operation == ir_binop_add) {
1318 if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
1319 return;
1320 }
1321
1322 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1323 this->result.file = BAD_FILE;
1324 ir->operands[operand]->accept(this);
1325 if (this->result.file == BAD_FILE) {
1326 printf("Failed to get tree for expression operand:\n");
1327 ir->operands[operand]->print();
1328 exit(1);
1329 }
1330 op[operand] = this->result;
1331
1332 /* Matrix expression operands should have been broken down to vector
1333 * operations already.
1334 */
1335 assert(!ir->operands[operand]->type->is_matrix());
1336 }
1337
1338 int vector_elements = ir->operands[0]->type->vector_elements;
1339 if (ir->operands[1]) {
1340 vector_elements = MAX2(vector_elements,
1341 ir->operands[1]->type->vector_elements);
1342 }
1343
1344 this->result.file = BAD_FILE;
1345
1346 /* Storage for our result. Ideally for an assignment we'd be using
1347 * the actual storage for the result here, instead.
1348 */
1349 result_src = src_reg(this, ir->type);
1350 /* convenience for the emit functions below. */
1351 result_dst = dst_reg(result_src);
1352 /* If nothing special happens, this is the result. */
1353 this->result = result_src;
1354 /* Limit writes to the channels that will be used by result_src later.
1355 * This does limit this temp's use as a temporary for multi-instruction
1356 * sequences.
1357 */
1358 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1359
1360 switch (ir->operation) {
1361 case ir_unop_logic_not:
1362 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
1363 * the ones complement of the whole register, not just bit 0.
1364 */
1365 emit(XOR(result_dst, op[0], src_reg(1)));
1366 break;
1367 case ir_unop_neg:
1368 op[0].negate = !op[0].negate;
1369 emit(MOV(result_dst, op[0]));
1370 break;
1371 case ir_unop_abs:
1372 op[0].abs = true;
1373 op[0].negate = false;
1374 emit(MOV(result_dst, op[0]));
1375 break;
1376
1377 case ir_unop_sign:
1378 emit(MOV(result_dst, src_reg(0.0f)));
1379
1380 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1381 inst = emit(MOV(result_dst, src_reg(1.0f)));
1382 inst->predicate = BRW_PREDICATE_NORMAL;
1383
1384 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1385 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1386 inst->predicate = BRW_PREDICATE_NORMAL;
1387
1388 break;
1389
1390 case ir_unop_rcp:
1391 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1392 break;
1393
1394 case ir_unop_exp2:
1395 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1396 break;
1397 case ir_unop_log2:
1398 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1399 break;
1400 case ir_unop_exp:
1401 case ir_unop_log:
1402 assert(!"not reached: should be handled by ir_explog_to_explog2");
1403 break;
1404 case ir_unop_sin:
1405 case ir_unop_sin_reduced:
1406 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1407 break;
1408 case ir_unop_cos:
1409 case ir_unop_cos_reduced:
1410 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1411 break;
1412
1413 case ir_unop_dFdx:
1414 case ir_unop_dFdy:
1415 assert(!"derivatives not valid in vertex shader");
1416 break;
1417
1418 case ir_unop_bitfield_reverse:
1419 emit(BFREV(result_dst, op[0]));
1420 break;
1421 case ir_unop_bit_count:
1422 emit(CBIT(result_dst, op[0]));
1423 break;
1424 case ir_unop_find_msb: {
1425 src_reg temp = src_reg(this, glsl_type::uint_type);
1426
1427 inst = emit(FBH(dst_reg(temp), op[0]));
1428 inst->dst.writemask = WRITEMASK_XYZW;
1429
1430 /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
1431 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
1432 * subtract the result from 31 to convert the MSB count into an LSB count.
1433 */
1434
1435 /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
1436 temp.swizzle = BRW_SWIZZLE_NOOP;
1437 emit(MOV(result_dst, temp));
1438
1439 src_reg src_tmp = src_reg(result_dst);
1440 emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
1441
1442 src_tmp.negate = true;
1443 inst = emit(ADD(result_dst, src_tmp, src_reg(31)));
1444 inst->predicate = BRW_PREDICATE_NORMAL;
1445 break;
1446 }
1447 case ir_unop_find_lsb:
1448 emit(FBL(result_dst, op[0]));
1449 break;
1450
1451 case ir_unop_noise:
1452 assert(!"not reached: should be handled by lower_noise");
1453 break;
1454
1455 case ir_binop_add:
1456 emit(ADD(result_dst, op[0], op[1]));
1457 break;
1458 case ir_binop_sub:
1459 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1460 break;
1461
1462 case ir_binop_mul:
1463 if (ir->type->is_integer()) {
1464 /* For integer multiplication, the MUL uses the low 16 bits of one of
1465 * the operands (src0 through SNB, src1 on IVB and later). The MACH
1466 * accumulates in the contribution of the upper 16 bits of that
1467 * operand. If we can determine that one of the args is in the low
1468 * 16 bits, though, we can just emit a single MUL.
1469 */
1470 if (is_16bit_constant(ir->operands[0])) {
1471 if (brw->gen < 7)
1472 emit(MUL(result_dst, op[0], op[1]));
1473 else
1474 emit(MUL(result_dst, op[1], op[0]));
1475 } else if (is_16bit_constant(ir->operands[1])) {
1476 if (brw->gen < 7)
1477 emit(MUL(result_dst, op[1], op[0]));
1478 else
1479 emit(MUL(result_dst, op[0], op[1]));
1480 } else {
1481 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1482
1483 emit(MUL(acc, op[0], op[1]));
1484 emit(MACH(dst_null_d(), op[0], op[1]));
1485 emit(MOV(result_dst, src_reg(acc)));
1486 }
1487 } else {
1488 emit(MUL(result_dst, op[0], op[1]));
1489 }
1490 break;
1491 case ir_binop_div:
1492 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1493 assert(ir->type->is_integer());
1494 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1495 break;
1496 case ir_binop_mod:
1497 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1498 assert(ir->type->is_integer());
1499 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1500 break;
1501
1502 case ir_binop_less:
1503 case ir_binop_greater:
1504 case ir_binop_lequal:
1505 case ir_binop_gequal:
1506 case ir_binop_equal:
1507 case ir_binop_nequal: {
1508 emit(CMP(result_dst, op[0], op[1],
1509 brw_conditional_for_comparison(ir->operation)));
1510 emit(AND(result_dst, result_src, src_reg(0x1)));
1511 break;
1512 }
1513
1514 case ir_binop_all_equal:
1515 /* "==" operator producing a scalar boolean. */
1516 if (ir->operands[0]->type->is_vector() ||
1517 ir->operands[1]->type->is_vector()) {
1518 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1519 emit(MOV(result_dst, src_reg(0)));
1520 inst = emit(MOV(result_dst, src_reg(1)));
1521 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1522 } else {
1523 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1524 emit(AND(result_dst, result_src, src_reg(0x1)));
1525 }
1526 break;
1527 case ir_binop_any_nequal:
1528 /* "!=" operator producing a scalar boolean. */
1529 if (ir->operands[0]->type->is_vector() ||
1530 ir->operands[1]->type->is_vector()) {
1531 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1532
1533 emit(MOV(result_dst, src_reg(0)));
1534 inst = emit(MOV(result_dst, src_reg(1)));
1535 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1536 } else {
1537 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1538 emit(AND(result_dst, result_src, src_reg(0x1)));
1539 }
1540 break;
1541
1542 case ir_unop_any:
1543 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1544 emit(MOV(result_dst, src_reg(0)));
1545
1546 inst = emit(MOV(result_dst, src_reg(1)));
1547 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1548 break;
1549
1550 case ir_binop_logic_xor:
1551 emit(XOR(result_dst, op[0], op[1]));
1552 break;
1553
1554 case ir_binop_logic_or:
1555 emit(OR(result_dst, op[0], op[1]));
1556 break;
1557
1558 case ir_binop_logic_and:
1559 emit(AND(result_dst, op[0], op[1]));
1560 break;
1561
1562 case ir_binop_dot:
1563 assert(ir->operands[0]->type->is_vector());
1564 assert(ir->operands[0]->type == ir->operands[1]->type);
1565 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1566 break;
1567
1568 case ir_unop_sqrt:
1569 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1570 break;
1571 case ir_unop_rsq:
1572 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1573 break;
1574
1575 case ir_unop_bitcast_i2f:
1576 case ir_unop_bitcast_u2f:
1577 this->result = op[0];
1578 this->result.type = BRW_REGISTER_TYPE_F;
1579 break;
1580
1581 case ir_unop_bitcast_f2i:
1582 this->result = op[0];
1583 this->result.type = BRW_REGISTER_TYPE_D;
1584 break;
1585
1586 case ir_unop_bitcast_f2u:
1587 this->result = op[0];
1588 this->result.type = BRW_REGISTER_TYPE_UD;
1589 break;
1590
1591 case ir_unop_i2f:
1592 case ir_unop_i2u:
1593 case ir_unop_u2i:
1594 case ir_unop_u2f:
1595 case ir_unop_b2f:
1596 case ir_unop_b2i:
1597 case ir_unop_f2i:
1598 case ir_unop_f2u:
1599 emit(MOV(result_dst, op[0]));
1600 break;
1601 case ir_unop_f2b:
1602 case ir_unop_i2b: {
1603 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1604 emit(AND(result_dst, result_src, src_reg(1)));
1605 break;
1606 }
1607
1608 case ir_unop_trunc:
1609 emit(RNDZ(result_dst, op[0]));
1610 break;
1611 case ir_unop_ceil:
1612 op[0].negate = !op[0].negate;
1613 inst = emit(RNDD(result_dst, op[0]));
1614 this->result.negate = true;
1615 break;
1616 case ir_unop_floor:
1617 inst = emit(RNDD(result_dst, op[0]));
1618 break;
1619 case ir_unop_fract:
1620 inst = emit(FRC(result_dst, op[0]));
1621 break;
1622 case ir_unop_round_even:
1623 emit(RNDE(result_dst, op[0]));
1624 break;
1625
1626 case ir_binop_min:
1627 emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
1628 break;
1629 case ir_binop_max:
1630 emit_minmax(BRW_CONDITIONAL_G, result_dst, op[0], op[1]);
1631 break;
1632
1633 case ir_binop_pow:
1634 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1635 break;
1636
1637 case ir_unop_bit_not:
1638 inst = emit(NOT(result_dst, op[0]));
1639 break;
1640 case ir_binop_bit_and:
1641 inst = emit(AND(result_dst, op[0], op[1]));
1642 break;
1643 case ir_binop_bit_xor:
1644 inst = emit(XOR(result_dst, op[0], op[1]));
1645 break;
1646 case ir_binop_bit_or:
1647 inst = emit(OR(result_dst, op[0], op[1]));
1648 break;
1649
1650 case ir_binop_lshift:
1651 inst = emit(SHL(result_dst, op[0], op[1]));
1652 break;
1653
1654 case ir_binop_rshift:
1655 if (ir->type->base_type == GLSL_TYPE_INT)
1656 inst = emit(ASR(result_dst, op[0], op[1]));
1657 else
1658 inst = emit(SHR(result_dst, op[0], op[1]));
1659 break;
1660
1661 case ir_binop_bfm:
1662 emit(BFI1(result_dst, op[0], op[1]));
1663 break;
1664
1665 case ir_binop_ubo_load: {
1666 ir_constant *uniform_block = ir->operands[0]->as_constant();
1667 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1668 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
1669 src_reg offset = op[1];
1670
1671 /* Now, load the vector from that offset. */
1672 assert(ir->type->is_vector() || ir->type->is_scalar());
1673
1674 src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
1675 packed_consts.type = result.type;
1676 src_reg surf_index =
1677 src_reg(SURF_INDEX_VS_UBO(uniform_block->value.u[0]));
1678 if (const_offset_ir) {
1679 offset = src_reg(const_offset / 16);
1680 } else {
1681 emit(SHR(dst_reg(offset), offset, src_reg(4)));
1682 }
1683
1684 vec4_instruction *pull =
1685 emit(new(mem_ctx) vec4_instruction(this,
1686 VS_OPCODE_PULL_CONSTANT_LOAD,
1687 dst_reg(packed_consts),
1688 surf_index,
1689 offset));
1690 pull->base_mrf = 14;
1691 pull->mlen = 1;
1692
1693 packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements);
1694 packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
1695 const_offset % 16 / 4,
1696 const_offset % 16 / 4,
1697 const_offset % 16 / 4);
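/* For example, a vec2 at a within-block byte offset of 8 ends up with
 * swizzle ZWWW, reading the third and fourth dwords of the fetched vec4.
 */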
1698
1699 /* UBO bools are any nonzero int. We store bools as either 0 or 1. */
1700 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1701 emit(CMP(result_dst, packed_consts, src_reg(0u),
1702 BRW_CONDITIONAL_NZ));
1703 emit(AND(result_dst, result, src_reg(0x1)));
1704 } else {
1705 emit(MOV(result_dst, packed_consts));
1706 }
1707 break;
1708 }
1709
1710 case ir_binop_vector_extract:
1711 assert(!"should have been lowered by vec_index_to_cond_assign");
1712 break;
1713
1714 case ir_triop_lrp:
1715 op[0] = fix_3src_operand(op[0]);
1716 op[1] = fix_3src_operand(op[1]);
1717 op[2] = fix_3src_operand(op[2]);
1718 /* Note that the instruction's argument order is reversed from GLSL
1719 * and the IR.
1720 */
1721 emit(LRP(result_dst, op[2], op[1], op[0]));
1722 break;
1723
1724 case ir_triop_bfi:
1725 op[0] = fix_3src_operand(op[0]);
1726 op[1] = fix_3src_operand(op[1]);
1727 op[2] = fix_3src_operand(op[2]);
1728 emit(BFI2(result_dst, op[0], op[1], op[2]));
1729 break;
1730
1731 case ir_triop_bitfield_extract:
1732 op[0] = fix_3src_operand(op[0]);
1733 op[1] = fix_3src_operand(op[1]);
1734 op[2] = fix_3src_operand(op[2]);
1735 /* Note that the instruction's argument order is reversed from GLSL
1736 * and the IR.
1737 */
1738 emit(BFE(result_dst, op[2], op[1], op[0]));
1739 break;
1740
1741 case ir_triop_vector_insert:
1742 assert(!"should have been lowered by lower_vector_insert");
1743 break;
1744
1745 case ir_quadop_bitfield_insert:
1746 assert(!"not reached: should be handled by "
1747 "bitfield_insert_to_bfm_bfi\n");
1748 break;
1749
1750 case ir_quadop_vector:
1751 assert(!"not reached: should be handled by lower_quadop_vector");
1752 break;
1753
1754 case ir_unop_pack_half_2x16:
1755 emit_pack_half_2x16(result_dst, op[0]);
1756 break;
1757 case ir_unop_unpack_half_2x16:
1758 emit_unpack_half_2x16(result_dst, op[0]);
1759 break;
1760 case ir_unop_pack_snorm_2x16:
1761 case ir_unop_pack_snorm_4x8:
1762 case ir_unop_pack_unorm_2x16:
1763 case ir_unop_pack_unorm_4x8:
1764 case ir_unop_unpack_snorm_2x16:
1765 case ir_unop_unpack_snorm_4x8:
1766 case ir_unop_unpack_unorm_2x16:
1767 case ir_unop_unpack_unorm_4x8:
1768 assert(!"not reached: should be handled by lower_packing_builtins");
1769 break;
1770 case ir_unop_unpack_half_2x16_split_x:
1771 case ir_unop_unpack_half_2x16_split_y:
1772 case ir_binop_pack_half_2x16_split:
1773 assert(!"not reached: should not occur in vertex shader");
1774 break;
1775 }
1776 }
1777
1778
1779 void
1780 vec4_visitor::visit(ir_swizzle *ir)
1781 {
1782 src_reg src;
1783 int i = 0;
1784 int swizzle[4];
1785
1786 /* Note that this is only swizzles in expressions, not those on the left
1787 * hand side of an assignment, which do write masking. See ir_assignment
1788 * for that.
1789 */
1790
1791 ir->val->accept(this);
1792 src = this->result;
1793 assert(src.file != BAD_FILE);
1794
1795 for (i = 0; i < ir->type->vector_elements; i++) {
1796 switch (i) {
1797 case 0:
1798 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1799 break;
1800 case 1:
1801 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1802 break;
1803 case 2:
1804 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1805 break;
1806 case 3:
1807 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1808 break;
1809 }
1810 }
1811 for (; i < 4; i++) {
1812 /* Replicate the last channel out. */
1813 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1814 }
1815
1816 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1817
1818 this->result = src;
1819 }
1820
1821 void
1822 vec4_visitor::visit(ir_dereference_variable *ir)
1823 {
1824 const struct glsl_type *type = ir->type;
1825 dst_reg *reg = variable_storage(ir->var);
1826
1827 if (!reg) {
1828 fail("Failed to find variable storage for %s\n", ir->var->name);
1829 this->result = src_reg(brw_null_reg());
1830 return;
1831 }
1832
1833 this->result = src_reg(*reg);
1834
1835 /* System values get their swizzle from the dst_reg writemask */
1836 if (ir->var->mode == ir_var_system_value)
1837 return;
1838
1839 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1840 this->result.swizzle = swizzle_for_size(type->vector_elements);
1841 }
1842
1843
1844 int
1845 vec4_visitor::compute_array_stride(ir_dereference_array *ir)
1846 {
1847 /* Under normal circumstances array elements are stored consecutively, so
1848 * the stride is equal to the size of the array element.
1849 */
1850 return type_size(ir->type);
1851 }
1852
1853
1854 void
1855 vec4_visitor::visit(ir_dereference_array *ir)
1856 {
1857 ir_constant *constant_index;
1858 src_reg src;
1859 int array_stride = compute_array_stride(ir);
1860
1861 constant_index = ir->array_index->constant_expression_value();
1862
1863 ir->array->accept(this);
1864 src = this->result;
1865
1866 if (constant_index) {
1867 src.reg_offset += constant_index->value.i[0] * array_stride;
1868 } else {
1869 /* Variable index array dereference. It eats the "vec4" of the
1870 * base of the array and an index that offsets the Mesa register
1871 * index.
1872 */
1873 ir->array_index->accept(this);
1874
1875 src_reg index_reg;
1876
1877 if (array_stride == 1) {
1878 index_reg = this->result;
1879 } else {
1880 index_reg = src_reg(this, glsl_type::int_type);
1881
1882 emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride)));
1883 }
1884
1885 if (src.reladdr) {
1886 src_reg temp = src_reg(this, glsl_type::int_type);
1887
1888 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1889
1890 index_reg = temp;
1891 }
1892
1893 src.reladdr = ralloc(mem_ctx, src_reg);
1894 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1895 }
1896
1897 /* If the type is smaller than a vec4, replicate the last channel out. */
1898 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1899 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1900 else
1901 src.swizzle = BRW_SWIZZLE_NOOP;
1902 src.type = brw_type_for_base_type(ir->type);
1903
1904 this->result = src;
1905 }
1906
1907 void
1908 vec4_visitor::visit(ir_dereference_record *ir)
1909 {
1910 unsigned int i;
1911 const glsl_type *struct_type = ir->record->type;
1912 int offset = 0;
1913
1914 ir->record->accept(this);
1915
1916 for (i = 0; i < struct_type->length; i++) {
1917 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1918 break;
1919 offset += type_size(struct_type->fields.structure[i].type);
1920 }
1921
1922 /* If the type is smaller than a vec4, replicate the last channel out. */
1923 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1924 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1925 else
1926 this->result.swizzle = BRW_SWIZZLE_NOOP;
1927 this->result.type = brw_type_for_base_type(ir->type);
1928
1929 this->result.reg_offset += offset;
1930 }
1931
1932 /**
1933 * We want to be careful in assignment setup to hit the actual storage
1934 * instead of potentially using a temporary like we might with the
1935 * ir_dereference handler.
1936 */
1937 static dst_reg
1938 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1939 {
1940 /* The LHS must be a dereference. If the LHS is a variable indexed array
1941 * access of a vector, it must be separated into a series conditional moves
1942 * before reaching this point (see ir_vec_index_to_cond_assign).
1943 */
1944 assert(ir->as_dereference());
1945 ir_dereference_array *deref_array = ir->as_dereference_array();
1946 if (deref_array) {
1947 assert(!deref_array->array->type->is_vector());
1948 }
1949
1950 /* Use the rvalue deref handler for the most part. We'll ignore
1951 * swizzles in it and write swizzles using writemask, though.
1952 */
1953 ir->accept(v);
1954 return dst_reg(v->result);
1955 }
1956
1957 void
1958 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1959 const struct glsl_type *type, uint32_t predicate)
1960 {
1961 if (type->base_type == GLSL_TYPE_STRUCT) {
1962 for (unsigned int i = 0; i < type->length; i++) {
1963 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1964 }
1965 return;
1966 }
1967
1968 if (type->is_array()) {
1969 for (unsigned int i = 0; i < type->length; i++) {
1970 emit_block_move(dst, src, type->fields.array, predicate);
1971 }
1972 return;
1973 }
1974
1975 if (type->is_matrix()) {
1976 const struct glsl_type *vec_type;
1977
1978 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1979 type->vector_elements, 1);
1980
1981 for (int i = 0; i < type->matrix_columns; i++) {
1982 emit_block_move(dst, src, vec_type, predicate);
1983 }
1984 return;
1985 }
1986
1987 assert(type->is_scalar() || type->is_vector());
1988
1989 dst->type = brw_type_for_base_type(type);
1990 src->type = dst->type;
1991
1992 dst->writemask = (1 << type->vector_elements) - 1;
1993
1994 src->swizzle = swizzle_for_size(type->vector_elements);
1995
1996 vec4_instruction *inst = emit(MOV(*dst, *src));
1997 inst->predicate = predicate;
1998
1999 dst->reg_offset++;
2000 src->reg_offset++;
2001 }
2002
2003
2004 /* If the RHS processing resulted in an instruction generating a
2005 * temporary value, and it would be easy to rewrite the instruction to
2006 * generate its result right into the LHS instead, do so. This ends
2007 * up reliably removing instructions where it can be tricky to do so
2008 * later without real use/def (UD) chain information.
2009 */
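/* For example, for "dst = a + b" the RHS visit emits "ADD tmp, a, b"; by
* rewriting that ADD's destination to the real dst, the MOV that would
* otherwise copy tmp into dst never needs to be emitted.
*/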
2010 bool
2011 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
2012 dst_reg dst,
2013 src_reg src,
2014 vec4_instruction *pre_rhs_inst,
2015 vec4_instruction *last_rhs_inst)
2016 {
2017 /* This could be supported, but it would take more smarts. */
2018 if (ir->condition)
2019 return false;
2020
2021 if (pre_rhs_inst == last_rhs_inst)
2022 return false; /* No instructions generated to work with. */
2023
2024 /* Make sure the last instruction generated our source reg. */
2025 if (src.file != GRF ||
2026 src.file != last_rhs_inst->dst.file ||
2027 src.reg != last_rhs_inst->dst.reg ||
2028 src.reg_offset != last_rhs_inst->dst.reg_offset ||
2029 src.reladdr ||
2030 src.abs ||
2031 src.negate ||
2032 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
2033 return false;
2034
2035 /* Check that the last instruction fully initialized the channels
2036 * we want to use, in the order we want to use them. We could
2037 * potentially reswizzle the operands of many instructions so that
2038 * we could handle out of order channels, but don't yet.
2039 */
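/* e.g. a writemask of .xy over an identity-swizzled source is fine, but a
* .yx source swizzle would be rejected because channel 0 no longer reads
* component 0.
*/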
2040
2041 for (unsigned i = 0; i < 4; i++) {
2042 if (dst.writemask & (1 << i)) {
2043 if (!(last_rhs_inst->dst.writemask & (1 << i)))
2044 return false;
2045
2046 if (BRW_GET_SWZ(src.swizzle, i) != i)
2047 return false;
2048 }
2049 }
2050
2051 /* Success! Rewrite the instruction. */
2052 last_rhs_inst->dst.file = dst.file;
2053 last_rhs_inst->dst.reg = dst.reg;
2054 last_rhs_inst->dst.reg_offset = dst.reg_offset;
2055 last_rhs_inst->dst.reladdr = dst.reladdr;
2056 last_rhs_inst->dst.writemask &= dst.writemask;
2057
2058 return true;
2059 }
2060
2061 void
2062 vec4_visitor::visit(ir_assignment *ir)
2063 {
2064 dst_reg dst = get_assignment_lhs(ir->lhs, this);
2065 uint32_t predicate = BRW_PREDICATE_NONE;
2066
2067 if (!ir->lhs->type->is_scalar() &&
2068 !ir->lhs->type->is_vector()) {
2069 ir->rhs->accept(this);
2070 src_reg src = this->result;
2071
2072 if (ir->condition) {
2073 emit_bool_to_cond_code(ir->condition, &predicate);
2074 }
2075
2076 /* emit_block_move doesn't account for swizzles in the source register.
2077 * This should be ok, since the source register is a structure or an
2078 * array, and those can't be swizzled. But double-check to be sure.
2079 */
2080 assert(src.swizzle ==
2081 (ir->rhs->type->is_matrix()
2082 ? swizzle_for_size(ir->rhs->type->vector_elements)
2083 : BRW_SWIZZLE_NOOP));
2084
2085 emit_block_move(&dst, &src, ir->rhs->type, predicate);
2086 return;
2087 }
2088
2089 /* Now we're down to just a scalar/vector with writemasks. */
2090 int i;
2091
2092 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
2093 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2094
2095 ir->rhs->accept(this);
2096
2097 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2098
2099 src_reg src = this->result;
2100
2101 int swizzles[4];
2102 int first_enabled_chan = 0;
2103 int src_chan = 0;
2104
2105 assert(ir->lhs->type->is_vector() ||
2106 ir->lhs->type->is_scalar());
2107 dst.writemask = ir->write_mask;
2108
2109 for (int i = 0; i < 4; i++) {
2110 if (dst.writemask & (1 << i)) {
2111 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
2112 break;
2113 }
2114 }
2115
2116 /* Swizzle a small RHS vector into the channels being written.
2117 *
2118 * GLSL IR treats write_mask as dictating how many channels are
2119 * present on the RHS, while in our instructions we need to make
2120 * those channels appear in the slots of the vec4 they're written to.
2121 */
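/* For example, writing a vec2 RHS into dst.yz: write_mask is YZ and the RHS
* arrives swizzled .xyyy (assuming swizzle_for_size(2)), so the loop below
* builds .yxyy, making the MOV's Y and Z channels read the RHS's X and Y.
*/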
2122 for (int i = 0; i < 4; i++) {
2123 if (dst.writemask & (1 << i))
2124 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
2125 else
2126 swizzles[i] = first_enabled_chan;
2127 }
2128 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2129 swizzles[2], swizzles[3]);
2130
2131 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
2132 return;
2133 }
2134
2135 if (ir->condition) {
2136 emit_bool_to_cond_code(ir->condition, &predicate);
2137 }
2138
2139 for (i = 0; i < type_size(ir->lhs->type); i++) {
2140 vec4_instruction *inst = emit(MOV(dst, src));
2141 inst->predicate = predicate;
2142
2143 dst.reg_offset++;
2144 src.reg_offset++;
2145 }
2146 }
2147
2148 void
2149 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
2150 {
2151 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2152 foreach_list(node, &ir->components) {
2153 ir_constant *field_value = (ir_constant *)node;
2154
2155 emit_constant_values(dst, field_value);
2156 }
2157 return;
2158 }
2159
2160 if (ir->type->is_array()) {
2161 for (unsigned int i = 0; i < ir->type->length; i++) {
2162 emit_constant_values(dst, ir->array_elements[i]);
2163 }
2164 return;
2165 }
2166
2167 if (ir->type->is_matrix()) {
2168 for (int i = 0; i < ir->type->matrix_columns; i++) {
2169 float *vec = &ir->value.f[i * ir->type->vector_elements];
2170
2171 for (int j = 0; j < ir->type->vector_elements; j++) {
2172 dst->writemask = 1 << j;
2173 dst->type = BRW_REGISTER_TYPE_F;
2174
2175 emit(MOV(*dst, src_reg(vec[j])));
2176 }
2177 dst->reg_offset++;
2178 }
2179 return;
2180 }
2181
2182 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
2183
2184 for (int i = 0; i < ir->type->vector_elements; i++) {
2185 if (!(remaining_writemask & (1 << i)))
2186 continue;
2187
2188 dst->writemask = 1 << i;
2189 dst->type = brw_type_for_base_type(ir->type);
2190
2191 /* Find other components that match the one we're about to
2192 * write. Emits fewer instructions for things like vec4(0.5,
2193 * 1.5, 1.5, 1.5).
2194 */
2195 for (int j = i + 1; j < ir->type->vector_elements; j++) {
2196 if (ir->type->base_type == GLSL_TYPE_BOOL) {
2197 if (ir->value.b[i] == ir->value.b[j])
2198 dst->writemask |= (1 << j);
2199 } else {
2200 /* u, i, and f storage all line up, so no need for a
2201 * switch case for comparing each type.
2202 */
2203 if (ir->value.u[i] == ir->value.u[j])
2204 dst->writemask |= (1 << j);
2205 }
2206 }
2207
2208 switch (ir->type->base_type) {
2209 case GLSL_TYPE_FLOAT:
2210 emit(MOV(*dst, src_reg(ir->value.f[i])));
2211 break;
2212 case GLSL_TYPE_INT:
2213 emit(MOV(*dst, src_reg(ir->value.i[i])));
2214 break;
2215 case GLSL_TYPE_UINT:
2216 emit(MOV(*dst, src_reg(ir->value.u[i])));
2217 break;
2218 case GLSL_TYPE_BOOL:
2219 emit(MOV(*dst, src_reg(ir->value.b[i])));
2220 break;
2221 default:
2222 assert(!"Non-float/uint/int/bool constant");
2223 break;
2224 }
2225
2226 remaining_writemask &= ~dst->writemask;
2227 }
2228 dst->reg_offset++;
2229 }
2230
2231 void
2232 vec4_visitor::visit(ir_constant *ir)
2233 {
2234 dst_reg dst = dst_reg(this, ir->type);
2235 this->result = src_reg(dst);
2236
2237 emit_constant_values(&dst, ir);
2238 }
2239
2240 void
2241 vec4_visitor::visit(ir_call *ir)
2242 {
2243 assert(!"not reached");
2244 }
2245
2246 void
2247 vec4_visitor::visit(ir_texture *ir)
2248 {
2249 int sampler =
2250 _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
2251
2252 /* Should be lowered by do_lower_texture_projection */
2253 assert(!ir->projector);
2254
2255 /* Generate code to compute all the subexpression trees. This has to be
2256 * done before loading any values into MRFs for the sampler message since
2257 * generating these values may involve SEND messages that need the MRFs.
2258 */
2259 src_reg coordinate;
2260 if (ir->coordinate) {
2261 ir->coordinate->accept(this);
2262 coordinate = this->result;
2263 }
2264
2265 src_reg shadow_comparitor;
2266 if (ir->shadow_comparitor) {
2267 ir->shadow_comparitor->accept(this);
2268 shadow_comparitor = this->result;
2269 }
2270
2271 const glsl_type *lod_type = NULL, *sample_index_type = NULL;
2272 src_reg lod, dPdx, dPdy, sample_index;
2273 switch (ir->op) {
2274 case ir_tex:
2275 lod = src_reg(0.0f);
2276 lod_type = glsl_type::float_type;
2277 break;
2278 case ir_txf:
2279 case ir_txl:
2280 case ir_txs:
2281 ir->lod_info.lod->accept(this);
2282 lod = this->result;
2283 lod_type = ir->lod_info.lod->type;
2284 break;
2285 case ir_txf_ms:
2286 ir->lod_info.sample_index->accept(this);
2287 sample_index = this->result;
2288 sample_index_type = ir->lod_info.sample_index->type;
2289 break;
2290 case ir_txd:
2291 ir->lod_info.grad.dPdx->accept(this);
2292 dPdx = this->result;
2293
2294 ir->lod_info.grad.dPdy->accept(this);
2295 dPdy = this->result;
2296
2297 lod_type = ir->lod_info.grad.dPdx->type;
2298 break;
2299 case ir_txb:
2300 case ir_lod:
2301 break;
2302 }
2303
2304 vec4_instruction *inst = NULL;
2305 switch (ir->op) {
2306 case ir_tex:
2307 case ir_txl:
2308 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
2309 break;
2310 case ir_txd:
2311 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
2312 break;
2313 case ir_txf:
2314 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
2315 break;
2316 case ir_txf_ms:
2317 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF_MS);
2318 break;
2319 case ir_txs:
2320 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
2321 break;
2322 case ir_txb:
2323 assert(!"TXB is not valid for vertex shaders.");
2324 break;
2325 case ir_lod:
2326 assert(!"LOD is not valid for vertex shaders.");
2327 break;
2328 }
2329
2330 bool use_texture_offset = ir->offset != NULL && ir->op != ir_txf;
2331
2332 /* Texel offsets go in the message header; Gen4 also requires headers. */
2333 inst->header_present = use_texture_offset || brw->gen < 5;
2334 inst->base_mrf = 2;
2335 inst->mlen = inst->header_present + 1; /* always at least one */
2336 inst->sampler = sampler;
2337 inst->dst = dst_reg(this, ir->type);
2338 inst->dst.writemask = WRITEMASK_XYZW;
2339 inst->shadow_compare = ir->shadow_comparitor != NULL;
2340
2341 if (use_texture_offset)
2342 inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
2343
2344 /* MRF for the first parameter */
2345 int param_base = inst->base_mrf + inst->header_present;
2346
2347 if (ir->op == ir_txs) {
2348 int writemask = brw->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
2349 emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod));
2350 } else {
2351 int i, coord_mask = 0, zero_mask = 0;
2352 /* Load the coordinate */
2353 /* FINISHME: gl_clamp_mask and saturate */
2354 for (i = 0; i < ir->coordinate->type->vector_elements; i++)
2355 coord_mask |= (1 << i);
2356 for (; i < 4; i++)
2357 zero_mask |= (1 << i);
2358
2359 if (ir->offset && ir->op == ir_txf) {
2360 /* It appears that the ld instruction used for txf does its
2361 * address bounds check before adding in the offset. To work
2362 * around this, just add the integer offset to the integer
2363 * texel coordinate, and don't put the offset in the header.
2364 */
2365 ir_constant *offset = ir->offset->as_constant();
2366 assert(offset);
2367
2368 for (int j = 0; j < ir->coordinate->type->vector_elements; j++) {
2369 src_reg src = coordinate;
2370 src.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(src.swizzle, j),
2371 BRW_GET_SWZ(src.swizzle, j),
2372 BRW_GET_SWZ(src.swizzle, j),
2373 BRW_GET_SWZ(src.swizzle, j));
2374 emit(ADD(dst_reg(MRF, param_base, ir->coordinate->type, 1 << j),
2375 src, offset->value.i[j]));
2376 }
2377 } else {
2378 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
2379 coordinate));
2380 }
2381 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
2382 src_reg(0)));
2383 /* Load the shadow comparitor */
2384 if (ir->shadow_comparitor && ir->op != ir_txd) {
2385 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
2386 WRITEMASK_X),
2387 shadow_comparitor));
2388 inst->mlen++;
2389 }
2390
2391 /* Load the LOD info */
2392 if (ir->op == ir_tex || ir->op == ir_txl) {
2393 int mrf, writemask;
2394 if (brw->gen >= 5) {
2395 mrf = param_base + 1;
2396 if (ir->shadow_comparitor) {
2397 writemask = WRITEMASK_Y;
2398 /* mlen already incremented */
2399 } else {
2400 writemask = WRITEMASK_X;
2401 inst->mlen++;
2402 }
2403 } else /* brw->gen == 4 */ {
2404 mrf = param_base;
2405 writemask = WRITEMASK_W;
2406 }
2407 emit(MOV(dst_reg(MRF, mrf, lod_type, writemask), lod));
2408 } else if (ir->op == ir_txf) {
2409 emit(MOV(dst_reg(MRF, param_base, lod_type, WRITEMASK_W), lod));
2410 } else if (ir->op == ir_txf_ms) {
2411 emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X),
2412 sample_index));
2413 inst->mlen++;
2414
2415 /* On Gen7, there is an additional MCS parameter here after SI,
2416 * but we don't bother to emit it since it's always zero. If
2417 * we start supporting texturing from CMS surfaces, this will have
2418 * to change.
2419 */
2420 } else if (ir->op == ir_txd) {
2421 const glsl_type *type = lod_type;
2422
2423 if (brw->gen >= 5) {
2424 dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2425 dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2426 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
2427 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
2428 inst->mlen++;
2429
2430 if (ir->type->vector_elements == 3 || ir->shadow_comparitor) {
2431 dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
2432 dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
2433 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
2434 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
2435 inst->mlen++;
2436
2437 if (ir->shadow_comparitor) {
2438 emit(MOV(dst_reg(MRF, param_base + 2,
2439 ir->shadow_comparitor->type, WRITEMASK_Z),
2440 shadow_comparitor));
2441 }
2442 }
2443 } else /* brw->gen == 4 */ {
2444 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
2445 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
2446 inst->mlen += 2;
2447 }
2448 }
2449 }
2450
2451 emit(inst);
2452
2453 /* Fix up the number of layers (z) for cube arrays: the hardware returns
2454 * faces * layers; the spec requires just layers.
2455 */
2456 if (ir->op == ir_txs) {
2457 glsl_type const *type = ir->sampler->type;
2458 if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
2459 type->sampler_array) {
2460 emit_math(SHADER_OPCODE_INT_QUOTIENT,
2461 with_writemask(inst->dst, WRITEMASK_Z),
2462 src_reg(inst->dst), src_reg(6));
2463 }
2464 }
2465
2466 swizzle_result(ir, src_reg(inst->dst), sampler);
2467 }
2468
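/**
* Apply the texture swizzle from the program key to the sampler result.
*
* Channels marked SWIZZLE_ZERO or SWIZZLE_ONE get immediate 0.0f/1.0f
* writes; the remaining channels are copied with a single swizzled MOV.
* For instance, a depth texture sampled with GL_DEPTH_TEXTURE_MODE of
* GL_ALPHA would use a (ZERO, ZERO, ZERO, X) swizzle: XYZ are zeroed and
* W copies the depth value.
*/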
2469 void
2470 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
2471 {
2472 int s = key->tex.swizzles[sampler];
2473
2474 this->result = src_reg(this, ir->type);
2475 dst_reg swizzled_result(this->result);
2476
2477 if (ir->op == ir_txs || ir->type == glsl_type::float_type
2478 || s == SWIZZLE_NOOP) {
2479 emit(MOV(swizzled_result, orig_val));
2480 return;
2481 }
2482
2483 int zero_mask = 0, one_mask = 0, copy_mask = 0;
2484 int swizzle[4] = {0};
2485
2486 for (int i = 0; i < 4; i++) {
2487 switch (GET_SWZ(s, i)) {
2488 case SWIZZLE_ZERO:
2489 zero_mask |= (1 << i);
2490 break;
2491 case SWIZZLE_ONE:
2492 one_mask |= (1 << i);
2493 break;
2494 default:
2495 copy_mask |= (1 << i);
2496 swizzle[i] = GET_SWZ(s, i);
2497 break;
2498 }
2499 }
2500
2501 if (copy_mask) {
2502 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2503 swizzled_result.writemask = copy_mask;
2504 emit(MOV(swizzled_result, orig_val));
2505 }
2506
2507 if (zero_mask) {
2508 swizzled_result.writemask = zero_mask;
2509 emit(MOV(swizzled_result, src_reg(0.0f)));
2510 }
2511
2512 if (one_mask) {
2513 swizzled_result.writemask = one_mask;
2514 emit(MOV(swizzled_result, src_reg(1.0f)));
2515 }
2516 }
2517
2518 void
2519 vec4_visitor::visit(ir_return *ir)
2520 {
2521 assert(!"not reached");
2522 }
2523
2524 void
2525 vec4_visitor::visit(ir_discard *ir)
2526 {
2527 assert(!"not reached");
2528 }
2529
2530 void
2531 vec4_visitor::visit(ir_if *ir)
2532 {
2533 /* Don't point the annotation at the if statement, because then it plus
2534 * the then and else blocks get printed.
2535 */
2536 this->base_ir = ir->condition;
2537
2538 if (brw->gen == 6) {
2539 emit_if_gen6(ir);
2540 } else {
2541 uint32_t predicate;
2542 emit_bool_to_cond_code(ir->condition, &predicate);
2543 emit(IF(predicate));
2544 }
2545
2546 visit_instructions(&ir->then_instructions);
2547
2548 if (!ir->else_instructions.is_empty()) {
2549 this->base_ir = ir->condition;
2550 emit(BRW_OPCODE_ELSE);
2551
2552 visit_instructions(&ir->else_instructions);
2553 }
2554
2555 this->base_ir = ir->condition;
2556 emit(BRW_OPCODE_ENDIF);
2557 }
2558
2559 void
2560 vec4_visitor::visit(ir_emit_vertex *)
2561 {
2562 assert(!"not reached");
2563 }
2564
2565 void
2566 vec4_visitor::visit(ir_end_primitive *)
2567 {
2568 assert(!"not reached");
2569 }
2570
2571 void
2572 vec4_visitor::emit_ndc_computation()
2573 {
2574 /* Get the position */
2575 src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
2576
2577 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
2578 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2579 output_reg[BRW_VARYING_SLOT_NDC] = ndc;
2580
2581 current_annotation = "NDC";
2582 dst_reg ndc_w = ndc;
2583 ndc_w.writemask = WRITEMASK_W;
2584 src_reg pos_w = pos;
2585 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2586 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2587
2588 dst_reg ndc_xyz = ndc;
2589 ndc_xyz.writemask = WRITEMASK_XYZ;
2590
2591 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
2592 }
2593
2594 void
2595 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2596 {
2597 if (brw->gen < 6 &&
2598 ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
2599 key->userclip_active || brw->has_negative_rhw_bug)) {
2600 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2601 dst_reg header1_w = header1;
2602 header1_w.writemask = WRITEMASK_W;
2603
2604 emit(MOV(header1, 0u));
2605
2606 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
2607 src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
2608
2609 current_annotation = "Point size";
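/* The point width appears to be an 11-bit fixed-point field starting at
* bit 8 (note the 0x7ff << 8 mask below), so the multiply by 1 << 11 both
* scales by 8 for the fraction bits and shifts the value into position.
*/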
2610 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2611 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
2612 }
2613
2614 if (key->userclip_active) {
2615 current_annotation = "Clipping flags";
2616 dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
2617 dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
2618
2619 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L));
2620 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
2621 emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
2622
2623 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L));
2624 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
2625 emit(SHL(flags1, src_reg(flags1), src_reg(4)));
2626 emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
2627 }
2628
2629 /* i965 clipping workaround:
2630 * 1) Test for -ve rhw
2631 * 2) If set,
2632 * set ndc = (0,0,0,0)
2633 * set ucp[6] = 1
2634 *
2635 * Later, clipping will detect ucp[6] and ensure the primitive is
2636 * clipped against all fixed planes.
2637 */
2638 if (brw->has_negative_rhw_bug) {
2639 src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
2640 ndc_w.swizzle = BRW_SWIZZLE_WWWW;
2641 emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
2642 vec4_instruction *inst;
2643 inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
2644 inst->predicate = BRW_PREDICATE_NORMAL;
2645 inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
2646 inst->predicate = BRW_PREDICATE_NORMAL;
2647 }
2648
2649 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2650 } else if (brw->gen < 6) {
2651 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2652 } else {
2653 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2654 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
2655 emit(MOV(brw_writemask(reg, WRITEMASK_W),
2656 src_reg(output_reg[VARYING_SLOT_PSIZ])));
2657 }
2658 if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
2659 emit(MOV(retype(brw_writemask(reg, WRITEMASK_Y), BRW_REGISTER_TYPE_D),
2660 src_reg(output_reg[VARYING_SLOT_LAYER])));
2661 }
2662 }
2663 }
2664
2665 void
2666 vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
2667 {
2668 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2669 *
2670 * "If a linked set of shaders forming the vertex stage contains no
2671 * static write to gl_ClipVertex or gl_ClipDistance, but the
2672 * application has requested clipping against user clip planes through
2673 * the API, then the coordinate written to gl_Position is used for
2674 * comparison against the user clip planes."
2675 *
2676 * This function is only called if the shader didn't write to
2677 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
2678 * if the user wrote to it; otherwise we use gl_Position.
2679 */
2680 gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
2681 if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
2682 clip_vertex = VARYING_SLOT_POS;
2683 }
2684
2685 for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
2686 ++i) {
2687 reg.writemask = 1 << i;
2688 emit(DP4(reg,
2689 src_reg(output_reg[clip_vertex]),
2690 src_reg(this->userplane[i + offset])));
2691 }
2692 }
2693
2694 void
2695 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
2696 {
2697 assert (varying < VARYING_SLOT_MAX);
2698 reg.type = output_reg[varying].type;
2699 current_annotation = output_reg_annotation[varying];
2700 /* Copy the register, saturating if necessary */
2701 vec4_instruction *inst = emit(MOV(reg,
2702 src_reg(output_reg[varying])));
2703 if ((varying == VARYING_SLOT_COL0 ||
2704 varying == VARYING_SLOT_COL1 ||
2705 varying == VARYING_SLOT_BFC0 ||
2706 varying == VARYING_SLOT_BFC1) &&
2707 key->clamp_vertex_color) {
2708 inst->saturate = true;
2709 }
2710 }
2711
2712 void
2713 vec4_visitor::emit_urb_slot(int mrf, int varying)
2714 {
2715 struct brw_reg hw_reg = brw_message_reg(mrf);
2716 dst_reg reg = dst_reg(MRF, mrf);
2717 reg.type = BRW_REGISTER_TYPE_F;
2718
2719 switch (varying) {
2720 case VARYING_SLOT_PSIZ:
2721 /* PSIZ is always in slot 0, and is coupled with other flags. */
2722 current_annotation = "indices, point width, clip flags";
2723 emit_psiz_and_flags(hw_reg);
2724 break;
2725 case BRW_VARYING_SLOT_NDC:
2726 current_annotation = "NDC";
2727 emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
2728 break;
2729 case VARYING_SLOT_POS:
2730 current_annotation = "gl_Position";
2731 emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
2732 break;
2733 case VARYING_SLOT_EDGE:
2734 /* This is present when doing unfilled polygons. We're supposed to copy
2735 * the edge flag from the user-provided vertex array
2736 * (glEdgeFlagPointer), or otherwise we'll copy from the current value
2737 * of that attribute (starts as 1.0f). This is then used in clipping to
2738 * determine which edges should be drawn as wireframe.
2739 */
2740 current_annotation = "edge flag";
2741 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
2742 glsl_type::float_type, WRITEMASK_XYZW))));
2743 break;
2744 case BRW_VARYING_SLOT_PAD:
2745 /* No need to write to this slot */
2746 break;
2747 default:
2748 emit_generic_urb_slot(reg, varying);
2749 break;
2750 }
2751 }
2752
2753 static int
2754 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2755 {
2756 if (brw->gen >= 6) {
2757 /* URB data written (does not include the message header reg) must
2758 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
2759 * section 5.4.3.2.2: URB_INTERLEAVED.
2760 *
2761 * URB entries are allocated on a multiple of 1024 bits, so an
2762 * extra 128 bits written here to make the end align to 256 is
2763 * no problem.
2764 */
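/* e.g. a header plus four data registers (mlen == 5) already ends on an
* even data length and is left alone, while a header plus five data
* registers (mlen == 6) gets padded out to mlen == 7.
*/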
2765 if ((mlen % 2) != 1)
2766 mlen++;
2767 }
2768
2769 return mlen;
2770 }
2771
2772 void
2773 vec4_vs_visitor::emit_urb_write_header(int mrf)
2774 {
2775 /* No need to do anything for VS; an implied write to this MRF will be
2776 * performed by VS_OPCODE_URB_WRITE.
2777 */
2778 (void) mrf;
2779 }
2780
2781 vec4_instruction *
2782 vec4_vs_visitor::emit_urb_write_opcode(bool complete)
2783 {
2784 /* For VS, the URB writes end the thread. */
2785 if (complete) {
2786 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
2787 emit_shader_time_end();
2788 }
2789
2790 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
2791 inst->urb_write_flags = complete ?
2792 BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
2793
2794 return inst;
2795 }
2796
2797 /**
2798 * Generates the VUE payload plus the necessary URB write instructions to
2799 * output it.
2800 *
2801 * The VUE layout is documented in Volume 2a.
2802 */
2803 void
2804 vec4_visitor::emit_vertex()
2805 {
2806 /* MRF 0 is reserved for the debugger, so start with message header
2807 * in MRF 1.
2808 */
2809 int base_mrf = 1;
2810 int mrf = base_mrf;
2811 /* In the process of generating our URB write message contents, we
2812 * may need to unspill a register or load from an array. Those
2813 * reads would use MRFs 14-15.
2814 */
2815 int max_usable_mrf = 13;
2816
2817 /* The following assertion verifies that max_usable_mrf results in an
2818 * even number of URB write registers, which meets gen6's
2819 * requirements for length alignment.
2820 */
2821 assert ((max_usable_mrf - base_mrf) % 2 == 0);
2822
2823 /* First mrf is the g0-based message header containing URB handles and
2824 * such.
2825 */
2826 emit_urb_write_header(mrf++);
2827
2828 if (brw->gen < 6) {
2829 emit_ndc_computation();
2830 }
2831
2832 /* Lower legacy ff and ClipVertex clipping to clip distances */
2833 if (key->userclip_active && !key->uses_clip_distance) {
2834 current_annotation = "user clip distances";
2835
2836 output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
2837 output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
2838
2839 emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
2840 emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
2841 }
2842
2843 /* Set up the VUE data for the first URB write */
2844 int slot;
2845 for (slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
2846 emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
2847
2848 /* If this was max_usable_mrf, we can't fit anything more into this URB
2849 * WRITE.
2850 */
2851 if (mrf > max_usable_mrf) {
2852 slot++;
2853 break;
2854 }
2855 }
2856
2857 bool complete = slot >= prog_data->vue_map.num_slots;
2858 current_annotation = "URB write";
2859 vec4_instruction *inst = emit_urb_write_opcode(complete);
2860 inst->base_mrf = base_mrf;
2861 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2862
2863 /* Optional second URB write */
2864 if (!complete) {
2865 mrf = base_mrf + 1;
2866
2867 for (; slot < prog_data->vue_map.num_slots; ++slot) {
2868 assert(mrf < max_usable_mrf);
2869
2870 emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
2871 }
2872
2873 current_annotation = "URB write";
2874 inst = emit_urb_write_opcode(true /* complete */);
2875 inst->base_mrf = base_mrf;
2876 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2877 /* URB destination offset.  The previous write used MRFs 1-13; minus
2878 * the one header MRF that leaves 12 data regs.  URB offset is in
2879 * URB row increments, and each of our MRFs is half of one of
2880 * those, since we're doing interleaved writes.
2881 */
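/* With base_mrf == 1 and max_usable_mrf == 13, this is (13 - 1) / 2, i.e.
* an offset of 6 URB rows.
*/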
2882 inst->offset = (max_usable_mrf - base_mrf) / 2;
2883 }
2884 }
2885
2886 void
2887 vec4_vs_visitor::emit_thread_end()
2888 {
2889 /* For VS, we always end the thread by emitting a single vertex.
2890 * emit_urb_write_opcode() will take care of setting the eot flag on the
2891 * SEND instruction.
2892 */
2893 emit_vertex();
2894 }
2895
2896 src_reg
2897 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2898 src_reg *reladdr, int reg_offset)
2899 {
2900 /* Because we store the values to scratch interleaved like our
2901 * vertex data, we need to scale the vec4 index by 2.
2902 */
2903 int message_header_scale = 2;
2904
2905 /* Pre-gen6, the message header uses byte offsets instead of vec4
2906 * (16-byte) offset units.
2907 */
2908 if (brw->gen < 6)
2909 message_header_scale *= 16;
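/* e.g. reg_offset == 3 scales to an offset of 6 on gen6+, or to
* 3 * 2 * 16 == 96 bytes on gen4/5.
*/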
2910
2911 if (reladdr) {
2912 src_reg index = src_reg(this, glsl_type::int_type);
2913
2914 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2915 emit_before(inst, MUL(dst_reg(index),
2916 index, src_reg(message_header_scale)));
2917
2918 return index;
2919 } else {
2920 return src_reg(reg_offset * message_header_scale);
2921 }
2922 }
2923
2924 src_reg
2925 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2926 src_reg *reladdr, int reg_offset)
2927 {
2928 if (reladdr) {
2929 src_reg index = src_reg(this, glsl_type::int_type);
2930
2931 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2932
2933 /* Pre-gen6, the message header uses byte offsets instead of vec4
2934 * (16-byte) offset units.
2935 */
2936 if (brw->gen < 6) {
2937 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2938 }
2939
2940 return index;
2941 } else {
2942 int message_header_scale = brw->gen < 6 ? 16 : 1;
2943 return src_reg(reg_offset * message_header_scale);
2944 }
2945 }
2946
2947 /**
2948 * Emits an instruction before @inst to load the value named by @orig_src
2949 * from scratch space at @base_offset to @temp.
2950 *
2951 * @base_offset is measured in 32-byte units (the size of a register).
2952 */
2953 void
2954 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2955 dst_reg temp, src_reg orig_src,
2956 int base_offset)
2957 {
2958 int reg_offset = base_offset + orig_src.reg_offset;
2959 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2960
2961 emit_before(inst, SCRATCH_READ(temp, index));
2962 }
2963
2964 /**
2965 * Emits an instruction after @inst to store the value to be written
2966 * to @orig_dst to scratch space at @base_offset, from @temp.
2967 *
2968 * @base_offset is measured in 32-byte units (the size of a register).
2969 */
2970 void
2971 vec4_visitor::emit_scratch_write(vec4_instruction *inst, int base_offset)
2972 {
2973 int reg_offset = base_offset + inst->dst.reg_offset;
2974 src_reg index = get_scratch_offset(inst, inst->dst.reladdr, reg_offset);
2975
2976 /* Create a temporary register to store *inst's result in.
2977 *
2978 * We have to be careful in MOVing from our temporary result register in
2979 * the scratch write. If we swizzle from channels of the temporary that
2980 * weren't initialized, it will confuse live interval analysis, which will
2981 * make spilling fail to make progress.
2982 */
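/* For example, if the instruction only writes .z, the loop below builds a
* .zzzz swizzle so every channel of the scratch MOV reads the one channel
* that was actually defined.
*/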
2983 src_reg temp = src_reg(this, glsl_type::vec4_type);
2984 temp.type = inst->dst.type;
2985 int first_writemask_chan = ffs(inst->dst.writemask) - 1;
2986 int swizzles[4];
2987 for (int i = 0; i < 4; i++)
2988 if (inst->dst.writemask & (1 << i))
2989 swizzles[i] = i;
2990 else
2991 swizzles[i] = first_writemask_chan;
2992 temp.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2993 swizzles[2], swizzles[3]);
2994
2995 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2996 inst->dst.writemask));
2997 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
2998 write->predicate = inst->predicate;
2999 write->ir = inst->ir;
3000 write->annotation = inst->annotation;
3001 inst->insert_after(write);
3002
3003 inst->dst.file = temp.file;
3004 inst->dst.reg = temp.reg;
3005 inst->dst.reg_offset = temp.reg_offset;
3006 inst->dst.reladdr = NULL;
3007 }
3008
3009 /**
3010 * We can't generally support array access in GRF space, because a
3011 * single instruction's destination can only span 2 contiguous
3012 * registers. So, we send all GRF arrays that get variable index
3013 * access to scratch space.
3014 */
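/* A hypothetical example: a local "vec4 verts[3]" indexed by a non-constant
* loop counter gets its virtual GRF assigned a scratch location here, and
* every read or write of it is turned into a scratch message below.
*/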
3015 void
3016 vec4_visitor::move_grf_array_access_to_scratch()
3017 {
3018 int scratch_loc[this->virtual_grf_count];
3019
3020 for (int i = 0; i < this->virtual_grf_count; i++) {
3021 scratch_loc[i] = -1;
3022 }
3023
3024 /* First, calculate the set of virtual GRFs that need to be punted
3025 * to scratch due to having any array access on them, and where in
3026 * scratch.
3027 */
3028 foreach_list(node, &this->instructions) {
3029 vec4_instruction *inst = (vec4_instruction *)node;
3030
3031 if (inst->dst.file == GRF && inst->dst.reladdr &&
3032 scratch_loc[inst->dst.reg] == -1) {
3033 scratch_loc[inst->dst.reg] = c->last_scratch;
3034 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
3035 }
3036
3037 for (int i = 0 ; i < 3; i++) {
3038 src_reg *src = &inst->src[i];
3039
3040 if (src->file == GRF && src->reladdr &&
3041 scratch_loc[src->reg] == -1) {
3042 scratch_loc[src->reg] = c->last_scratch;
3043 c->last_scratch += this->virtual_grf_sizes[src->reg];
3044 }
3045 }
3046 }
3047
3048 /* Now, for anything that will be accessed through scratch, rewrite
3049 * it to load/store. Note that this is a _safe list walk, because
3050 * we may generate a new scratch_write instruction after the one
3051 * we're processing.
3052 */
3053 foreach_list_safe(node, &this->instructions) {
3054 vec4_instruction *inst = (vec4_instruction *)node;
3055
3056 /* Set up the annotation tracking for new generated instructions. */
3057 base_ir = inst->ir;
3058 current_annotation = inst->annotation;
3059
3060 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
3061 emit_scratch_write(inst, scratch_loc[inst->dst.reg]);
3062 }
3063
3064 for (int i = 0 ; i < 3; i++) {
3065 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
3066 continue;
3067
3068 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3069
3070 emit_scratch_read(inst, temp, inst->src[i],
3071 scratch_loc[inst->src[i].reg]);
3072
3073 inst->src[i].file = temp.file;
3074 inst->src[i].reg = temp.reg;
3075 inst->src[i].reg_offset = temp.reg_offset;
3076 inst->src[i].reladdr = NULL;
3077 }
3078 }
3079 }
3080
3081 /**
3082 * Emits an instruction before @inst to load the value named by @orig_src
3083 * from the pull constant buffer (surface) at @base_offset to @temp.
3084 */
3085 void
3086 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
3087 dst_reg temp, src_reg orig_src,
3088 int base_offset)
3089 {
3090 int reg_offset = base_offset + orig_src.reg_offset;
3091 src_reg index = src_reg((unsigned)SURF_INDEX_VERT_CONST_BUFFER);
3092 src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
3093 vec4_instruction *load;
3094
3095 if (brw->gen >= 7) {
3096 dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
3097 grf_offset.type = offset.type;
3098 emit_before(inst, MOV(grf_offset, offset));
3099
3100 load = new(mem_ctx) vec4_instruction(this,
3101 VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
3102 temp, index, src_reg(grf_offset));
3103 } else {
3104 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
3105 temp, index, offset);
3106 load->base_mrf = 14;
3107 load->mlen = 1;
3108 }
3109 emit_before(inst, load);
3110 }
3111
3112 /**
3113 * Implements array access of uniforms by inserting a
3114 * PULL_CONSTANT_LOAD instruction.
3115 *
3116 * Unlike temporary GRF array access (where we don't support it due to
3117 * the difficulty of doing relative addressing on instruction
3118 * destinations), we could potentially do array access of uniforms
3119 * that were loaded in GRF space as push constants. In real-world
3120 * usage we've seen, though, the arrays being used are always larger
3121 * than we could load as push constants, so just always move all
3122 * uniform array access out to a pull constant buffer.
3123 */
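/* A hypothetical example: "uniform mat4 bones[64]" indexed by a per-vertex
* bone id is typically far too large for push constant space, so its values
* are copied into pull_param and each variably-indexed read becomes a
* PULL_CONSTANT_LOAD from the constant buffer surface.
*/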
3124 void
3125 vec4_visitor::move_uniform_array_access_to_pull_constants()
3126 {
3127 int pull_constant_loc[this->uniforms];
3128
3129 for (int i = 0; i < this->uniforms; i++) {
3130 pull_constant_loc[i] = -1;
3131 }
3132
3133 /* Walk through and find array access of uniforms. Put a copy of that
3134 * uniform in the pull constant buffer.
3135 *
3136 * Note that we don't move constant-indexed accesses to arrays. No
3137 * testing has been done of the performance impact of this choice.
3138 */
3139 foreach_list_safe(node, &this->instructions) {
3140 vec4_instruction *inst = (vec4_instruction *)node;
3141
3142 for (int i = 0 ; i < 3; i++) {
3143 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
3144 continue;
3145
3146 int uniform = inst->src[i].reg;
3147
3148 /* If this array isn't already present in the pull constant buffer,
3149 * add it.
3150 */
3151 if (pull_constant_loc[uniform] == -1) {
3152 const float **values = &prog_data->param[uniform * 4];
3153
3154 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
3155
3156 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
3157 prog_data->pull_param[prog_data->nr_pull_params++]
3158 = values[j];
3159 }
3160 }
3161
3162 /* Set up the annotation tracking for new generated instructions. */
3163 base_ir = inst->ir;
3164 current_annotation = inst->annotation;
3165
3166 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3167
3168 emit_pull_constant_load(inst, temp, inst->src[i],
3169 pull_constant_loc[uniform]);
3170
3171 inst->src[i].file = temp.file;
3172 inst->src[i].reg = temp.reg;
3173 inst->src[i].reg_offset = temp.reg_offset;
3174 inst->src[i].reladdr = NULL;
3175 }
3176 }
3177
3178 /* Now there are no accesses of the UNIFORM file with a reladdr, so
3179 * no need to track them as larger-than-vec4 objects. This will be
3180 * relied on in cutting out unused uniform vectors from push
3181 * constants.
3182 */
3183 split_uniform_registers();
3184 }
3185
3186 void
3187 vec4_visitor::resolve_ud_negate(src_reg *reg)
3188 {
3189 if (reg->type != BRW_REGISTER_TYPE_UD ||
3190 !reg->negate)
3191 return;
3192
3193 src_reg temp = src_reg(this, glsl_type::uvec4_type);
3194 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
3195 *reg = temp;
3196 }
3197
3198 vec4_visitor::vec4_visitor(struct brw_context *brw,
3199 struct brw_vec4_compile *c,
3200 struct gl_program *prog,
3201 const struct brw_vec4_prog_key *key,
3202 struct brw_vec4_prog_data *prog_data,
3203 struct gl_shader_program *shader_prog,
3204 struct brw_shader *shader,
3205 void *mem_ctx,
3206 bool debug_flag)
3207 : debug_flag(debug_flag)
3208 {
3209 this->brw = brw;
3210 this->ctx = &brw->ctx;
3211 this->shader_prog = shader_prog;
3212 this->shader = shader;
3213
3214 this->mem_ctx = mem_ctx;
3215 this->failed = false;
3216
3217 this->base_ir = NULL;
3218 this->current_annotation = NULL;
3219 memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
3220
3221 this->c = c;
3222 this->prog = prog;
3223 this->key = key;
3224 this->prog_data = prog_data;
3225
3226 this->variable_ht = hash_table_ctor(0,
3227 hash_table_pointer_hash,
3228 hash_table_pointer_compare);
3229
3230 this->virtual_grf_start = NULL;
3231 this->virtual_grf_end = NULL;
3232 this->virtual_grf_sizes = NULL;
3233 this->virtual_grf_count = 0;
3234 this->virtual_grf_reg_map = NULL;
3235 this->virtual_grf_reg_count = 0;
3236 this->virtual_grf_array_size = 0;
3237 this->live_intervals_valid = false;
3238
3239 this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
3240
3241 this->uniforms = 0;
3242 }
3243
3244 vec4_visitor::~vec4_visitor()
3245 {
3246 hash_table_dtor(this->variable_ht);
3247 }
3248
3249
3250 vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw,
3251 struct brw_vs_compile *vs_compile,
3252 struct brw_vs_prog_data *vs_prog_data,
3253 struct gl_shader_program *prog,
3254 struct brw_shader *shader,
3255 void *mem_ctx)
3256 : vec4_visitor(brw, &vs_compile->base, &vs_compile->vp->program.Base,
3257 &vs_compile->key.base, &vs_prog_data->base, prog, shader,
3258 mem_ctx, INTEL_DEBUG & DEBUG_VS),
3259 vs_compile(vs_compile),
3260 vs_prog_data(vs_prog_data)
3261 {
3262 }
3263
3264
3265 void
3266 vec4_visitor::fail(const char *format, ...)
3267 {
3268 va_list va;
3269 char *msg;
3270
3271 if (failed)
3272 return;
3273
3274 failed = true;
3275
3276 va_start(va, format);
3277 msg = ralloc_vasprintf(mem_ctx, format, va);
3278 va_end(va);
3279 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
3280
3281 this->fail_msg = msg;
3282
3283 if (debug_flag) {
3284 fprintf(stderr, "%s", msg);
3285 }
3286 }
3287
3288 } /* namespace brw */