i965/vec4: Make with_writemask() non-static.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 #include "brw_vs.h"
26 #include "glsl/ir_uniform.h"
27 extern "C" {
28 #include "main/context.h"
29 #include "main/macros.h"
30 #include "program/prog_parameter.h"
31 #include "program/sampler.h"
32 }
33
34 namespace brw {
35
36 vec4_instruction::vec4_instruction(vec4_visitor *v,
37 enum opcode opcode, dst_reg dst,
38 src_reg src0, src_reg src1, src_reg src2)
39 {
40 this->opcode = opcode;
41 this->dst = dst;
42 this->src[0] = src0;
43 this->src[1] = src1;
44 this->src[2] = src2;
45 this->ir = v->base_ir;
46 this->annotation = v->current_annotation;
47 }
48
49 vec4_instruction *
50 vec4_visitor::emit(vec4_instruction *inst)
51 {
52 this->instructions.push_tail(inst);
53
54 return inst;
55 }
56
57 vec4_instruction *
58 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
59 {
60 new_inst->ir = inst->ir;
61 new_inst->annotation = inst->annotation;
62
63 inst->insert_before(new_inst);
64
65 return inst;
66 }
67
68 vec4_instruction *
69 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
70 src_reg src0, src_reg src1, src_reg src2)
71 {
72 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
73 src0, src1, src2));
74 }
75
76
77 vec4_instruction *
78 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
79 {
80 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
81 }
82
83 vec4_instruction *
84 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
85 {
86 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
87 }
88
89 vec4_instruction *
90 vec4_visitor::emit(enum opcode opcode)
91 {
92 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
93 }
94
95 #define ALU1(op) \
96 vec4_instruction * \
97 vec4_visitor::op(dst_reg dst, src_reg src0) \
98 { \
99 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
100 src0); \
101 }
102
103 #define ALU2(op) \
104 vec4_instruction * \
105 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
106 { \
107 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
108 src0, src1); \
109 }
110
111 #define ALU3(op) \
112 vec4_instruction * \
113 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\
114 { \
115 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
116 src0, src1, src2); \
117 }
118
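/* For reference, ALU2(ADD) expands to roughly the following helper:
 *
 *    vec4_instruction *
 *    vec4_visitor::ADD(dst_reg dst, src_reg src0, src_reg src1)
 *    {
 *       return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_ADD, dst,
 *                                            src0, src1);
 *    }
 *
 * Note that these helpers only construct the instruction; callers still
 * pass the result to emit() to append it to the instruction stream.
 */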
119 ALU1(NOT)
120 ALU1(MOV)
121 ALU1(FRC)
122 ALU1(RNDD)
123 ALU1(RNDE)
124 ALU1(RNDZ)
125 ALU1(F32TO16)
126 ALU1(F16TO32)
127 ALU2(ADD)
128 ALU2(MUL)
129 ALU2(MACH)
130 ALU2(AND)
131 ALU2(OR)
132 ALU2(XOR)
133 ALU2(DP3)
134 ALU2(DP4)
135 ALU2(DPH)
136 ALU2(SHL)
137 ALU2(SHR)
138 ALU2(ASR)
139 ALU3(LRP)
140 ALU1(BFREV)
141 ALU3(BFE)
142 ALU2(BFI1)
143 ALU3(BFI2)
144 ALU1(FBH)
145 ALU1(FBL)
146 ALU1(CBIT)
147 ALU3(MAD)
148
149 /** Gen4 predicated IF. */
150 vec4_instruction *
151 vec4_visitor::IF(uint32_t predicate)
152 {
153 vec4_instruction *inst;
154
155 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
156 inst->predicate = predicate;
157
158 return inst;
159 }
160
161 /** Gen6+ IF with embedded comparison. */
162 vec4_instruction *
163 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
164 {
165 assert(brw->gen >= 6);
166
167 vec4_instruction *inst;
168
169 resolve_ud_negate(&src0);
170 resolve_ud_negate(&src1);
171
172 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
173 src0, src1);
174 inst->conditional_mod = condition;
175
176 return inst;
177 }
178
179 /**
180 * CMP: Sets the low bit of the destination channels with the result
181 * of the comparison, while the upper bits are undefined, and updates
182 * the flag register with the packed 16 bits of the result.
183 */
184 vec4_instruction *
185 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
186 {
187 vec4_instruction *inst;
188
189 /* Original gen4 does type conversion to the destination type
190 * before comparison, producing garbage results for floating
191 * point comparisons.
192 */
193 if (brw->gen == 4) {
194 dst.type = src0.type;
195 if (dst.file == HW_REG)
196 dst.fixed_hw_reg.type = dst.type;
197 }
198
199 resolve_ud_negate(&src0);
200 resolve_ud_negate(&src1);
201
202 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
203 inst->conditional_mod = condition;
204
205 return inst;
206 }
207
208 vec4_instruction *
209 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
210 {
211 vec4_instruction *inst;
212
213 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
214 dst, index);
215 inst->base_mrf = 14;
216 inst->mlen = 2;
217
218 return inst;
219 }
220
221 vec4_instruction *
222 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
223 {
224 vec4_instruction *inst;
225
226 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
227 dst, src, index);
228 inst->base_mrf = 13;
229 inst->mlen = 3;
230
231 return inst;
232 }
233
234 void
235 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
236 {
237 static enum opcode dot_opcodes[] = {
238 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
239 };
240
241 emit(dot_opcodes[elements - 2], dst, src0, src1);
242 }
243
244 src_reg
245 vec4_visitor::fix_3src_operand(src_reg src)
246 {
247 /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
248 * able to use vertical stride of zero to replicate the vec4 uniform, like
249 *
250 * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
251 *
252 * But you can't, since vertical stride is always four in three-source
253 * instructions. Instead, insert a MOV instruction to do the replication so
254 * that the three-source instruction can consume it.
255 */
256
257 /* The MOV is only needed if the source is a uniform or immediate. */
258 if (src.file != UNIFORM && src.file != IMM)
259 return src;
260
261 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
262 expanded.type = src.type;
263 emit(MOV(expanded, src));
264 return src_reg(expanded);
265 }
266
267 src_reg
268 vec4_visitor::fix_math_operand(src_reg src)
269 {
270 /* The gen6 math instruction ignores the source modifiers --
271 * swizzle, abs, negate, and at least some parts of the register
272 * region description.
273 *
274 * Rather than trying to enumerate all these cases, *always* expand the
275 * operand to a temp GRF for gen6.
276 *
277 * For gen7, keep the operand as-is, except if immediate, which gen7 still
278 * can't use.
279 */
280
281 if (brw->gen == 7 && src.file != IMM)
282 return src;
283
284 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
285 expanded.type = src.type;
286 emit(MOV(expanded, src));
287 return src_reg(expanded);
288 }
289
290 void
291 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
292 {
293 src = fix_math_operand(src);
294
295 if (dst.writemask != WRITEMASK_XYZW) {
296 /* The gen6 math instruction must be align1, so we can't do
297 * writemasks.
298 */
299 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
300
301 emit(opcode, temp_dst, src);
302
303 emit(MOV(dst, src_reg(temp_dst)));
304 } else {
305 emit(opcode, dst, src);
306 }
307 }
308
309 void
310 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
311 {
312 vec4_instruction *inst = emit(opcode, dst, src);
313 inst->base_mrf = 1;
314 inst->mlen = 1;
315 }
316
317 void
318 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
319 {
320 switch (opcode) {
321 case SHADER_OPCODE_RCP:
322 case SHADER_OPCODE_RSQ:
323 case SHADER_OPCODE_SQRT:
324 case SHADER_OPCODE_EXP2:
325 case SHADER_OPCODE_LOG2:
326 case SHADER_OPCODE_SIN:
327 case SHADER_OPCODE_COS:
328 break;
329 default:
330 assert(!"not reached: bad math opcode");
331 return;
332 }
333
334 if (brw->gen >= 6) {
335 return emit_math1_gen6(opcode, dst, src);
336 } else {
337 return emit_math1_gen4(opcode, dst, src);
338 }
339 }
340
341 void
342 vec4_visitor::emit_math2_gen6(enum opcode opcode,
343 dst_reg dst, src_reg src0, src_reg src1)
344 {
345 src0 = fix_math_operand(src0);
346 src1 = fix_math_operand(src1);
347
348 if (dst.writemask != WRITEMASK_XYZW) {
349 /* The gen6 math instruction must be align1, so we can't do
350 * writemasks.
351 */
352 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
353 temp_dst.type = dst.type;
354
355 emit(opcode, temp_dst, src0, src1);
356
357 emit(MOV(dst, src_reg(temp_dst)));
358 } else {
359 emit(opcode, dst, src0, src1);
360 }
361 }
362
363 void
364 vec4_visitor::emit_math2_gen4(enum opcode opcode,
365 dst_reg dst, src_reg src0, src_reg src1)
366 {
367 vec4_instruction *inst = emit(opcode, dst, src0, src1);
368 inst->base_mrf = 1;
369 inst->mlen = 2;
370 }
371
372 void
373 vec4_visitor::emit_math(enum opcode opcode,
374 dst_reg dst, src_reg src0, src_reg src1)
375 {
376 switch (opcode) {
377 case SHADER_OPCODE_POW:
378 case SHADER_OPCODE_INT_QUOTIENT:
379 case SHADER_OPCODE_INT_REMAINDER:
380 break;
381 default:
382 assert(!"not reached: unsupported binary math opcode");
383 return;
384 }
385
386 if (brw->gen >= 6) {
387 return emit_math2_gen6(opcode, dst, src0, src1);
388 } else {
389 return emit_math2_gen4(opcode, dst, src0, src1);
390 }
391 }
392
393 void
394 vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
395 {
396 if (brw->gen < 7)
397 assert(!"ir_unop_pack_half_2x16 should be lowered");
398
399 assert(dst.type == BRW_REGISTER_TYPE_UD);
400 assert(src0.type == BRW_REGISTER_TYPE_F);
401
402 /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
403 *
404 * Because this instruction does not have a 16-bit floating-point type,
405 * the destination data type must be Word (W).
406 *
407 * The destination must be DWord-aligned and specify a horizontal stride
408 * (HorzStride) of 2. The 16-bit result is stored in the lower word of
409 * each destination channel and the upper word is not modified.
410 *
411 * The above restriction implies that the f32to16 instruction must use
412 * align1 mode, because only in align1 mode is it possible to specify
413 * horizontal stride. We choose here to defy the hardware docs and emit
414 * align16 instructions.
415 *
416 * (I [chadv] did attempt to emit align1 instructions for VS f32to16
417 * instructions. I was partially successful in that the code passed all
418 * tests. However, the code was dubiously correct and fragile, and the
419 * tests were not harsh enough to probe that frailty. Not trusting the
420 * code, I chose instead to remain in align16 mode in defiance of the hw
421 * docs).
422 *
423 * I've [chadv] experimentally confirmed that, on gen7 hardware and the
424 * simulator, emitting a f32to16 in align16 mode with UD as destination
425 * data type is safe. The behavior differs from that specified in the PRM
426 * in that the upper word of each destination channel is cleared to 0.
427 */
428
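/* As a concrete, illustrative example of the packing done below:
 * packHalf2x16(vec2(1.0, -2.0)) encodes 1.0 as the half-float bit pattern
 * 0x3c00 and -2.0 as 0xc000; the SHL/OR sequence then combines them as
 * (0xc000 << 16) | 0x3c00 = 0xc0003c00, with the .x component in the low
 * word as GLSL requires.
 */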
429 dst_reg tmp_dst(this, glsl_type::uvec2_type);
430 src_reg tmp_src(tmp_dst);
431
432 #if 0
433 /* Verify the undocumented behavior on which the following instructions
434 * rely. If f32to16 fails to clear the upper word of the X and Y channels,
435 * then the result of the bit-or instruction below will be incorrect.
436 *
437 * You should inspect the disasm output in order to verify that the MOV is
438 * not optimized away.
439 */
440 emit(MOV(tmp_dst, src_reg(0x12345678u)));
441 #endif
442
443 /* Give tmp the form below, where "." means untouched.
444 *
445 * w z y x w z y x
446 * |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
447 *
448 * That the upper word of each write-channel be 0 is required for the
449 * following bit-shift and bit-or instructions to work. Note that this
450 * relies on the undocumented hardware behavior mentioned above.
451 */
452 tmp_dst.writemask = WRITEMASK_XY;
453 emit(F32TO16(tmp_dst, src0));
454
455 /* Give the write-channels of dst the form:
456 * 0xhhhh0000
457 */
458 tmp_src.swizzle = SWIZZLE_Y;
459 emit(SHL(dst, tmp_src, src_reg(16u)));
460
461 /* Finally, give the write-channels of dst the form of packHalf2x16's
462 * output:
463 * 0xhhhhllll
464 */
465 tmp_src.swizzle = SWIZZLE_X;
466 emit(OR(dst, src_reg(dst), tmp_src));
467 }
468
469 void
470 vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
471 {
472 if (brw->gen < 7)
473 assert(!"ir_unop_unpack_half_2x16 should be lowered");
474
475 assert(dst.type == BRW_REGISTER_TYPE_F);
476 assert(src0.type == BRW_REGISTER_TYPE_UD);
477
478 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
479 *
480 * Because this instruction does not have a 16-bit floating-point type,
481 * the source data type must be Word (W). The destination type must be
482 * F (Float).
483 *
484 * To use W as the source data type, we must adjust horizontal strides,
485 * which is only possible in align1 mode. All my [chadv] attempts at
486 * emitting align1 instructions for unpackHalf2x16 failed to pass the
487 * Piglit tests, so I gave up.
488 *
489 * I've verified that, on gen7 hardware and the simulator, it is safe to
490 * emit f16to32 in align16 mode with UD as source data type.
491 */
492
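/* Continuing the illustrative example from emit_pack_half_2x16: unpacking
 * 0xc0003c00 ANDs off the low word (0x3c00 -> 1.0) into tmp.x, shifts the
 * high word (0xc000 -> -2.0) into tmp.y, and the final F16TO32 converts
 * both halves back to float, so dst.xy ends up as (1.0, -2.0).
 */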
493 dst_reg tmp_dst(this, glsl_type::uvec2_type);
494 src_reg tmp_src(tmp_dst);
495
496 tmp_dst.writemask = WRITEMASK_X;
497 emit(AND(tmp_dst, src0, src_reg(0xffffu)));
498
499 tmp_dst.writemask = WRITEMASK_Y;
500 emit(SHR(tmp_dst, src0, src_reg(16u)));
501
502 dst.writemask = WRITEMASK_XY;
503 emit(F16TO32(dst, tmp_src));
504 }
505
506 void
507 vec4_visitor::visit_instructions(const exec_list *list)
508 {
509 foreach_list(node, list) {
510 ir_instruction *ir = (ir_instruction *)node;
511
512 base_ir = ir;
513 ir->accept(this);
514 }
515 }
516
517
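/**
 * Returns the number of vec4 slots a value of the given GLSL type occupies
 * in this backend. Per the switch below: a scalar, bool, or vector takes
 * one full vec4 slot, a matrix takes one slot per column (mat4 -> 4), and
 * an array takes the element size times its length (float[10] -> 10).
 */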
518 static int
519 type_size(const struct glsl_type *type)
520 {
521 unsigned int i;
522 int size;
523
524 switch (type->base_type) {
525 case GLSL_TYPE_UINT:
526 case GLSL_TYPE_INT:
527 case GLSL_TYPE_FLOAT:
528 case GLSL_TYPE_BOOL:
529 if (type->is_matrix()) {
530 return type->matrix_columns;
531 } else {
532 /* Regardless of the size of the vector, it gets a vec4. This is bad
533 * packing for things like floats, but otherwise arrays become a
534 * mess. Hopefully a later pass over the code can pack scalars
535 * down if appropriate.
536 */
537 return 1;
538 }
539 case GLSL_TYPE_ARRAY:
540 assert(type->length > 0);
541 return type_size(type->fields.array) * type->length;
542 case GLSL_TYPE_STRUCT:
543 size = 0;
544 for (i = 0; i < type->length; i++) {
545 size += type_size(type->fields.structure[i].type);
546 }
547 return size;
548 case GLSL_TYPE_SAMPLER:
549 /* Samplers take up one slot in UNIFORMS[], but they're baked in
550 * at link time.
551 */
552 return 1;
553 case GLSL_TYPE_VOID:
554 case GLSL_TYPE_ERROR:
555 case GLSL_TYPE_INTERFACE:
556 assert(0);
557 break;
558 }
559
560 return 0;
561 }
562
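/**
 * Allocates a new virtual GRF of the given size (in vec4 registers) and
 * returns its index. The size and register-map arrays are grown
 * geometrically (doubling, starting at 16 entries) so that repeated
 * allocations stay cheap.
 */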
563 int
564 vec4_visitor::virtual_grf_alloc(int size)
565 {
566 if (virtual_grf_array_size <= virtual_grf_count) {
567 if (virtual_grf_array_size == 0)
568 virtual_grf_array_size = 16;
569 else
570 virtual_grf_array_size *= 2;
571 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
572 virtual_grf_array_size);
573 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
574 virtual_grf_array_size);
575 }
576 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
577 virtual_grf_reg_count += size;
578 virtual_grf_sizes[virtual_grf_count] = size;
579 return virtual_grf_count++;
580 }
581
582 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
583 {
584 init();
585
586 this->file = GRF;
587 this->reg = v->virtual_grf_alloc(type_size(type));
588
589 if (type->is_array() || type->is_record()) {
590 this->swizzle = BRW_SWIZZLE_NOOP;
591 } else {
592 this->swizzle = swizzle_for_size(type->vector_elements);
593 }
594
595 this->type = brw_type_for_base_type(type);
596 }
597
598 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
599 {
600 init();
601
602 this->file = GRF;
603 this->reg = v->virtual_grf_alloc(type_size(type));
604
605 if (type->is_array() || type->is_record()) {
606 this->writemask = WRITEMASK_XYZW;
607 } else {
608 this->writemask = (1 << type->vector_elements) - 1;
609 }
610
611 this->type = brw_type_for_base_type(type);
612 }
613
614 /* Our support for uniforms is piggy-backed on the struct
615 * gl_fragment_program, because that's where the values actually
616 * get stored, rather than in some global gl_shader_program uniform
617 * store.
618 */
619 void
620 vec4_visitor::setup_uniform_values(ir_variable *ir)
621 {
622 int namelen = strlen(ir->name);
623
624 /* The data for our (non-builtin) uniforms is stored in a series of
625 * gl_uniform_driver_storage structs for each subcomponent that
626 * glGetUniformLocation() could name. We know it's been set up in the same
627 * order we'd walk the type, so walk the list of storage and find anything
628 * with our name, or the prefix of a component that starts with our name.
629 */
630 for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
631 struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
632
633 if (strncmp(ir->name, storage->name, namelen) != 0 ||
634 (storage->name[namelen] != 0 &&
635 storage->name[namelen] != '.' &&
636 storage->name[namelen] != '[')) {
637 continue;
638 }
639
640 gl_constant_value *components = storage->storage;
641 unsigned vector_count = (MAX2(storage->array_elements, 1) *
642 storage->type->matrix_columns);
643
644 for (unsigned s = 0; s < vector_count; s++) {
645 uniform_vector_size[uniforms] = storage->type->vector_elements;
646
647 int i;
648 for (i = 0; i < uniform_vector_size[uniforms]; i++) {
649 prog_data->param[uniforms * 4 + i] = &components->f;
650 components++;
651 }
652 for (; i < 4; i++) {
653 static float zero = 0;
654 prog_data->param[uniforms * 4 + i] = &zero;
655 }
656
657 uniforms++;
658 }
659 }
660 }
661
662 void
663 vec4_visitor::setup_uniform_clipplane_values()
664 {
665 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
666
667 for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
668 this->uniform_vector_size[this->uniforms] = 4;
669 this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
670 this->userplane[i].type = BRW_REGISTER_TYPE_F;
671 for (int j = 0; j < 4; ++j) {
672 prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j];
673 }
674 ++this->uniforms;
675 }
676 }
677
678 /* Our support for builtin uniforms is even scarier than non-builtin.
679 * It sits on top of the PROG_STATE_VAR parameters that are
680 * automatically updated from GL context state.
681 */
682 void
683 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
684 {
685 const ir_state_slot *const slots = ir->state_slots;
686 assert(ir->state_slots != NULL);
687
688 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
689 /* This state reference has already been set up by ir_to_mesa,
690 * but we'll get the same index back here. We can reference
691 * ParameterValues directly, since unlike brw_fs.cpp, we never
692 * add new state references during compile.
693 */
694 int index = _mesa_add_state_reference(this->prog->Parameters,
695 (gl_state_index *)slots[i].tokens);
696 float *values = &this->prog->Parameters->ParameterValues[index][0].f;
697
698 this->uniform_vector_size[this->uniforms] = 0;
699 /* Add each of the unique swizzled channels of the element.
700 * This will end up matching the size of the glsl_type of this field.
701 */
702 int last_swiz = -1;
703 for (unsigned int j = 0; j < 4; j++) {
704 int swiz = GET_SWZ(slots[i].swizzle, j);
705 last_swiz = swiz;
706
707 prog_data->param[this->uniforms * 4 + j] = &values[swiz];
708 if (swiz <= last_swiz)
709 this->uniform_vector_size[this->uniforms]++;
710 }
711 this->uniforms++;
712 }
713 }
714
715 dst_reg *
716 vec4_visitor::variable_storage(ir_variable *var)
717 {
718 return (dst_reg *)hash_table_find(this->variable_ht, var);
719 }
720
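/**
 * Evaluates a boolean rvalue and leaves its result in the flag register,
 * setting *predicate to the predicate a following instruction should use:
 * BRW_PREDICATE_NORMAL for per-channel conditions, or one of the
 * ALIGN16_ALL4H/ANY4H predicates for the all()/any()-style comparisons
 * handled below.
 */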
721 void
722 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
723 {
724 ir_expression *expr = ir->as_expression();
725
726 *predicate = BRW_PREDICATE_NORMAL;
727
728 if (expr) {
729 src_reg op[2];
730 vec4_instruction *inst;
731
732 assert(expr->get_num_operands() <= 2);
733 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
734 expr->operands[i]->accept(this);
735 op[i] = this->result;
736
737 resolve_ud_negate(&op[i]);
738 }
739
740 switch (expr->operation) {
741 case ir_unop_logic_not:
742 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
743 inst->conditional_mod = BRW_CONDITIONAL_Z;
744 break;
745
746 case ir_binop_logic_xor:
747 inst = emit(XOR(dst_null_d(), op[0], op[1]));
748 inst->conditional_mod = BRW_CONDITIONAL_NZ;
749 break;
750
751 case ir_binop_logic_or:
752 inst = emit(OR(dst_null_d(), op[0], op[1]));
753 inst->conditional_mod = BRW_CONDITIONAL_NZ;
754 break;
755
756 case ir_binop_logic_and:
757 inst = emit(AND(dst_null_d(), op[0], op[1]));
758 inst->conditional_mod = BRW_CONDITIONAL_NZ;
759 break;
760
761 case ir_unop_f2b:
762 if (brw->gen >= 6) {
763 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
764 } else {
765 inst = emit(MOV(dst_null_f(), op[0]));
766 inst->conditional_mod = BRW_CONDITIONAL_NZ;
767 }
768 break;
769
770 case ir_unop_i2b:
771 if (brw->gen >= 6) {
772 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
773 } else {
774 inst = emit(MOV(dst_null_d(), op[0]));
775 inst->conditional_mod = BRW_CONDITIONAL_NZ;
776 }
777 break;
778
779 case ir_binop_all_equal:
780 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
781 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
782 break;
783
784 case ir_binop_any_nequal:
785 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
786 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
787 break;
788
789 case ir_unop_any:
790 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
791 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
792 break;
793
794 case ir_binop_greater:
795 case ir_binop_gequal:
796 case ir_binop_less:
797 case ir_binop_lequal:
798 case ir_binop_equal:
799 case ir_binop_nequal:
800 emit(CMP(dst_null_d(), op[0], op[1],
801 brw_conditional_for_comparison(expr->operation)));
802 break;
803
804 default:
805 assert(!"not reached");
806 break;
807 }
808 return;
809 }
810
811 ir->accept(this);
812
813 resolve_ud_negate(&this->result);
814
815 if (brw->gen >= 6) {
816 vec4_instruction *inst = emit(AND(dst_null_d(),
817 this->result, src_reg(1)));
818 inst->conditional_mod = BRW_CONDITIONAL_NZ;
819 } else {
820 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
821 inst->conditional_mod = BRW_CONDITIONAL_NZ;
822 }
823 }
824
825 /**
826 * Emit a gen6 IF statement with the comparison folded into the IF
827 * instruction.
828 */
829 void
830 vec4_visitor::emit_if_gen6(ir_if *ir)
831 {
832 ir_expression *expr = ir->condition->as_expression();
833
834 if (expr) {
835 src_reg op[2];
836 dst_reg temp;
837
838 assert(expr->get_num_operands() <= 2);
839 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
840 expr->operands[i]->accept(this);
841 op[i] = this->result;
842 }
843
844 switch (expr->operation) {
845 case ir_unop_logic_not:
846 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
847 return;
848
849 case ir_binop_logic_xor:
850 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
851 return;
852
853 case ir_binop_logic_or:
854 temp = dst_reg(this, glsl_type::bool_type);
855 emit(OR(temp, op[0], op[1]));
856 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
857 return;
858
859 case ir_binop_logic_and:
860 temp = dst_reg(this, glsl_type::bool_type);
861 emit(AND(temp, op[0], op[1]));
862 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
863 return;
864
865 case ir_unop_f2b:
866 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
867 return;
868
869 case ir_unop_i2b:
870 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
871 return;
872
873 case ir_binop_greater:
874 case ir_binop_gequal:
875 case ir_binop_less:
876 case ir_binop_lequal:
877 case ir_binop_equal:
878 case ir_binop_nequal:
879 emit(IF(op[0], op[1],
880 brw_conditional_for_comparison(expr->operation)));
881 return;
882
883 case ir_binop_all_equal:
884 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
885 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
886 return;
887
888 case ir_binop_any_nequal:
889 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
890 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
891 return;
892
893 case ir_unop_any:
894 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
895 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
896 return;
897
898 default:
899 assert(!"not reached");
900 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
901 return;
902 }
903 return;
904 }
905
906 ir->condition->accept(this);
907
908 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
909 }
910
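/**
 * Returns a copy of the given register with its writemask replaced,
 * leaving the original untouched. This makes it easy to write a single
 * instruction to a subset of channels, as emit_prolog() does below, e.g.:
 *
 *    emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_XYZ),
 *             src_reg(22u)));
 */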
911 dst_reg
912 with_writemask(dst_reg const & r, int mask)
913 {
914 dst_reg result = r;
915 result.writemask = mask;
916 return result;
917 }
918
919 void
920 vec4_vs_visitor::emit_prolog()
921 {
922 dst_reg sign_recovery_shift;
923 dst_reg normalize_factor;
924 dst_reg es3_normalize_factor;
925
926 for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
927 if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) {
928 uint8_t wa_flags = vs_compile->key.gl_attrib_wa_flags[i];
929 dst_reg reg(ATTR, i);
930 dst_reg reg_d = reg;
931 reg_d.type = BRW_REGISTER_TYPE_D;
932 dst_reg reg_ud = reg;
933 reg_ud.type = BRW_REGISTER_TYPE_UD;
934
935 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
936 * come in as floating point conversions of the integer values.
937 */
938 if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) {
939 dst_reg dst = reg;
940 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
941 dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
942 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
943 }
944
945 /* Do sign recovery for 2101010 formats if required. */
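/* The attribute was fetched as an unsigned 10/10/10/2 value, so shifting
 * each component left by (32 - bits) and then arithmetic-shifting it back
 * down sign-extends the packed field; for instance a 10-bit field of
 * 0x3ff becomes 0xffc00000 after the SHL and 0xffffffff (-1) after the
 * ASR. Hence the <22,22,22,30> shift vector built below.
 */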
946 if (wa_flags & BRW_ATTRIB_WA_SIGN) {
947 if (sign_recovery_shift.file == BAD_FILE) {
948 /* shift constant: <22,22,22,30> */
949 sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
950 emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
951 emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
952 }
953
954 emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
955 emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift)));
956 }
957
958 /* Apply BGRA swizzle if required. */
959 if (wa_flags & BRW_ATTRIB_WA_BGRA) {
960 src_reg temp = src_reg(reg);
961 temp.swizzle = BRW_SWIZZLE4(2,1,0,3);
962 emit(MOV(reg, temp));
963 }
964
965 if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
966 /* ES 3.0 has different rules for converting signed normalized
967 * fixed-point numbers than desktop GL.
968 */
969 if (_mesa_is_gles3(ctx) && (wa_flags & BRW_ATTRIB_WA_SIGN)) {
970 /* According to equation 2.2 of the ES 3.0 specification,
971 * signed normalization conversion is done by:
972 *
973 * f = c / (2^(b-1)-1)
974 */
975 if (es3_normalize_factor.file == BAD_FILE) {
976 /* mul constant: 1 / (2^(b-1) - 1) */
977 es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
978 emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_XYZ),
979 src_reg(1.0f / ((1<<9) - 1))));
980 emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_W),
981 src_reg(1.0f / ((1<<1) - 1))));
982 }
983
984 dst_reg dst = reg;
985 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
986 emit(MOV(dst, src_reg(reg_d)));
987 emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
988 emit_minmax(BRW_CONDITIONAL_G, dst, src_reg(dst), src_reg(-1.0f));
989 } else {
990 /* The following equations are from the OpenGL 3.2 specification:
991 *
992 * 2.1 unsigned normalization
993 * f = c/(2^n-1)
994 *
995 * 2.2 signed normalization
996 * f = (2c+1)/(2^n-1)
997 *
998 * Both of these share a common divisor, which is represented by
999 * "normalize_factor" in the code below.
1000 */
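/* For the 10/10/10/2 formats handled here that common divisor is
 * 2^10 - 1 = 1023 for the XYZ channels and 2^2 - 1 = 3 for the 2-bit W
 * channel, which is what the constants loaded below evaluate to.
 */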
1001 if (normalize_factor.file == BAD_FILE) {
1002 /* 1 / (2^b - 1) for b=<10,10,10,2> */
1003 normalize_factor = dst_reg(this, glsl_type::vec4_type);
1004 emit(MOV(with_writemask(normalize_factor, WRITEMASK_XYZ),
1005 src_reg(1.0f / ((1<<10) - 1))));
1006 emit(MOV(with_writemask(normalize_factor, WRITEMASK_W),
1007 src_reg(1.0f / ((1<<2) - 1))));
1008 }
1009
1010 dst_reg dst = reg;
1011 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
1012 emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
1013
1014 /* For signed normalization, we want the numerator to be 2c+1. */
1015 if (wa_flags & BRW_ATTRIB_WA_SIGN) {
1016 emit(MUL(dst, src_reg(dst), src_reg(2.0f)));
1017 emit(ADD(dst, src_reg(dst), src_reg(1.0f)));
1018 }
1019
1020 emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
1021 }
1022 }
1023
1024 if (wa_flags & BRW_ATTRIB_WA_SCALE) {
1025 dst_reg dst = reg;
1026 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
1027 emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
1028 }
1029 }
1030 }
1031 }
1032
1033
1034 dst_reg *
1035 vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir)
1036 {
1037 /* VertexID is stored by the VF as the last vertex element, but
1038 * we don't represent it with a flag in inputs_read, so we call
1039 * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
1040 */
1041 dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
1042 vs_prog_data->uses_vertexid = true;
1043
1044 switch (ir->location) {
1045 case SYSTEM_VALUE_VERTEX_ID:
1046 reg->writemask = WRITEMASK_X;
1047 break;
1048 case SYSTEM_VALUE_INSTANCE_ID:
1049 reg->writemask = WRITEMASK_Y;
1050 break;
1051 default:
1052 assert(!"not reached");
1053 break;
1054 }
1055
1056 return reg;
1057 }
1058
1059
1060 void
1061 vec4_visitor::visit(ir_variable *ir)
1062 {
1063 dst_reg *reg = NULL;
1064
1065 if (variable_storage(ir))
1066 return;
1067
1068 switch (ir->mode) {
1069 case ir_var_shader_in:
1070 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
1071 break;
1072
1073 case ir_var_shader_out:
1074 reg = new(mem_ctx) dst_reg(this, ir->type);
1075
1076 for (int i = 0; i < type_size(ir->type); i++) {
1077 output_reg[ir->location + i] = *reg;
1078 output_reg[ir->location + i].reg_offset = i;
1079 output_reg[ir->location + i].type =
1080 brw_type_for_base_type(ir->type->get_scalar_type());
1081 output_reg_annotation[ir->location + i] = ir->name;
1082 }
1083 break;
1084
1085 case ir_var_auto:
1086 case ir_var_temporary:
1087 reg = new(mem_ctx) dst_reg(this, ir->type);
1088 break;
1089
1090 case ir_var_uniform:
1091 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
1092
1093 /* Thanks to the lower_ubo_reference pass, we will see only
1094 * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
1095 * variables, so no need for them to be in variable_ht.
1096 */
1097 if (ir->is_in_uniform_block())
1098 return;
1099
1100 /* Track how big the whole uniform variable is, in case we need to put a
1101 * copy of its data into pull constants for array access.
1102 */
1103 this->uniform_size[this->uniforms] = type_size(ir->type);
1104
1105 if (!strncmp(ir->name, "gl_", 3)) {
1106 setup_builtin_uniform_values(ir);
1107 } else {
1108 setup_uniform_values(ir);
1109 }
1110 break;
1111
1112 case ir_var_system_value:
1113 reg = make_reg_for_system_value(ir);
1114 break;
1115
1116 default:
1117 assert(!"not reached");
1118 }
1119
1120 reg->type = brw_type_for_base_type(ir->type);
1121 hash_table_insert(this->variable_ht, reg, ir);
1122 }
1123
1124 void
1125 vec4_visitor::visit(ir_loop *ir)
1126 {
1127 dst_reg counter;
1128
1129 /* We don't want debugging output to print the whole body of the
1130 * loop as the annotation.
1131 */
1132 this->base_ir = NULL;
1133
1134 if (ir->counter != NULL) {
1135 this->base_ir = ir->counter;
1136 ir->counter->accept(this);
1137 counter = *(variable_storage(ir->counter));
1138
1139 if (ir->from != NULL) {
1140 this->base_ir = ir->from;
1141 ir->from->accept(this);
1142
1143 emit(MOV(counter, this->result));
1144 }
1145 }
1146
1147 emit(BRW_OPCODE_DO);
1148
1149 if (ir->to) {
1150 this->base_ir = ir->to;
1151 ir->to->accept(this);
1152
1153 emit(CMP(dst_null_d(), src_reg(counter), this->result,
1154 brw_conditional_for_comparison(ir->cmp)));
1155
1156 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
1157 inst->predicate = BRW_PREDICATE_NORMAL;
1158 }
1159
1160 visit_instructions(&ir->body_instructions);
1161
1162
1163 if (ir->increment) {
1164 this->base_ir = ir->increment;
1165 ir->increment->accept(this);
1166 emit(ADD(counter, src_reg(counter), this->result));
1167 }
1168
1169 emit(BRW_OPCODE_WHILE);
1170 }
1171
1172 void
1173 vec4_visitor::visit(ir_loop_jump *ir)
1174 {
1175 switch (ir->mode) {
1176 case ir_loop_jump::jump_break:
1177 emit(BRW_OPCODE_BREAK);
1178 break;
1179 case ir_loop_jump::jump_continue:
1180 emit(BRW_OPCODE_CONTINUE);
1181 break;
1182 }
1183 }
1184
1185
1186 void
1187 vec4_visitor::visit(ir_function_signature *ir)
1188 {
1189 assert(0);
1190 (void)ir;
1191 }
1192
1193 void
1194 vec4_visitor::visit(ir_function *ir)
1195 {
1196 /* Ignore function bodies other than main() -- we shouldn't see calls to
1197 * them since they should all be inlined.
1198 */
1199 if (strcmp(ir->name, "main") == 0) {
1200 const ir_function_signature *sig;
1201 exec_list empty;
1202
1203 sig = ir->matching_signature(&empty);
1204
1205 assert(sig);
1206
1207 visit_instructions(&sig->body);
1208 }
1209 }
1210
1211 bool
1212 vec4_visitor::try_emit_sat(ir_expression *ir)
1213 {
1214 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1215 if (!sat_src)
1216 return false;
1217
1218 sat_src->accept(this);
1219 src_reg src = this->result;
1220
1221 this->result = src_reg(this, ir->type);
1222 vec4_instruction *inst;
1223 inst = emit(MOV(dst_reg(this->result), src));
1224 inst->saturate = true;
1225
1226 return true;
1227 }
1228
1229 bool
1230 vec4_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
1231 {
1232 /* 3-src instructions were introduced in gen6. */
1233 if (brw->gen < 6)
1234 return false;
1235
1236 /* MAD can only handle floating-point data. */
1237 if (ir->type->base_type != GLSL_TYPE_FLOAT)
1238 return false;
1239
1240 ir_rvalue *nonmul = ir->operands[1 - mul_arg];
1241 ir_expression *mul = ir->operands[mul_arg]->as_expression();
1242
1243 if (!mul || mul->operation != ir_binop_mul)
1244 return false;
1245
1246 nonmul->accept(this);
1247 src_reg src0 = fix_3src_operand(this->result);
1248
1249 mul->operands[0]->accept(this);
1250 src_reg src1 = fix_3src_operand(this->result);
1251
1252 mul->operands[1]->accept(this);
1253 src_reg src2 = fix_3src_operand(this->result);
1254
1255 this->result = src_reg(this, ir->type);
1256 emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2);
1257
1258 return true;
1259 }
1260
1261 void
1262 vec4_visitor::emit_bool_comparison(unsigned int op,
1263 dst_reg dst, src_reg src0, src_reg src1)
1264 {
1265 /* original gen4 does destination conversion before comparison. */
1266 if (brw->gen < 5)
1267 dst.type = src0.type;
1268
1269 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
1270
1271 dst.type = BRW_REGISTER_TYPE_D;
1272 emit(AND(dst, src_reg(dst), src_reg(0x1)));
1273 }
1274
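/**
 * Emits a component-wise min or max: on gen6+ a single SEL with the given
 * conditional mod, on older parts a CMP followed by a predicated SEL.
 * Callers pass BRW_CONDITIONAL_L for min() and BRW_CONDITIONAL_G for
 * max(), as the ir_binop_min/ir_binop_max cases below do.
 */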
1275 void
1276 vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
1277 src_reg src0, src_reg src1)
1278 {
1279 vec4_instruction *inst;
1280
1281 if (brw->gen >= 6) {
1282 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1283 inst->conditional_mod = conditionalmod;
1284 } else {
1285 emit(CMP(dst, src0, src1, conditionalmod));
1286
1287 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1288 inst->predicate = BRW_PREDICATE_NORMAL;
1289 }
1290 }
1291
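/**
 * Returns true if the rvalue is an int/uint constant that fits in 16 bits.
 * The ir_binop_mul case below uses this to decide whether a single MUL
 * suffices or whether the full MUL/MACH sequence is needed.
 */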
1292 static bool
1293 is_16bit_constant(ir_rvalue *rvalue)
1294 {
1295 ir_constant *constant = rvalue->as_constant();
1296 if (!constant)
1297 return false;
1298
1299 if (constant->type != glsl_type::int_type &&
1300 constant->type != glsl_type::uint_type)
1301 return false;
1302
1303 return constant->value.u[0] < (1 << 16);
1304 }
1305
1306 void
1307 vec4_visitor::visit(ir_expression *ir)
1308 {
1309 unsigned int operand;
1310 src_reg op[Elements(ir->operands)];
1311 src_reg result_src;
1312 dst_reg result_dst;
1313 vec4_instruction *inst;
1314
1315 if (try_emit_sat(ir))
1316 return;
1317
1318 if (ir->operation == ir_binop_add) {
1319 if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
1320 return;
1321 }
1322
1323 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1324 this->result.file = BAD_FILE;
1325 ir->operands[operand]->accept(this);
1326 if (this->result.file == BAD_FILE) {
1327 printf("Failed to get tree for expression operand:\n");
1328 ir->operands[operand]->print();
1329 exit(1);
1330 }
1331 op[operand] = this->result;
1332
1333 /* Matrix expression operands should have been broken down to vector
1334 * operations already.
1335 */
1336 assert(!ir->operands[operand]->type->is_matrix());
1337 }
1338
1339 int vector_elements = ir->operands[0]->type->vector_elements;
1340 if (ir->operands[1]) {
1341 vector_elements = MAX2(vector_elements,
1342 ir->operands[1]->type->vector_elements);
1343 }
1344
1345 this->result.file = BAD_FILE;
1346
1347 /* Storage for our result. Ideally for an assignment we'd be using
1348 * the actual storage for the result here, instead.
1349 */
1350 result_src = src_reg(this, ir->type);
1351 /* convenience for the emit functions below. */
1352 result_dst = dst_reg(result_src);
1353 /* If nothing special happens, this is the result. */
1354 this->result = result_src;
1355 /* Limit writes to the channels that will be used by result_src later.
1356 * This does limit this temp's use as a temporary for multi-instruction
1357 * sequences.
1358 */
1359 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1360
1361 switch (ir->operation) {
1362 case ir_unop_logic_not:
1363 /* Note that BRW_OPCODE_NOT is not appropriate here, since it takes
1364 * the one's complement of the whole register, not just bit 0.
1365 */
1366 emit(XOR(result_dst, op[0], src_reg(1)));
1367 break;
1368 case ir_unop_neg:
1369 op[0].negate = !op[0].negate;
1370 emit(MOV(result_dst, op[0]));
1371 break;
1372 case ir_unop_abs:
1373 op[0].abs = true;
1374 op[0].negate = false;
1375 emit(MOV(result_dst, op[0]));
1376 break;
1377
1378 case ir_unop_sign:
1379 emit(MOV(result_dst, src_reg(0.0f)));
1380
1381 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1382 inst = emit(MOV(result_dst, src_reg(1.0f)));
1383 inst->predicate = BRW_PREDICATE_NORMAL;
1384
1385 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1386 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1387 inst->predicate = BRW_PREDICATE_NORMAL;
1388
1389 break;
1390
1391 case ir_unop_rcp:
1392 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1393 break;
1394
1395 case ir_unop_exp2:
1396 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1397 break;
1398 case ir_unop_log2:
1399 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1400 break;
1401 case ir_unop_exp:
1402 case ir_unop_log:
1403 assert(!"not reached: should be handled by ir_explog_to_explog2");
1404 break;
1405 case ir_unop_sin:
1406 case ir_unop_sin_reduced:
1407 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1408 break;
1409 case ir_unop_cos:
1410 case ir_unop_cos_reduced:
1411 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1412 break;
1413
1414 case ir_unop_dFdx:
1415 case ir_unop_dFdy:
1416 assert(!"derivatives not valid in vertex shader");
1417 break;
1418
1419 case ir_unop_bitfield_reverse:
1420 emit(BFREV(result_dst, op[0]));
1421 break;
1422 case ir_unop_bit_count:
1423 emit(CBIT(result_dst, op[0]));
1424 break;
1425 case ir_unop_find_msb: {
1426 src_reg temp = src_reg(this, glsl_type::uint_type);
1427
1428 inst = emit(FBH(dst_reg(temp), op[0]));
1429 inst->dst.writemask = WRITEMASK_XYZW;
1430
1431 /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
1432 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
1433 * subtract the result from 31 to convert the MSB count into an LSB count.
1434 */
1435
1436 /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
1437 temp.swizzle = BRW_SWIZZLE_NOOP;
1438 emit(MOV(result_dst, temp));
1439
1440 src_reg src_tmp = src_reg(result_dst);
1441 emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
1442
1443 src_tmp.negate = true;
1444 inst = emit(ADD(result_dst, src_tmp, src_reg(31)));
1445 inst->predicate = BRW_PREDICATE_NORMAL;
1446 break;
1447 }
1448 case ir_unop_find_lsb:
1449 emit(FBL(result_dst, op[0]));
1450 break;
1451
1452 case ir_unop_noise:
1453 assert(!"not reached: should be handled by lower_noise");
1454 break;
1455
1456 case ir_binop_add:
1457 emit(ADD(result_dst, op[0], op[1]));
1458 break;
1459 case ir_binop_sub:
1460 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1461 break;
1462
1463 case ir_binop_mul:
1464 if (ir->type->is_integer()) {
1465 /* For integer multiplication, the MUL uses the low 16 bits of one of
1466 * the operands (src0 through SNB, src1 on IVB and later). The MACH
1467 * accumulates the contribution of the upper 16 bits of that
1468 * operand. If we can determine that one of the args is in the low
1469 * 16 bits, though, we can just emit a single MUL.
1470 */
1471 if (is_16bit_constant(ir->operands[0])) {
1472 if (brw->gen < 7)
1473 emit(MUL(result_dst, op[0], op[1]));
1474 else
1475 emit(MUL(result_dst, op[1], op[0]));
1476 } else if (is_16bit_constant(ir->operands[1])) {
1477 if (brw->gen < 7)
1478 emit(MUL(result_dst, op[1], op[0]));
1479 else
1480 emit(MUL(result_dst, op[0], op[1]));
1481 } else {
1482 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1483
1484 emit(MUL(acc, op[0], op[1]));
1485 emit(MACH(dst_null_d(), op[0], op[1]));
1486 emit(MOV(result_dst, src_reg(acc)));
1487 }
1488 } else {
1489 emit(MUL(result_dst, op[0], op[1]));
1490 }
1491 break;
1492 case ir_binop_div:
1493 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1494 assert(ir->type->is_integer());
1495 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1496 break;
1497 case ir_binop_mod:
1498 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1499 assert(ir->type->is_integer());
1500 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1501 break;
1502
1503 case ir_binop_less:
1504 case ir_binop_greater:
1505 case ir_binop_lequal:
1506 case ir_binop_gequal:
1507 case ir_binop_equal:
1508 case ir_binop_nequal: {
1509 emit(CMP(result_dst, op[0], op[1],
1510 brw_conditional_for_comparison(ir->operation)));
1511 emit(AND(result_dst, result_src, src_reg(0x1)));
1512 break;
1513 }
1514
1515 case ir_binop_all_equal:
1516 /* "==" operator producing a scalar boolean. */
1517 if (ir->operands[0]->type->is_vector() ||
1518 ir->operands[1]->type->is_vector()) {
1519 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1520 emit(MOV(result_dst, src_reg(0)));
1521 inst = emit(MOV(result_dst, src_reg(1)));
1522 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1523 } else {
1524 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1525 emit(AND(result_dst, result_src, src_reg(0x1)));
1526 }
1527 break;
1528 case ir_binop_any_nequal:
1529 /* "!=" operator producing a scalar boolean. */
1530 if (ir->operands[0]->type->is_vector() ||
1531 ir->operands[1]->type->is_vector()) {
1532 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1533
1534 emit(MOV(result_dst, src_reg(0)));
1535 inst = emit(MOV(result_dst, src_reg(1)));
1536 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1537 } else {
1538 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1539 emit(AND(result_dst, result_src, src_reg(0x1)));
1540 }
1541 break;
1542
1543 case ir_unop_any:
1544 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1545 emit(MOV(result_dst, src_reg(0)));
1546
1547 inst = emit(MOV(result_dst, src_reg(1)));
1548 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1549 break;
1550
1551 case ir_binop_logic_xor:
1552 emit(XOR(result_dst, op[0], op[1]));
1553 break;
1554
1555 case ir_binop_logic_or:
1556 emit(OR(result_dst, op[0], op[1]));
1557 break;
1558
1559 case ir_binop_logic_and:
1560 emit(AND(result_dst, op[0], op[1]));
1561 break;
1562
1563 case ir_binop_dot:
1564 assert(ir->operands[0]->type->is_vector());
1565 assert(ir->operands[0]->type == ir->operands[1]->type);
1566 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1567 break;
1568
1569 case ir_unop_sqrt:
1570 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1571 break;
1572 case ir_unop_rsq:
1573 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1574 break;
1575
1576 case ir_unop_bitcast_i2f:
1577 case ir_unop_bitcast_u2f:
1578 this->result = op[0];
1579 this->result.type = BRW_REGISTER_TYPE_F;
1580 break;
1581
1582 case ir_unop_bitcast_f2i:
1583 this->result = op[0];
1584 this->result.type = BRW_REGISTER_TYPE_D;
1585 break;
1586
1587 case ir_unop_bitcast_f2u:
1588 this->result = op[0];
1589 this->result.type = BRW_REGISTER_TYPE_UD;
1590 break;
1591
1592 case ir_unop_i2f:
1593 case ir_unop_i2u:
1594 case ir_unop_u2i:
1595 case ir_unop_u2f:
1596 case ir_unop_b2f:
1597 case ir_unop_b2i:
1598 case ir_unop_f2i:
1599 case ir_unop_f2u:
1600 emit(MOV(result_dst, op[0]));
1601 break;
1602 case ir_unop_f2b:
1603 case ir_unop_i2b: {
1604 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1605 emit(AND(result_dst, result_src, src_reg(1)));
1606 break;
1607 }
1608
1609 case ir_unop_trunc:
1610 emit(RNDZ(result_dst, op[0]));
1611 break;
1612 case ir_unop_ceil:
1613 op[0].negate = !op[0].negate;
1614 inst = emit(RNDD(result_dst, op[0]));
1615 this->result.negate = true;
1616 break;
1617 case ir_unop_floor:
1618 inst = emit(RNDD(result_dst, op[0]));
1619 break;
1620 case ir_unop_fract:
1621 inst = emit(FRC(result_dst, op[0]));
1622 break;
1623 case ir_unop_round_even:
1624 emit(RNDE(result_dst, op[0]));
1625 break;
1626
1627 case ir_binop_min:
1628 emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
1629 break;
1630 case ir_binop_max:
1631 emit_minmax(BRW_CONDITIONAL_G, result_dst, op[0], op[1]);
1632 break;
1633
1634 case ir_binop_pow:
1635 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1636 break;
1637
1638 case ir_unop_bit_not:
1639 inst = emit(NOT(result_dst, op[0]));
1640 break;
1641 case ir_binop_bit_and:
1642 inst = emit(AND(result_dst, op[0], op[1]));
1643 break;
1644 case ir_binop_bit_xor:
1645 inst = emit(XOR(result_dst, op[0], op[1]));
1646 break;
1647 case ir_binop_bit_or:
1648 inst = emit(OR(result_dst, op[0], op[1]));
1649 break;
1650
1651 case ir_binop_lshift:
1652 inst = emit(SHL(result_dst, op[0], op[1]));
1653 break;
1654
1655 case ir_binop_rshift:
1656 if (ir->type->base_type == GLSL_TYPE_INT)
1657 inst = emit(ASR(result_dst, op[0], op[1]));
1658 else
1659 inst = emit(SHR(result_dst, op[0], op[1]));
1660 break;
1661
1662 case ir_binop_bfm:
1663 emit(BFI1(result_dst, op[0], op[1]));
1664 break;
1665
1666 case ir_binop_ubo_load: {
1667 ir_constant *uniform_block = ir->operands[0]->as_constant();
1668 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1669 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
1670 src_reg offset = op[1];
1671
1672 /* Now, load the vector from that offset. */
1673 assert(ir->type->is_vector() || ir->type->is_scalar());
1674
1675 src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
1676 packed_consts.type = result.type;
1677 src_reg surf_index =
1678 src_reg(SURF_INDEX_VEC4_UBO(uniform_block->value.u[0]));
1679 if (const_offset_ir) {
1680 offset = src_reg(const_offset / 16);
1681 } else {
1682 emit(SHR(dst_reg(offset), offset, src_reg(4)));
1683 }
1684
1685 vec4_instruction *pull =
1686 emit(new(mem_ctx) vec4_instruction(this,
1687 VS_OPCODE_PULL_CONSTANT_LOAD,
1688 dst_reg(packed_consts),
1689 surf_index,
1690 offset));
1691 pull->base_mrf = 14;
1692 pull->mlen = 1;
1693
1694 packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements);
1695 packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
1696 const_offset % 16 / 4,
1697 const_offset % 16 / 4,
1698 const_offset % 16 / 4);
1699
1700 /* UBO bools are any nonzero int. We store bools as either 0 or 1. */
1701 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1702 emit(CMP(result_dst, packed_consts, src_reg(0u),
1703 BRW_CONDITIONAL_NZ));
1704 emit(AND(result_dst, result, src_reg(0x1)));
1705 } else {
1706 emit(MOV(result_dst, packed_consts));
1707 }
1708 break;
1709 }
1710
1711 case ir_binop_vector_extract:
1712 assert(!"should have been lowered by vec_index_to_cond_assign");
1713 break;
1714
1715 case ir_triop_fma:
1716 op[0] = fix_3src_operand(op[0]);
1717 op[1] = fix_3src_operand(op[1]);
1718 op[2] = fix_3src_operand(op[2]);
1719 /* Note that the instruction's argument order is reversed from GLSL
1720 * and the IR.
1721 */
1722 emit(MAD(result_dst, op[2], op[1], op[0]));
1723 break;
1724
1725 case ir_triop_lrp:
1726 op[0] = fix_3src_operand(op[0]);
1727 op[1] = fix_3src_operand(op[1]);
1728 op[2] = fix_3src_operand(op[2]);
1729 /* Note that the instruction's argument order is reversed from GLSL
1730 * and the IR.
1731 */
1732 emit(LRP(result_dst, op[2], op[1], op[0]));
1733 break;
1734
1735 case ir_triop_bfi:
1736 op[0] = fix_3src_operand(op[0]);
1737 op[1] = fix_3src_operand(op[1]);
1738 op[2] = fix_3src_operand(op[2]);
1739 emit(BFI2(result_dst, op[0], op[1], op[2]));
1740 break;
1741
1742 case ir_triop_bitfield_extract:
1743 op[0] = fix_3src_operand(op[0]);
1744 op[1] = fix_3src_operand(op[1]);
1745 op[2] = fix_3src_operand(op[2]);
1746 /* Note that the instruction's argument order is reversed from GLSL
1747 * and the IR.
1748 */
1749 emit(BFE(result_dst, op[2], op[1], op[0]));
1750 break;
1751
1752 case ir_triop_vector_insert:
1753 assert(!"should have been lowered by lower_vector_insert");
1754 break;
1755
1756 case ir_quadop_bitfield_insert:
1757 assert(!"not reached: should be handled by "
1758 "bitfield_insert_to_bfm_bfi\n");
1759 break;
1760
1761 case ir_quadop_vector:
1762 assert(!"not reached: should be handled by lower_quadop_vector");
1763 break;
1764
1765 case ir_unop_pack_half_2x16:
1766 emit_pack_half_2x16(result_dst, op[0]);
1767 break;
1768 case ir_unop_unpack_half_2x16:
1769 emit_unpack_half_2x16(result_dst, op[0]);
1770 break;
1771 case ir_unop_pack_snorm_2x16:
1772 case ir_unop_pack_snorm_4x8:
1773 case ir_unop_pack_unorm_2x16:
1774 case ir_unop_pack_unorm_4x8:
1775 case ir_unop_unpack_snorm_2x16:
1776 case ir_unop_unpack_snorm_4x8:
1777 case ir_unop_unpack_unorm_2x16:
1778 case ir_unop_unpack_unorm_4x8:
1779 assert(!"not reached: should be handled by lower_packing_builtins");
1780 break;
1781 case ir_unop_unpack_half_2x16_split_x:
1782 case ir_unop_unpack_half_2x16_split_y:
1783 case ir_binop_pack_half_2x16_split:
1784 assert(!"not reached: should not occur in vertex shader");
1785 break;
1786 }
1787 }
1788
1789
1790 void
1791 vec4_visitor::visit(ir_swizzle *ir)
1792 {
1793 src_reg src;
1794 int i = 0;
1795 int swizzle[4];
1796
1797 /* Note that this is only swizzles in expressions, not those on the left
1798 * hand side of an assignment, which do write masking. See ir_assignment
1799 * for that.
1800 */
1801
1802 ir->val->accept(this);
1803 src = this->result;
1804 assert(src.file != BAD_FILE);
1805
1806 for (i = 0; i < ir->type->vector_elements; i++) {
1807 switch (i) {
1808 case 0:
1809 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1810 break;
1811 case 1:
1812 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1813 break;
1814 case 2:
1815 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1816 break;
1817 case 3:
1818 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1819 break;
1820 }
1821 }
1822 for (; i < 4; i++) {
1823 /* Replicate the last channel out. */
1824 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1825 }
1826
1827 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1828
1829 this->result = src;
1830 }
1831
1832 void
1833 vec4_visitor::visit(ir_dereference_variable *ir)
1834 {
1835 const struct glsl_type *type = ir->type;
1836 dst_reg *reg = variable_storage(ir->var);
1837
1838 if (!reg) {
1839 fail("Failed to find variable storage for %s\n", ir->var->name);
1840 this->result = src_reg(brw_null_reg());
1841 return;
1842 }
1843
1844 this->result = src_reg(*reg);
1845
1846 /* System values get their swizzle from the dst_reg writemask */
1847 if (ir->var->mode == ir_var_system_value)
1848 return;
1849
1850 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1851 this->result.swizzle = swizzle_for_size(type->vector_elements);
1852 }
1853
1854
1855 int
1856 vec4_visitor::compute_array_stride(ir_dereference_array *ir)
1857 {
1858 /* Under normal circumstances array elements are stored consecutively, so
1859 * the stride is equal to the size of the array element.
1860 */
1861 return type_size(ir->type);
1862 }
1863
1864
1865 void
1866 vec4_visitor::visit(ir_dereference_array *ir)
1867 {
1868 ir_constant *constant_index;
1869 src_reg src;
1870 int array_stride = compute_array_stride(ir);
1871
1872 constant_index = ir->array_index->constant_expression_value();
1873
1874 ir->array->accept(this);
1875 src = this->result;
1876
1877 if (constant_index) {
1878 src.reg_offset += constant_index->value.i[0] * array_stride;
1879 } else {
1880 /* Variable index array dereference. It eats the "vec4" of the
1881 * base of the array and an index that offsets the Mesa register
1882 * index.
1883 */
1884 ir->array_index->accept(this);
1885
1886 src_reg index_reg;
1887
1888 if (array_stride == 1) {
1889 index_reg = this->result;
1890 } else {
1891 index_reg = src_reg(this, glsl_type::int_type);
1892
1893 emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride)));
1894 }
1895
1896 if (src.reladdr) {
1897 src_reg temp = src_reg(this, glsl_type::int_type);
1898
1899 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1900
1901 index_reg = temp;
1902 }
1903
1904 src.reladdr = ralloc(mem_ctx, src_reg);
1905 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1906 }
1907
1908 /* If the type is smaller than a vec4, replicate the last channel out. */
1909 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1910 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1911 else
1912 src.swizzle = BRW_SWIZZLE_NOOP;
1913 src.type = brw_type_for_base_type(ir->type);
1914
1915 this->result = src;
1916 }
1917
1918 void
1919 vec4_visitor::visit(ir_dereference_record *ir)
1920 {
1921 unsigned int i;
1922 const glsl_type *struct_type = ir->record->type;
1923 int offset = 0;
1924
1925 ir->record->accept(this);
1926
1927 for (i = 0; i < struct_type->length; i++) {
1928 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1929 break;
1930 offset += type_size(struct_type->fields.structure[i].type);
1931 }
1932
1933 /* If the type is smaller than a vec4, replicate the last channel out. */
1934 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1935 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1936 else
1937 this->result.swizzle = BRW_SWIZZLE_NOOP;
1938 this->result.type = brw_type_for_base_type(ir->type);
1939
1940 this->result.reg_offset += offset;
1941 }
1942
1943 /**
1944 * We want to be careful in assignment setup to hit the actual storage
1945 * instead of potentially using a temporary like we might with the
1946 * ir_dereference handler.
1947 */
1948 static dst_reg
1949 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1950 {
1951 /* The LHS must be a dereference. If the LHS is a variable indexed array
1952 * access of a vector, it must be separated into a series conditional moves
1953 * before reaching this point (see ir_vec_index_to_cond_assign).
1954 */
1955 assert(ir->as_dereference());
1956 ir_dereference_array *deref_array = ir->as_dereference_array();
1957 if (deref_array) {
1958 assert(!deref_array->array->type->is_vector());
1959 }
1960
1961 /* Use the rvalue deref handler for the most part. We'll ignore
1962 * swizzles in it and write swizzles using writemask, though.
1963 */
1964 ir->accept(v);
1965 return dst_reg(v->result);
1966 }
1967
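/* Recursively copy a (possibly struct/array/matrix) value from *src to *dst,
 * one vec4-sized MOV at a time, applying the given predicate and advancing
 * the register offsets as we go.  Scalars and vectors are the base case.
 */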
1968 void
1969 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1970 const struct glsl_type *type, uint32_t predicate)
1971 {
1972 if (type->base_type == GLSL_TYPE_STRUCT) {
1973 for (unsigned int i = 0; i < type->length; i++) {
1974 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1975 }
1976 return;
1977 }
1978
1979 if (type->is_array()) {
1980 for (unsigned int i = 0; i < type->length; i++) {
1981 emit_block_move(dst, src, type->fields.array, predicate);
1982 }
1983 return;
1984 }
1985
1986 if (type->is_matrix()) {
1987 const struct glsl_type *vec_type;
1988
1989 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1990 type->vector_elements, 1);
1991
1992 for (int i = 0; i < type->matrix_columns; i++) {
1993 emit_block_move(dst, src, vec_type, predicate);
1994 }
1995 return;
1996 }
1997
1998 assert(type->is_scalar() || type->is_vector());
1999
2000 dst->type = brw_type_for_base_type(type);
2001 src->type = dst->type;
2002
2003 dst->writemask = (1 << type->vector_elements) - 1;
2004
2005 src->swizzle = swizzle_for_size(type->vector_elements);
2006
2007 vec4_instruction *inst = emit(MOV(*dst, *src));
2008 inst->predicate = predicate;
2009
2010 dst->reg_offset++;
2011 src->reg_offset++;
2012 }
2013
2014
2015 /* If the RHS processing resulted in an instruction generating a
2016 * temporary value, and it would be easy to rewrite the instruction to
2017 * generate its result right into the LHS instead, do so. This ends
2018 * up reliably removing instructions where it can be tricky to do so
2019 * later without real UD chain information.
2020 */
2021 bool
2022 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
2023 dst_reg dst,
2024 src_reg src,
2025 vec4_instruction *pre_rhs_inst,
2026 vec4_instruction *last_rhs_inst)
2027 {
2028 /* This could be supported, but it would take more smarts. */
2029 if (ir->condition)
2030 return false;
2031
2032 if (pre_rhs_inst == last_rhs_inst)
2033 return false; /* No instructions generated to work with. */
2034
2035 /* Make sure the last instruction generated our source reg. */
2036 if (src.file != GRF ||
2037 src.file != last_rhs_inst->dst.file ||
2038 src.reg != last_rhs_inst->dst.reg ||
2039 src.reg_offset != last_rhs_inst->dst.reg_offset ||
2040 src.reladdr ||
2041 src.abs ||
2042 src.negate ||
2043 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
2044 return false;
2045
2046	   /* Check that the last instruction fully initialized the channels
2047 * we want to use, in the order we want to use them. We could
2048 * potentially reswizzle the operands of many instructions so that
2049 * we could handle out of order channels, but don't yet.
2050 */
2051
2052 for (unsigned i = 0; i < 4; i++) {
2053 if (dst.writemask & (1 << i)) {
2054 if (!(last_rhs_inst->dst.writemask & (1 << i)))
2055 return false;
2056
2057 if (BRW_GET_SWZ(src.swizzle, i) != i)
2058 return false;
2059 }
2060 }
2061
2062 /* Success! Rewrite the instruction. */
2063 last_rhs_inst->dst.file = dst.file;
2064 last_rhs_inst->dst.reg = dst.reg;
2065 last_rhs_inst->dst.reg_offset = dst.reg_offset;
2066 last_rhs_inst->dst.reladdr = dst.reladdr;
2067 last_rhs_inst->dst.writemask &= dst.writemask;
2068
2069 return true;
2070 }
2071
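/* Emit code for an assignment.  A struct/array/matrix LHS goes through
 * emit_block_move; a scalar/vector LHS gets its write mask applied and the
 * RHS swizzle remapped into the written channels, after first attempting to
 * fold the RHS instruction's destination straight into the LHS.
 */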
2072 void
2073 vec4_visitor::visit(ir_assignment *ir)
2074 {
2075 dst_reg dst = get_assignment_lhs(ir->lhs, this);
2076 uint32_t predicate = BRW_PREDICATE_NONE;
2077
2078 if (!ir->lhs->type->is_scalar() &&
2079 !ir->lhs->type->is_vector()) {
2080 ir->rhs->accept(this);
2081 src_reg src = this->result;
2082
2083 if (ir->condition) {
2084 emit_bool_to_cond_code(ir->condition, &predicate);
2085 }
2086
2087 /* emit_block_move doesn't account for swizzles in the source register.
2088 * This should be ok, since the source register is a structure or an
2089 * array, and those can't be swizzled. But double-check to be sure.
2090 */
2091 assert(src.swizzle ==
2092 (ir->rhs->type->is_matrix()
2093 ? swizzle_for_size(ir->rhs->type->vector_elements)
2094 : BRW_SWIZZLE_NOOP));
2095
2096 emit_block_move(&dst, &src, ir->rhs->type, predicate);
2097 return;
2098 }
2099
2100 /* Now we're down to just a scalar/vector with writemasks. */
2101 int i;
2102
2103 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
2104 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2105
2106 ir->rhs->accept(this);
2107
2108 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2109
2110 src_reg src = this->result;
2111
2112 int swizzles[4];
2113 int first_enabled_chan = 0;
2114 int src_chan = 0;
2115
2116 assert(ir->lhs->type->is_vector() ||
2117 ir->lhs->type->is_scalar());
2118 dst.writemask = ir->write_mask;
2119
2120 for (int i = 0; i < 4; i++) {
2121 if (dst.writemask & (1 << i)) {
2122 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
2123 break;
2124 }
2125 }
2126
2127 /* Swizzle a small RHS vector into the channels being written.
2128 *
2129 * glsl ir treats write_mask as dictating how many channels are
2130	    * present on the RHS, while in our instructions we need to make
2131 * those channels appear in the slots of the vec4 they're written to.
2132 */
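   /* For example, a vec2 RHS (swizzle .xyyy) written with write_mask .yz gets
    * the swizzle .yxyy below, so RHS.x lands in dst.y and RHS.y in dst.z; the
    * unwritten slots are filled with an arbitrary valid RHS channel, since
    * those values are never used.
    */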
2133 for (int i = 0; i < 4; i++) {
2134 if (dst.writemask & (1 << i))
2135 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
2136 else
2137 swizzles[i] = first_enabled_chan;
2138 }
2139 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2140 swizzles[2], swizzles[3]);
2141
2142 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
2143 return;
2144 }
2145
2146 if (ir->condition) {
2147 emit_bool_to_cond_code(ir->condition, &predicate);
2148 }
2149
2150 for (i = 0; i < type_size(ir->lhs->type); i++) {
2151 vec4_instruction *inst = emit(MOV(dst, src));
2152 inst->predicate = predicate;
2153
2154 dst.reg_offset++;
2155 src.reg_offset++;
2156 }
2157 }
2158
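/* Emit immediate MOVs for a constant value, recursing through structs,
 * arrays and matrix columns.  For vectors, components with identical values
 * are coalesced into a single writemasked MOV, so e.g. vec4(0.5, 1.5, 1.5,
 * 1.5) takes two MOVs instead of four.
 */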
2159 void
2160 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
2161 {
2162 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2163 foreach_list(node, &ir->components) {
2164 ir_constant *field_value = (ir_constant *)node;
2165
2166 emit_constant_values(dst, field_value);
2167 }
2168 return;
2169 }
2170
2171 if (ir->type->is_array()) {
2172 for (unsigned int i = 0; i < ir->type->length; i++) {
2173 emit_constant_values(dst, ir->array_elements[i]);
2174 }
2175 return;
2176 }
2177
2178 if (ir->type->is_matrix()) {
2179 for (int i = 0; i < ir->type->matrix_columns; i++) {
2180 float *vec = &ir->value.f[i * ir->type->vector_elements];
2181
2182 for (int j = 0; j < ir->type->vector_elements; j++) {
2183 dst->writemask = 1 << j;
2184 dst->type = BRW_REGISTER_TYPE_F;
2185
2186 emit(MOV(*dst, src_reg(vec[j])));
2187 }
2188 dst->reg_offset++;
2189 }
2190 return;
2191 }
2192
2193 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
2194
2195 for (int i = 0; i < ir->type->vector_elements; i++) {
2196 if (!(remaining_writemask & (1 << i)))
2197 continue;
2198
2199 dst->writemask = 1 << i;
2200 dst->type = brw_type_for_base_type(ir->type);
2201
2202 /* Find other components that match the one we're about to
2203 * write. Emits fewer instructions for things like vec4(0.5,
2204 * 1.5, 1.5, 1.5).
2205 */
2206 for (int j = i + 1; j < ir->type->vector_elements; j++) {
2207 if (ir->type->base_type == GLSL_TYPE_BOOL) {
2208 if (ir->value.b[i] == ir->value.b[j])
2209 dst->writemask |= (1 << j);
2210 } else {
2211 /* u, i, and f storage all line up, so no need for a
2212 * switch case for comparing each type.
2213 */
2214 if (ir->value.u[i] == ir->value.u[j])
2215 dst->writemask |= (1 << j);
2216 }
2217 }
2218
2219 switch (ir->type->base_type) {
2220 case GLSL_TYPE_FLOAT:
2221 emit(MOV(*dst, src_reg(ir->value.f[i])));
2222 break;
2223 case GLSL_TYPE_INT:
2224 emit(MOV(*dst, src_reg(ir->value.i[i])));
2225 break;
2226 case GLSL_TYPE_UINT:
2227 emit(MOV(*dst, src_reg(ir->value.u[i])));
2228 break;
2229 case GLSL_TYPE_BOOL:
2230 emit(MOV(*dst, src_reg(ir->value.b[i])));
2231 break;
2232 default:
2233 assert(!"Non-float/uint/int/bool constant");
2234 break;
2235 }
2236
2237 remaining_writemask &= ~dst->writemask;
2238 }
2239 dst->reg_offset++;
2240 }
2241
2242 void
2243 vec4_visitor::visit(ir_constant *ir)
2244 {
2245 dst_reg dst = dst_reg(this, ir->type);
2246 this->result = src_reg(dst);
2247
2248 emit_constant_values(&dst, ir);
2249 }
2250
2251 void
2252 vec4_visitor::visit(ir_call *ir)
2253 {
2254 assert(!"not reached");
2255 }
2256
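/* Emit the sampler message for a texture operation: evaluate the operand
 * trees first (they may themselves need SEND messages), then build the MRF
 * payload (coordinate, shadow compare value, LOD / derivatives / sample
 * index), and finally apply the sampler's result swizzle.
 */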
2257 void
2258 vec4_visitor::visit(ir_texture *ir)
2259 {
2260 int sampler =
2261 _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
2262
2263 /* Should be lowered by do_lower_texture_projection */
2264 assert(!ir->projector);
2265
2266 /* Generate code to compute all the subexpression trees. This has to be
2267 * done before loading any values into MRFs for the sampler message since
2268 * generating these values may involve SEND messages that need the MRFs.
2269 */
2270 src_reg coordinate;
2271 if (ir->coordinate) {
2272 ir->coordinate->accept(this);
2273 coordinate = this->result;
2274 }
2275
2276 src_reg shadow_comparitor;
2277 if (ir->shadow_comparitor) {
2278 ir->shadow_comparitor->accept(this);
2279 shadow_comparitor = this->result;
2280 }
2281
2282 const glsl_type *lod_type = NULL, *sample_index_type = NULL;
2283 src_reg lod, dPdx, dPdy, sample_index;
2284 switch (ir->op) {
2285 case ir_tex:
2286 lod = src_reg(0.0f);
2287 lod_type = glsl_type::float_type;
2288 break;
2289 case ir_txf:
2290 case ir_txl:
2291 case ir_txs:
2292 ir->lod_info.lod->accept(this);
2293 lod = this->result;
2294 lod_type = ir->lod_info.lod->type;
2295 break;
2296 case ir_txf_ms:
2297 ir->lod_info.sample_index->accept(this);
2298 sample_index = this->result;
2299 sample_index_type = ir->lod_info.sample_index->type;
2300 break;
2301 case ir_txd:
2302 ir->lod_info.grad.dPdx->accept(this);
2303 dPdx = this->result;
2304
2305 ir->lod_info.grad.dPdy->accept(this);
2306 dPdy = this->result;
2307
2308 lod_type = ir->lod_info.grad.dPdx->type;
2309 break;
2310 case ir_txb:
2311 case ir_lod:
2312 break;
2313 }
2314
2315 vec4_instruction *inst = NULL;
2316 switch (ir->op) {
2317 case ir_tex:
2318 case ir_txl:
2319 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
2320 break;
2321 case ir_txd:
2322 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
2323 break;
2324 case ir_txf:
2325 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
2326 break;
2327 case ir_txf_ms:
2328 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF_MS);
2329 break;
2330 case ir_txs:
2331 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
2332 break;
2333 case ir_txb:
2334 assert(!"TXB is not valid for vertex shaders.");
2335 break;
2336 case ir_lod:
2337 assert(!"LOD is not valid for vertex shaders.");
2338 break;
2339 }
2340
2341 bool use_texture_offset = ir->offset != NULL && ir->op != ir_txf;
2342
2343 /* Texel offsets go in the message header; Gen4 also requires headers. */
2344 inst->header_present = use_texture_offset || brw->gen < 5;
2345 inst->base_mrf = 2;
2346 inst->mlen = inst->header_present + 1; /* always at least one */
2347 inst->sampler = sampler;
2348 inst->dst = dst_reg(this, ir->type);
2349 inst->dst.writemask = WRITEMASK_XYZW;
2350 inst->shadow_compare = ir->shadow_comparitor != NULL;
2351
2352 if (use_texture_offset)
2353 inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
2354
2355 /* MRF for the first parameter */
2356 int param_base = inst->base_mrf + inst->header_present;
2357
2358 if (ir->op == ir_txs) {
2359 int writemask = brw->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
2360 emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod));
2361 } else {
2362 int i, coord_mask = 0, zero_mask = 0;
2363 /* Load the coordinate */
2364 /* FINISHME: gl_clamp_mask and saturate */
2365 for (i = 0; i < ir->coordinate->type->vector_elements; i++)
2366 coord_mask |= (1 << i);
2367 for (; i < 4; i++)
2368 zero_mask |= (1 << i);
2369
2370 if (ir->offset && ir->op == ir_txf) {
2371 /* It appears that the ld instruction used for txf does its
2372 * address bounds check before adding in the offset. To work
2373 * around this, just add the integer offset to the integer
2374 * texel coordinate, and don't put the offset in the header.
2375 */
2376 ir_constant *offset = ir->offset->as_constant();
2377 assert(offset);
2378
2379 for (int j = 0; j < ir->coordinate->type->vector_elements; j++) {
2380 src_reg src = coordinate;
2381 src.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(src.swizzle, j),
2382 BRW_GET_SWZ(src.swizzle, j),
2383 BRW_GET_SWZ(src.swizzle, j),
2384 BRW_GET_SWZ(src.swizzle, j));
2385 emit(ADD(dst_reg(MRF, param_base, ir->coordinate->type, 1 << j),
2386 src, offset->value.i[j]));
2387 }
2388 } else {
2389 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
2390 coordinate));
2391 }
2392 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
2393 src_reg(0)));
2394 /* Load the shadow comparitor */
2395 if (ir->shadow_comparitor && ir->op != ir_txd) {
2396 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
2397 WRITEMASK_X),
2398 shadow_comparitor));
2399 inst->mlen++;
2400 }
2401
2402 /* Load the LOD info */
2403 if (ir->op == ir_tex || ir->op == ir_txl) {
2404 int mrf, writemask;
2405 if (brw->gen >= 5) {
2406 mrf = param_base + 1;
2407 if (ir->shadow_comparitor) {
2408 writemask = WRITEMASK_Y;
2409 /* mlen already incremented */
2410 } else {
2411 writemask = WRITEMASK_X;
2412 inst->mlen++;
2413 }
2414 } else /* brw->gen == 4 */ {
2415 mrf = param_base;
2416 writemask = WRITEMASK_W;
2417 }
2418 emit(MOV(dst_reg(MRF, mrf, lod_type, writemask), lod));
2419 } else if (ir->op == ir_txf) {
2420 emit(MOV(dst_reg(MRF, param_base, lod_type, WRITEMASK_W), lod));
2421 } else if (ir->op == ir_txf_ms) {
2422 emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X),
2423 sample_index));
2424 inst->mlen++;
2425
2426	      /* On Gen7, there is an additional MCS parameter here after SI,
2427	       * but we don't bother to emit it since it's always zero.  If
2428	       * we start supporting texturing from CMS surfaces, this will have
2429	       * to change.
2430	       */
2431 } else if (ir->op == ir_txd) {
2432 const glsl_type *type = lod_type;
2433
2434 if (brw->gen >= 5) {
2435 dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2436 dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2437 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
2438 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
2439 inst->mlen++;
2440
2441 if (ir->type->vector_elements == 3 || ir->shadow_comparitor) {
2442 dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
2443 dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
2444 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
2445 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
2446 inst->mlen++;
2447
2448 if (ir->shadow_comparitor) {
2449 emit(MOV(dst_reg(MRF, param_base + 2,
2450 ir->shadow_comparitor->type, WRITEMASK_Z),
2451 shadow_comparitor));
2452 }
2453 }
2454 } else /* brw->gen == 4 */ {
2455 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
2456 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
2457 inst->mlen += 2;
2458 }
2459 }
2460 }
2461
2462 emit(inst);
2463
2464	   /* Fix up the number of layers (Z) for cube arrays: the hardware returns
2465	    * faces * layers, but the spec requires just the layer count.
2466	    */
2467 if (ir->op == ir_txs) {
2468 glsl_type const *type = ir->sampler->type;
2469 if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
2470 type->sampler_array) {
2471 emit_math(SHADER_OPCODE_INT_QUOTIENT,
2472 with_writemask(inst->dst, WRITEMASK_Z),
2473 src_reg(inst->dst), src_reg(6));
2474 }
2475 }
2476
2477 swizzle_result(ir, src_reg(inst->dst), sampler);
2478 }
2479
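/* Apply the texture swizzle from the program key (e.g. GL_TEXTURE_SWIZZLE_*
 * state) to the sampler result: copied channels become one swizzled MOV,
 * while SWIZZLE_ZERO / SWIZZLE_ONE channels are written with immediate MOVs.
 */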
2480 void
2481 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
2482 {
2483 int s = key->tex.swizzles[sampler];
2484
2485 this->result = src_reg(this, ir->type);
2486 dst_reg swizzled_result(this->result);
2487
2488 if (ir->op == ir_txs || ir->type == glsl_type::float_type
2489 || s == SWIZZLE_NOOP) {
2490 emit(MOV(swizzled_result, orig_val));
2491 return;
2492 }
2493
2494 int zero_mask = 0, one_mask = 0, copy_mask = 0;
2495 int swizzle[4] = {0};
2496
2497 for (int i = 0; i < 4; i++) {
2498 switch (GET_SWZ(s, i)) {
2499 case SWIZZLE_ZERO:
2500 zero_mask |= (1 << i);
2501 break;
2502 case SWIZZLE_ONE:
2503 one_mask |= (1 << i);
2504 break;
2505 default:
2506 copy_mask |= (1 << i);
2507 swizzle[i] = GET_SWZ(s, i);
2508 break;
2509 }
2510 }
2511
2512 if (copy_mask) {
2513 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2514 swizzled_result.writemask = copy_mask;
2515 emit(MOV(swizzled_result, orig_val));
2516 }
2517
2518 if (zero_mask) {
2519 swizzled_result.writemask = zero_mask;
2520 emit(MOV(swizzled_result, src_reg(0.0f)));
2521 }
2522
2523 if (one_mask) {
2524 swizzled_result.writemask = one_mask;
2525 emit(MOV(swizzled_result, src_reg(1.0f)));
2526 }
2527 }
2528
2529 void
2530 vec4_visitor::visit(ir_return *ir)
2531 {
2532 assert(!"not reached");
2533 }
2534
2535 void
2536 vec4_visitor::visit(ir_discard *ir)
2537 {
2538 assert(!"not reached");
2539 }
2540
2541 void
2542 vec4_visitor::visit(ir_if *ir)
2543 {
2544 /* Don't point the annotation at the if statement, because then it plus
2545 * the then and else blocks get printed.
2546 */
2547 this->base_ir = ir->condition;
2548
2549 if (brw->gen == 6) {
2550 emit_if_gen6(ir);
2551 } else {
2552 uint32_t predicate;
2553 emit_bool_to_cond_code(ir->condition, &predicate);
2554 emit(IF(predicate));
2555 }
2556
2557 visit_instructions(&ir->then_instructions);
2558
2559 if (!ir->else_instructions.is_empty()) {
2560 this->base_ir = ir->condition;
2561 emit(BRW_OPCODE_ELSE);
2562
2563 visit_instructions(&ir->else_instructions);
2564 }
2565
2566 this->base_ir = ir->condition;
2567 emit(BRW_OPCODE_ENDIF);
2568 }
2569
2570 void
2571 vec4_visitor::visit(ir_emit_vertex *)
2572 {
2573 assert(!"not reached");
2574 }
2575
2576 void
2577 vec4_visitor::visit(ir_end_primitive *)
2578 {
2579 assert(!"not reached");
2580 }
2581
2582 void
2583 vec4_visitor::emit_ndc_computation()
2584 {
2585 /* Get the position */
2586 src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
2587
2588 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
2589 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2590 output_reg[BRW_VARYING_SLOT_NDC] = ndc;
2591
2592 current_annotation = "NDC";
2593 dst_reg ndc_w = ndc;
2594 ndc_w.writemask = WRITEMASK_W;
2595 src_reg pos_w = pos;
2596 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2597 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2598
2599 dst_reg ndc_xyz = ndc;
2600 ndc_xyz.writemask = WRITEMASK_XYZ;
2601
2602 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
2603 }
2604
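/* Write the VUE header DWord that carries point size and flags.  On Gen4-5
 * this packs point size, the user clip flags and the negative-RHW workaround
 * bit into a single register; on Gen6+ point size and (if written) layer are
 * simply moved into their own channels.
 */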
2605 void
2606 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2607 {
2608 if (brw->gen < 6 &&
2609 ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
2610 key->userclip_active || brw->has_negative_rhw_bug)) {
2611 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2612 dst_reg header1_w = header1;
2613 header1_w.writemask = WRITEMASK_W;
2614
2615 emit(MOV(header1, 0u));
2616
2617 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
2618 src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
2619
2620 current_annotation = "Point size";
2621 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2622 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
2623 }
2624
2625 if (key->userclip_active) {
2626 current_annotation = "Clipping flags";
2627 dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
2628 dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
2629
2630 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L));
2631 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
2632 emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
2633
2634 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L));
2635 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
2636 emit(SHL(flags1, src_reg(flags1), src_reg(4)));
2637 emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
2638 }
2639
2640 /* i965 clipping workaround:
2641 * 1) Test for -ve rhw
2642 * 2) If set,
2643 * set ndc = (0,0,0,0)
2644 * set ucp[6] = 1
2645 *
2646 * Later, clipping will detect ucp[6] and ensure the primitive is
2647 * clipped against all fixed planes.
2648 */
2649 if (brw->has_negative_rhw_bug) {
2650 src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
2651 ndc_w.swizzle = BRW_SWIZZLE_WWWW;
2652 emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
2653 vec4_instruction *inst;
2654 inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
2655 inst->predicate = BRW_PREDICATE_NORMAL;
2656 inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
2657 inst->predicate = BRW_PREDICATE_NORMAL;
2658 }
2659
2660 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2661 } else if (brw->gen < 6) {
2662 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2663 } else {
2664 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2665 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
2666 emit(MOV(brw_writemask(reg, WRITEMASK_W),
2667 src_reg(output_reg[VARYING_SLOT_PSIZ])));
2668 }
2669 if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
2670 emit(MOV(retype(brw_writemask(reg, WRITEMASK_Y), BRW_REGISTER_TYPE_D),
2671 src_reg(output_reg[VARYING_SLOT_LAYER])));
2672 }
2673 }
2674 }
2675
2676 void
2677 vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
2678 {
2679 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2680 *
2681 * "If a linked set of shaders forming the vertex stage contains no
2682 * static write to gl_ClipVertex or gl_ClipDistance, but the
2683 * application has requested clipping against user clip planes through
2684 * the API, then the coordinate written to gl_Position is used for
2685 * comparison against the user clip planes."
2686 *
2687 * This function is only called if the shader didn't write to
2688 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
2689 * if the user wrote to it; otherwise we use gl_Position.
2690 */
2691 gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
2692 if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
2693 clip_vertex = VARYING_SLOT_POS;
2694 }
2695
2696 for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
2697 ++i) {
2698 reg.writemask = 1 << i;
2699 emit(DP4(reg,
2700 src_reg(output_reg[clip_vertex]),
2701 src_reg(this->userplane[i + offset])));
2702 }
2703 }
2704
2705 void
2706 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
2707 {
2708	   assert(varying < VARYING_SLOT_MAX);
2709 reg.type = output_reg[varying].type;
2710 current_annotation = output_reg_annotation[varying];
2711 /* Copy the register, saturating if necessary */
2712 vec4_instruction *inst = emit(MOV(reg,
2713 src_reg(output_reg[varying])));
2714 if ((varying == VARYING_SLOT_COL0 ||
2715 varying == VARYING_SLOT_COL1 ||
2716 varying == VARYING_SLOT_BFC0 ||
2717 varying == VARYING_SLOT_BFC1) &&
2718 key->clamp_vertex_color) {
2719 inst->saturate = true;
2720 }
2721 }
2722
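/* Fill one VUE slot of the URB write payload, handling the special slots
 * (PSIZ/flags, NDC, position, edge flag, padding) and falling back to a
 * generic varying copy otherwise.
 */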
2723 void
2724 vec4_visitor::emit_urb_slot(int mrf, int varying)
2725 {
2726 struct brw_reg hw_reg = brw_message_reg(mrf);
2727 dst_reg reg = dst_reg(MRF, mrf);
2728 reg.type = BRW_REGISTER_TYPE_F;
2729
2730 switch (varying) {
2731 case VARYING_SLOT_PSIZ:
2732 /* PSIZ is always in slot 0, and is coupled with other flags. */
2733 current_annotation = "indices, point width, clip flags";
2734 emit_psiz_and_flags(hw_reg);
2735 break;
2736 case BRW_VARYING_SLOT_NDC:
2737 current_annotation = "NDC";
2738 emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
2739 break;
2740 case VARYING_SLOT_POS:
2741 current_annotation = "gl_Position";
2742 emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
2743 break;
2744 case VARYING_SLOT_EDGE:
2745 /* This is present when doing unfilled polygons. We're supposed to copy
2746 * the edge flag from the user-provided vertex array
2747 * (glEdgeFlagPointer), or otherwise we'll copy from the current value
2748 * of that attribute (starts as 1.0f). This is then used in clipping to
2749 * determine which edges should be drawn as wireframe.
2750 */
2751 current_annotation = "edge flag";
2752 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
2753 glsl_type::float_type, WRITEMASK_XYZW))));
2754 break;
2755 case BRW_VARYING_SLOT_PAD:
2756 /* No need to write to this slot */
2757 break;
2758 default:
2759 emit_generic_urb_slot(reg, varying);
2760 break;
2761 }
2762 }
2763
2764 static int
2765 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2766 {
2767 if (brw->gen >= 6) {
2768 /* URB data written (does not include the message header reg) must
2769 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
2770 * section 5.4.3.2.2: URB_INTERLEAVED.
2771 *
2772 * URB entries are allocated on a multiple of 1024 bits, so an
2773 * extra 128 bits written here to make the end align to 256 is
2774 * no problem.
2775 */
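      /* e.g. a header plus 5 data registers (mlen 6) is padded to mlen 7, so
       * the data payload becomes an even 6 registers.
       */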
2776 if ((mlen % 2) != 1)
2777 mlen++;
2778 }
2779
2780 return mlen;
2781 }
2782
2783 void
2784 vec4_vs_visitor::emit_urb_write_header(int mrf)
2785 {
2786 /* No need to do anything for VS; an implied write to this MRF will be
2787 * performed by VS_OPCODE_URB_WRITE.
2788 */
2789 (void) mrf;
2790 }
2791
2792 vec4_instruction *
2793 vec4_vs_visitor::emit_urb_write_opcode(bool complete)
2794 {
2795 /* For VS, the URB writes end the thread. */
2796 if (complete) {
2797 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
2798 emit_shader_time_end();
2799 }
2800
2801 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
2802 inst->urb_write_flags = complete ?
2803 BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
2804
2805 return inst;
2806 }
2807
2808 /**
2809 * Generates the VUE payload plus the necessary URB write instructions to
2810 * output it.
2811 *
2812 * The VUE layout is documented in Volume 2a.
2813 */
2814 void
2815 vec4_visitor::emit_vertex()
2816 {
2817 /* MRF 0 is reserved for the debugger, so start with message header
2818 * in MRF 1.
2819 */
2820 int base_mrf = 1;
2821 int mrf = base_mrf;
2822 /* In the process of generating our URB write message contents, we
2823 * may need to unspill a register or load from an array. Those
2824 * reads would use MRFs 14-15.
2825 */
2826 int max_usable_mrf = 13;
2827
2828 /* The following assertion verifies that max_usable_mrf causes an
2829 * even-numbered amount of URB write data, which will meet gen6's
2830 * requirements for length alignment.
2831 */
2832 assert ((max_usable_mrf - base_mrf) % 2 == 0);
2833
2834 /* First mrf is the g0-based message header containing URB handles and
2835 * such.
2836 */
2837 emit_urb_write_header(mrf++);
2838
2839 if (brw->gen < 6) {
2840 emit_ndc_computation();
2841 }
2842
2843 /* Lower legacy ff and ClipVertex clipping to clip distances */
2844 if (key->userclip_active && !key->uses_clip_distance) {
2845 current_annotation = "user clip distances";
2846
2847 output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
2848 output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
2849
2850 emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
2851 emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
2852 }
2853
2854 /* Set up the VUE data for the first URB write */
2855 int slot;
2856 for (slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
2857 emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
2858
2859 /* If this was max_usable_mrf, we can't fit anything more into this URB
2860 * WRITE.
2861 */
2862 if (mrf > max_usable_mrf) {
2863 slot++;
2864 break;
2865 }
2866 }
2867
2868 bool complete = slot >= prog_data->vue_map.num_slots;
2869 current_annotation = "URB write";
2870 vec4_instruction *inst = emit_urb_write_opcode(complete);
2871 inst->base_mrf = base_mrf;
2872 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2873
2874 /* Optional second URB write */
2875 if (!complete) {
2876 mrf = base_mrf + 1;
2877
2878 for (; slot < prog_data->vue_map.num_slots; ++slot) {
2879 assert(mrf < max_usable_mrf);
2880
2881 emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
2882 }
2883
2884 current_annotation = "URB write";
2885 inst = emit_urb_write_opcode(true /* complete */);
2886 inst->base_mrf = base_mrf;
2887 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2888 /* URB destination offset. In the previous write, we got MRFs
2889 * 2-13 minus the one header MRF, so 12 regs. URB offset is in
2890 * URB row increments, and each of our MRFs is half of one of
2891 * those, since we're doing interleaved writes.
2892 */
2893 inst->offset = (max_usable_mrf - base_mrf) / 2;
2894 }
2895 }
2896
2897 void
2898 vec4_vs_visitor::emit_thread_end()
2899 {
2900 /* For VS, we always end the thread by emitting a single vertex.
2901 * emit_urb_write_opcode() will take care of setting the eot flag on the
2902 * SEND instruction.
2903 */
2904 emit_vertex();
2905 }
2906
2907 src_reg
2908 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2909 src_reg *reladdr, int reg_offset)
2910 {
2911 /* Because we store the values to scratch interleaved like our
2912 * vertex data, we need to scale the vec4 index by 2.
2913 */
2914 int message_header_scale = 2;
2915
2916 /* Pre-gen6, the message header uses byte offsets instead of vec4
2917 * (16-byte) offset units.
2918 */
2919 if (brw->gen < 6)
2920 message_header_scale *= 16;
2921
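   /* e.g. with no reladdr, reg_offset 3 yields an immediate offset of 6 on
    * Gen6+ (two scratch rows per vec4) or 96 bytes on Gen4-5.
    */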
2922 if (reladdr) {
2923 src_reg index = src_reg(this, glsl_type::int_type);
2924
2925 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2926 emit_before(inst, MUL(dst_reg(index),
2927 index, src_reg(message_header_scale)));
2928
2929 return index;
2930 } else {
2931 return src_reg(reg_offset * message_header_scale);
2932 }
2933 }
2934
2935 src_reg
2936 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2937 src_reg *reladdr, int reg_offset)
2938 {
2939 if (reladdr) {
2940 src_reg index = src_reg(this, glsl_type::int_type);
2941
2942 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2943
2944 /* Pre-gen6, the message header uses byte offsets instead of vec4
2945 * (16-byte) offset units.
2946 */
2947 if (brw->gen < 6) {
2948 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2949 }
2950
2951 return index;
2952 } else {
2953 int message_header_scale = brw->gen < 6 ? 16 : 1;
2954 return src_reg(reg_offset * message_header_scale);
2955 }
2956 }
2957
2958 /**
2959 * Emits an instruction before @inst to load the value named by @orig_src
2960 * from scratch space at @base_offset to @temp.
2961 *
2962 * @base_offset is measured in 32-byte units (the size of a register).
2963 */
2964 void
2965 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2966 dst_reg temp, src_reg orig_src,
2967 int base_offset)
2968 {
2969 int reg_offset = base_offset + orig_src.reg_offset;
2970 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2971
2972 emit_before(inst, SCRATCH_READ(temp, index));
2973 }
2974
2975 /**
2976 * Emits an instruction after @inst to store the value to be written
2977 * to @orig_dst to scratch space at @base_offset, from @temp.
2978 *
2979 * @base_offset is measured in 32-byte units (the size of a register).
2980 */
2981 void
2982 vec4_visitor::emit_scratch_write(vec4_instruction *inst, int base_offset)
2983 {
2984 int reg_offset = base_offset + inst->dst.reg_offset;
2985 src_reg index = get_scratch_offset(inst, inst->dst.reladdr, reg_offset);
2986
2987 /* Create a temporary register to store *inst's result in.
2988 *
2989 * We have to be careful in MOVing from our temporary result register in
2990 * the scratch write. If we swizzle from channels of the temporary that
2991 * weren't initialized, it will confuse live interval analysis, which will
2992 * make spilling fail to make progress.
2993 */
2994 src_reg temp = src_reg(this, glsl_type::vec4_type);
2995 temp.type = inst->dst.type;
2996 int first_writemask_chan = ffs(inst->dst.writemask) - 1;
2997 int swizzles[4];
2998 for (int i = 0; i < 4; i++)
2999 if (inst->dst.writemask & (1 << i))
3000 swizzles[i] = i;
3001 else
3002 swizzles[i] = first_writemask_chan;
3003 temp.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
3004 swizzles[2], swizzles[3]);
3005
3006 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
3007 inst->dst.writemask));
3008 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
3009 write->predicate = inst->predicate;
3010 write->ir = inst->ir;
3011 write->annotation = inst->annotation;
3012 inst->insert_after(write);
3013
3014 inst->dst.file = temp.file;
3015 inst->dst.reg = temp.reg;
3016 inst->dst.reg_offset = temp.reg_offset;
3017 inst->dst.reladdr = NULL;
3018 }
3019
3020 /**
3021 * We can't generally support array access in GRF space, because a
3022 * single instruction's destination can only span 2 contiguous
3023 * registers. So, we send all GRF arrays that get variable index
3024 * access to scratch space.
3025 */
3026 void
3027 vec4_visitor::move_grf_array_access_to_scratch()
3028 {
3029 int scratch_loc[this->virtual_grf_count];
3030
3031 for (int i = 0; i < this->virtual_grf_count; i++) {
3032 scratch_loc[i] = -1;
3033 }
3034
3035 /* First, calculate the set of virtual GRFs that need to be punted
3036 * to scratch due to having any array access on them, and where in
3037 * scratch.
3038 */
3039 foreach_list(node, &this->instructions) {
3040 vec4_instruction *inst = (vec4_instruction *)node;
3041
3042 if (inst->dst.file == GRF && inst->dst.reladdr &&
3043 scratch_loc[inst->dst.reg] == -1) {
3044 scratch_loc[inst->dst.reg] = c->last_scratch;
3045 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
3046 }
3047
3048 for (int i = 0 ; i < 3; i++) {
3049 src_reg *src = &inst->src[i];
3050
3051 if (src->file == GRF && src->reladdr &&
3052 scratch_loc[src->reg] == -1) {
3053 scratch_loc[src->reg] = c->last_scratch;
3054 c->last_scratch += this->virtual_grf_sizes[src->reg];
3055 }
3056 }
3057 }
3058
3059 /* Now, for anything that will be accessed through scratch, rewrite
3060 * it to load/store. Note that this is a _safe list walk, because
3061 * we may generate a new scratch_write instruction after the one
3062 * we're processing.
3063 */
3064 foreach_list_safe(node, &this->instructions) {
3065 vec4_instruction *inst = (vec4_instruction *)node;
3066
3067 /* Set up the annotation tracking for new generated instructions. */
3068 base_ir = inst->ir;
3069 current_annotation = inst->annotation;
3070
3071 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
3072 emit_scratch_write(inst, scratch_loc[inst->dst.reg]);
3073 }
3074
3075 for (int i = 0 ; i < 3; i++) {
3076 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
3077 continue;
3078
3079 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3080
3081 emit_scratch_read(inst, temp, inst->src[i],
3082 scratch_loc[inst->src[i].reg]);
3083
3084 inst->src[i].file = temp.file;
3085 inst->src[i].reg = temp.reg;
3086 inst->src[i].reg_offset = temp.reg_offset;
3087 inst->src[i].reladdr = NULL;
3088 }
3089 }
3090 }
3091
3092 /**
3093 * Emits an instruction before @inst to load the value named by @orig_src
3094 * from the pull constant buffer (surface) at @base_offset to @temp.
3095 */
3096 void
3097 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
3098 dst_reg temp, src_reg orig_src,
3099 int base_offset)
3100 {
3101 int reg_offset = base_offset + orig_src.reg_offset;
3102 src_reg index = src_reg((unsigned)SURF_INDEX_VEC4_CONST_BUFFER);
3103 src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
3104 vec4_instruction *load;
3105
3106 if (brw->gen >= 7) {
3107 dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
3108 grf_offset.type = offset.type;
3109 emit_before(inst, MOV(grf_offset, offset));
3110
3111 load = new(mem_ctx) vec4_instruction(this,
3112 VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
3113 temp, index, src_reg(grf_offset));
3114 } else {
3115 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
3116 temp, index, offset);
3117 load->base_mrf = 14;
3118 load->mlen = 1;
3119 }
3120 emit_before(inst, load);
3121 }
3122
3123 /**
3124 * Implements array access of uniforms by inserting a
3125 * PULL_CONSTANT_LOAD instruction.
3126 *
3127 * Unlike temporary GRF array access (where we don't support it due to
3128 * the difficulty of doing relative addressing on instruction
3129 * destinations), we could potentially do array access of uniforms
3130 * that were loaded in GRF space as push constants. In real-world
3131 * usage we've seen, though, the arrays being used are always larger
3132 * than we could load as push constants, so just always move all
3133 * uniform array access out to a pull constant buffer.
3134 */
3135 void
3136 vec4_visitor::move_uniform_array_access_to_pull_constants()
3137 {
3138 int pull_constant_loc[this->uniforms];
3139
3140 for (int i = 0; i < this->uniforms; i++) {
3141 pull_constant_loc[i] = -1;
3142 }
3143
3144 /* Walk through and find array access of uniforms. Put a copy of that
3145 * uniform in the pull constant buffer.
3146 *
3147 * Note that we don't move constant-indexed accesses to arrays. No
3148 * testing has been done of the performance impact of this choice.
3149 */
3150 foreach_list_safe(node, &this->instructions) {
3151 vec4_instruction *inst = (vec4_instruction *)node;
3152
3153 for (int i = 0 ; i < 3; i++) {
3154 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
3155 continue;
3156
3157 int uniform = inst->src[i].reg;
3158
3159 /* If this array isn't already present in the pull constant buffer,
3160 * add it.
3161 */
3162 if (pull_constant_loc[uniform] == -1) {
3163 const float **values = &prog_data->param[uniform * 4];
3164
3165 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
3166
3167 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
3168 prog_data->pull_param[prog_data->nr_pull_params++]
3169 = values[j];
3170 }
3171 }
3172
3173 /* Set up the annotation tracking for new generated instructions. */
3174 base_ir = inst->ir;
3175 current_annotation = inst->annotation;
3176
3177 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3178
3179 emit_pull_constant_load(inst, temp, inst->src[i],
3180 pull_constant_loc[uniform]);
3181
3182 inst->src[i].file = temp.file;
3183 inst->src[i].reg = temp.reg;
3184 inst->src[i].reg_offset = temp.reg_offset;
3185 inst->src[i].reladdr = NULL;
3186 }
3187 }
3188
3189 /* Now there are no accesses of the UNIFORM file with a reladdr, so
3190 * no need to track them as larger-than-vec4 objects. This will be
3191 * relied on in cutting out unused uniform vectors from push
3192 * constants.
3193 */
3194 split_uniform_registers();
3195 }
3196
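/* If a UD-typed source has the negate modifier set, resolve it by copying
 * the source through a MOV into a temporary (the MOV applies the negation)
 * and pointing the source at the temporary instead.
 */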
3197 void
3198 vec4_visitor::resolve_ud_negate(src_reg *reg)
3199 {
3200 if (reg->type != BRW_REGISTER_TYPE_UD ||
3201 !reg->negate)
3202 return;
3203
3204 src_reg temp = src_reg(this, glsl_type::uvec4_type);
3205 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
3206 *reg = temp;
3207 }
3208
3209 vec4_visitor::vec4_visitor(struct brw_context *brw,
3210 struct brw_vec4_compile *c,
3211 struct gl_program *prog,
3212 const struct brw_vec4_prog_key *key,
3213 struct brw_vec4_prog_data *prog_data,
3214 struct gl_shader_program *shader_prog,
3215 struct brw_shader *shader,
3216 void *mem_ctx,
3217 bool debug_flag)
3218 : debug_flag(debug_flag)
3219 {
3220 this->brw = brw;
3221 this->ctx = &brw->ctx;
3222 this->shader_prog = shader_prog;
3223 this->shader = shader;
3224
3225 this->mem_ctx = mem_ctx;
3226 this->failed = false;
3227
3228 this->base_ir = NULL;
3229 this->current_annotation = NULL;
3230 memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
3231
3232 this->c = c;
3233 this->prog = prog;
3234 this->key = key;
3235 this->prog_data = prog_data;
3236
3237 this->variable_ht = hash_table_ctor(0,
3238 hash_table_pointer_hash,
3239 hash_table_pointer_compare);
3240
3241 this->virtual_grf_start = NULL;
3242 this->virtual_grf_end = NULL;
3243 this->virtual_grf_sizes = NULL;
3244 this->virtual_grf_count = 0;
3245 this->virtual_grf_reg_map = NULL;
3246 this->virtual_grf_reg_count = 0;
3247 this->virtual_grf_array_size = 0;
3248 this->live_intervals_valid = false;
3249
3250 this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
3251
3252 this->uniforms = 0;
3253 }
3254
3255 vec4_visitor::~vec4_visitor()
3256 {
3257 hash_table_dtor(this->variable_ht);
3258 }
3259
3260
3261 vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw,
3262 struct brw_vs_compile *vs_compile,
3263 struct brw_vs_prog_data *vs_prog_data,
3264 struct gl_shader_program *prog,
3265 struct brw_shader *shader,
3266 void *mem_ctx)
3267 : vec4_visitor(brw, &vs_compile->base, &vs_compile->vp->program.Base,
3268 &vs_compile->key.base, &vs_prog_data->base, prog, shader,
3269 mem_ctx, INTEL_DEBUG & DEBUG_VS),
3270 vs_compile(vs_compile),
3271 vs_prog_data(vs_prog_data)
3272 {
3273 }
3274
3275
3276 void
3277 vec4_visitor::fail(const char *format, ...)
3278 {
3279 va_list va;
3280 char *msg;
3281
3282 if (failed)
3283 return;
3284
3285 failed = true;
3286
3287 va_start(va, format);
3288 msg = ralloc_vasprintf(mem_ctx, format, va);
3289 va_end(va);
3290 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
3291
3292 this->fail_msg = msg;
3293
3294 if (debug_flag) {
3295 fprintf(stderr, "%s", msg);
3296 }
3297 }
3298
3299 } /* namespace brw */