i965/vs: Add support for emitting DPH opcodes.
[mesa.git] src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 #include "program/sampler.h"
29 }
30
31 namespace brw {
32
33 vec4_instruction::vec4_instruction(vec4_visitor *v,
34 enum opcode opcode, dst_reg dst,
35 src_reg src0, src_reg src1, src_reg src2)
36 {
37 this->opcode = opcode;
38 this->dst = dst;
39 this->src[0] = src0;
40 this->src[1] = src1;
41 this->src[2] = src2;
42 this->ir = v->base_ir;
43 this->annotation = v->current_annotation;
44 }
45
46 vec4_instruction *
47 vec4_visitor::emit(vec4_instruction *inst)
48 {
49 this->instructions.push_tail(inst);
50
51 return inst;
52 }
53
54 vec4_instruction *
55 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
56 {
57 new_inst->ir = inst->ir;
58 new_inst->annotation = inst->annotation;
59
60 inst->insert_before(new_inst);
61
62 return inst;
63 }
64
65 vec4_instruction *
66 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
67 src_reg src0, src_reg src1, src_reg src2)
68 {
69 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
70 src0, src1, src2));
71 }
72
73
74 vec4_instruction *
75 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
76 {
77 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
78 }
79
80 vec4_instruction *
81 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
82 {
83 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
84 }
85
86 vec4_instruction *
87 vec4_visitor::emit(enum opcode opcode)
88 {
89 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
90 }
91
92 #define ALU1(op) \
93 vec4_instruction * \
94 vec4_visitor::op(dst_reg dst, src_reg src0) \
95 { \
96 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
97 src0); \
98 }
99
100 #define ALU2(op) \
101 vec4_instruction * \
102 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
103 { \
104 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
105 src0, src1); \
106 }
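/* These ALU helpers (and the explicit helpers below such as IF, CMP and
 * SCRATCH_READ/WRITE) only construct a vec4_instruction; they do not add
 * it to the instruction stream.  The caller wraps them in emit(), e.g.
 * emit(MOV(dst, src)), or inserts them with emit_before().
 */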
107
108 ALU1(NOT)
109 ALU1(MOV)
110 ALU1(FRC)
111 ALU1(RNDD)
112 ALU1(RNDE)
113 ALU1(RNDZ)
114 ALU2(ADD)
115 ALU2(MUL)
116 ALU2(MACH)
117 ALU2(AND)
118 ALU2(OR)
119 ALU2(XOR)
120 ALU2(DP3)
121 ALU2(DP4)
122 ALU2(DPH)
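/* DPH is the homogeneous dot product:
 *
 *    dst = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
 *
 * i.e. src0 is treated as if its w component were 1.0, which is convenient
 * for plane-equation and position transforms.  It is used like the other
 * helpers, e.g. emit(DPH(dst, src0, src1)).
 */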
123
124 /** Gen4 predicated IF. */
125 vec4_instruction *
126 vec4_visitor::IF(uint32_t predicate)
127 {
128 vec4_instruction *inst;
129
130 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
131 inst->predicate = predicate;
132
133 return inst;
134 }
135
136 /** Gen6+ IF with embedded comparison. */
137 vec4_instruction *
138 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
139 {
140 assert(intel->gen >= 6);
141
142 vec4_instruction *inst;
143
144 resolve_ud_negate(&src0);
145 resolve_ud_negate(&src1);
146
147 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
148 src0, src1);
149 inst->conditional_mod = condition;
150
151 return inst;
152 }
153
154 /**
155 * CMP: Sets the low bit of the destination channels with the result
156 * of the comparison, while the upper bits are undefined, and updates
157 * the flag register with the packed 16 bits of the result.
158 */
159 vec4_instruction *
160 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
161 {
162 vec4_instruction *inst;
163
164 /* original gen4 does type conversion to the destination type
165 * before comparison, producing garbage results for floating
166 * point comparisons.
167 */
168 if (intel->gen == 4) {
169 dst.type = src0.type;
170 if (dst.file == HW_REG)
171 dst.fixed_hw_reg.type = dst.type;
172 }
173
174 resolve_ud_negate(&src0);
175 resolve_ud_negate(&src1);
176
177 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
178 inst->conditional_mod = condition;
179
180 return inst;
181 }
182
183 vec4_instruction *
184 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
185 {
186 vec4_instruction *inst;
187
188 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
189 dst, index);
190 inst->base_mrf = 14;
191 inst->mlen = 2;
192
193 return inst;
194 }
195
196 vec4_instruction *
197 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
198 {
199 vec4_instruction *inst;
200
201 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
202 dst, src, index);
203 inst->base_mrf = 13;
204 inst->mlen = 3;
205
206 return inst;
207 }
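/* For both scratch messages, base_mrf is the first message register (MRF)
 * of the send payload and mlen is the payload length in registers.  Fixed
 * MRFs near the top of the range are used so the payload stays clear of
 * MRFs written for other messages.
 */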
208
209 void
210 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
211 {
212 static enum opcode dot_opcodes[] = {
213 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
214 };
215
216 emit(dot_opcodes[elements - 2], dst, src0, src1);
217 }
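/* emit_dp() maps a 2/3/4 component operand width onto DP2/DP3/DP4; it is
 * what ir_binop_dot below ends up calling.  The homogeneous DPH helper
 * above is separate and is not reachable through this table.
 */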
218
219 void
220 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
221 {
222 /* The gen6 math instruction ignores the source modifiers --
223 * swizzle, abs, negate, and at least some parts of the register
224 * region description.
225 *
226 * While it would seem that this MOV could be avoided at this point
227 * in the case that the swizzle is matched up with the destination
228 * writemask, note that uniform packing and register allocation
229 * could rearrange our swizzle, so let's leave this matter up to
230 * copy propagation later.
231 */
232 src_reg temp_src = src_reg(this, glsl_type::vec4_type);
233 emit(MOV(dst_reg(temp_src), src));
234
235 if (dst.writemask != WRITEMASK_XYZW) {
236 /* The gen6 math instruction must be align1, so we can't do
237 * writemasks.
238 */
239 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
240
241 emit(opcode, temp_dst, temp_src);
242
243 emit(MOV(dst, src_reg(temp_dst)));
244 } else {
245 emit(opcode, dst, temp_src);
246 }
247 }
248
249 void
250 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
251 {
252 vec4_instruction *inst = emit(opcode, dst, src);
253 inst->base_mrf = 1;
254 inst->mlen = 1;
255 }
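/* On gen4/gen5, math is a send to the shared math unit rather than a
 * regular ALU instruction, so it needs a message payload: base_mrf and
 * mlen describe the MRF holding the operand.
 */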
256
257 void
258 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
259 {
260 switch (opcode) {
261 case SHADER_OPCODE_RCP:
262 case SHADER_OPCODE_RSQ:
263 case SHADER_OPCODE_SQRT:
264 case SHADER_OPCODE_EXP2:
265 case SHADER_OPCODE_LOG2:
266 case SHADER_OPCODE_SIN:
267 case SHADER_OPCODE_COS:
268 break;
269 default:
270 assert(!"not reached: bad math opcode");
271 return;
272 }
273
274 if (intel->gen >= 7) {
275 emit(opcode, dst, src);
276 } else if (intel->gen == 6) {
277 return emit_math1_gen6(opcode, dst, src);
278 } else {
279 return emit_math1_gen4(opcode, dst, src);
280 }
281 }
282
283 void
284 vec4_visitor::emit_math2_gen6(enum opcode opcode,
285 dst_reg dst, src_reg src0, src_reg src1)
286 {
287 src_reg expanded;
288
289 /* The gen6 math instruction ignores the source modifiers --
290 * swizzle, abs, negate, and at least some parts of the register
291 * region description. Move the sources to temporaries to make it
292 * generally work.
293 */
294
295 expanded = src_reg(this, glsl_type::vec4_type);
296 expanded.type = src0.type;
297 emit(MOV(dst_reg(expanded), src0));
298 src0 = expanded;
299
300 expanded = src_reg(this, glsl_type::vec4_type);
301 expanded.type = src1.type;
302 emit(MOV(dst_reg(expanded), src1));
303 src1 = expanded;
304
305 if (dst.writemask != WRITEMASK_XYZW) {
306 /* The gen6 math instruction must be align1, so we can't do
307 * writemasks.
308 */
309 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
310 temp_dst.type = dst.type;
311
312 emit(opcode, temp_dst, src0, src1);
313
314 emit(MOV(dst, src_reg(temp_dst)));
315 } else {
316 emit(opcode, dst, src0, src1);
317 }
318 }
319
320 void
321 vec4_visitor::emit_math2_gen4(enum opcode opcode,
322 dst_reg dst, src_reg src0, src_reg src1)
323 {
324 vec4_instruction *inst = emit(opcode, dst, src0, src1);
325 inst->base_mrf = 1;
326 inst->mlen = 2;
327 }
328
329 void
330 vec4_visitor::emit_math(enum opcode opcode,
331 dst_reg dst, src_reg src0, src_reg src1)
332 {
333 switch (opcode) {
334 case SHADER_OPCODE_POW:
335 case SHADER_OPCODE_INT_QUOTIENT:
336 case SHADER_OPCODE_INT_REMAINDER:
337 break;
338 default:
339 assert(!"not reached: unsupported binary math opcode");
340 return;
341 }
342
343 if (intel->gen >= 7) {
344 emit(opcode, dst, src0, src1);
345 } else if (intel->gen == 6) {
346 return emit_math2_gen6(opcode, dst, src0, src1);
347 } else {
348 return emit_math2_gen4(opcode, dst, src0, src1);
349 }
350 }
351
352 void
353 vec4_visitor::visit_instructions(const exec_list *list)
354 {
355 foreach_list(node, list) {
356 ir_instruction *ir = (ir_instruction *)node;
357
358 base_ir = ir;
359 ir->accept(this);
360 }
361 }
362
363
364 static int
365 type_size(const struct glsl_type *type)
366 {
367 unsigned int i;
368 int size;
369
370 switch (type->base_type) {
371 case GLSL_TYPE_UINT:
372 case GLSL_TYPE_INT:
373 case GLSL_TYPE_FLOAT:
374 case GLSL_TYPE_BOOL:
375 if (type->is_matrix()) {
376 return type->matrix_columns;
377 } else {
378 /* Regardless of size of vector, it gets a vec4. This is bad
379 * packing for things like floats, but otherwise arrays become a
380 * mess. Hopefully a later pass over the code can pack scalars
381 * down if appropriate.
382 */
383 return 1;
384 }
385 case GLSL_TYPE_ARRAY:
386 assert(type->length > 0);
387 return type_size(type->fields.array) * type->length;
388 case GLSL_TYPE_STRUCT:
389 size = 0;
390 for (i = 0; i < type->length; i++) {
391 size += type_size(type->fields.structure[i].type);
392 }
393 return size;
394 case GLSL_TYPE_SAMPLER:
395 /* Samplers take up one slot in UNIFORMS[], but they're baked in
396 * at link time.
397 */
398 return 1;
399 default:
400 assert(0);
401 return 0;
402 }
403 }
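/* type_size() counts storage in vec4 slots: any scalar or vector takes one
 * slot, a matrix takes one slot per column (mat4 -> 4), an array takes
 * element_size * length (vec4 a[3] -> 3), and a struct is the sum of its
 * fields.
 */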
404
405 int
406 vec4_visitor::virtual_grf_alloc(int size)
407 {
408 if (virtual_grf_array_size <= virtual_grf_count) {
409 if (virtual_grf_array_size == 0)
410 virtual_grf_array_size = 16;
411 else
412 virtual_grf_array_size *= 2;
413 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
414 virtual_grf_array_size);
415 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
416 virtual_grf_array_size);
417 }
418 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
419 virtual_grf_reg_count += size;
420 virtual_grf_sizes[virtual_grf_count] = size;
421 return virtual_grf_count++;
422 }
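/* virtual_grf_sizes[] records how many vec4 registers each virtual GRF
 * spans, and virtual_grf_reg_map[] its first index in the flattened
 * register space; both arrays grow geometrically as GRFs are allocated.
 */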
423
424 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
425 {
426 init();
427
428 this->file = GRF;
429 this->reg = v->virtual_grf_alloc(type_size(type));
430
431 if (type->is_array() || type->is_record()) {
432 this->swizzle = BRW_SWIZZLE_NOOP;
433 } else {
434 this->swizzle = swizzle_for_size(type->vector_elements);
435 }
436
437 this->type = brw_type_for_base_type(type);
438 }
439
440 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
441 {
442 init();
443
444 this->file = GRF;
445 this->reg = v->virtual_grf_alloc(type_size(type));
446
447 if (type->is_array() || type->is_record()) {
448 this->writemask = WRITEMASK_XYZW;
449 } else {
450 this->writemask = (1 << type->vector_elements) - 1;
451 }
452
453 this->type = brw_type_for_base_type(type);
454 }
455
456 /* Our support for uniforms is piggy-backed on the struct
457 * gl_vertex_program, because that's where the values actually
458 * get stored, rather than in some global gl_shader_program uniform
459 * store.
460 */
461 int
462 vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
463 {
464 unsigned int offset = 0;
465 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
466
467 if (type->is_matrix()) {
468 const glsl_type *column = type->column_type();
469
470 for (unsigned int i = 0; i < type->matrix_columns; i++) {
471 offset += setup_uniform_values(loc + offset, column);
472 }
473
474 return offset;
475 }
476
477 switch (type->base_type) {
478 case GLSL_TYPE_FLOAT:
479 case GLSL_TYPE_UINT:
480 case GLSL_TYPE_INT:
481 case GLSL_TYPE_BOOL:
482 for (unsigned int i = 0; i < type->vector_elements; i++) {
483 c->prog_data.param[this->uniforms * 4 + i] = &values[i];
484 }
485
486 /* Set up pad elements to get things aligned to a vec4 boundary. */
487 for (unsigned int i = type->vector_elements; i < 4; i++) {
488 static float zero = 0;
489
490 c->prog_data.param[this->uniforms * 4 + i] = &zero;
491 }
492
493 /* Track the size of this uniform vector, for future packing of
494 * uniforms.
495 */
496 this->uniform_vector_size[this->uniforms] = type->vector_elements;
497 this->uniforms++;
498
499 return 1;
500
501 case GLSL_TYPE_STRUCT:
502 for (unsigned int i = 0; i < type->length; i++) {
503 offset += setup_uniform_values(loc + offset,
504 type->fields.structure[i].type);
505 }
506 return offset;
507
508 case GLSL_TYPE_ARRAY:
509 for (unsigned int i = 0; i < type->length; i++) {
510 offset += setup_uniform_values(loc + offset, type->fields.array);
511 }
512 return offset;
513
514 case GLSL_TYPE_SAMPLER:
515 /* The sampler takes up a slot, but we don't use any values from it. */
516 return 1;
517
518 default:
519 assert(!"not reached");
520 return 0;
521 }
522 }
523
524 void
525 vec4_visitor::setup_uniform_clipplane_values()
526 {
527 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
528
529 /* Pre-Gen6, we compact clip planes. For example, if the user
530 * enables just clip planes 0, 1, and 3, we will enable clip planes
531 * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
532 * plane 2. This simplifies the implementation of the Gen6 clip
533 * thread.
534 *
535 * In Gen6 and later, we don't compact clip planes, because this
536 * simplifies the implementation of gl_ClipDistance.
537 */
538 int compacted_clipplane_index = 0;
539 for (int i = 0; i < c->key.nr_userclip_plane_consts; ++i) {
540 if (intel->gen < 6 &&
541 !(c->key.userclip_planes_enabled_gen_4_5 & (1 << i))) {
542 continue;
543 }
544 this->uniform_vector_size[this->uniforms] = 4;
545 this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
546 this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
547 for (int j = 0; j < 4; ++j) {
548 c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
549 }
550 ++compacted_clipplane_index;
551 ++this->uniforms;
552 }
553 }
554
555 /* Our support for builtin uniforms is even scarier than non-builtin.
556 * It sits on top of the PROG_STATE_VAR parameters that are
557 * automatically updated from GL context state.
558 */
559 void
560 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
561 {
562 const ir_state_slot *const slots = ir->state_slots;
563 assert(ir->state_slots != NULL);
564
565 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
566 /* This state reference has already been setup by ir_to_mesa,
567 * but we'll get the same index back here. We can reference
568 * ParameterValues directly, since unlike brw_fs.cpp, we never
569 * add new state references during compile.
570 */
571 int index = _mesa_add_state_reference(this->vp->Base.Parameters,
572 (gl_state_index *)slots[i].tokens);
573 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
574
575 this->uniform_vector_size[this->uniforms] = 0;
576 /* Add each of the unique swizzled channels of the element.
577 * This will end up matching the size of the glsl_type of this field.
578 */
579 int last_swiz = -1;
580 for (unsigned int j = 0; j < 4; j++) {
581 int swiz = GET_SWZ(slots[i].swizzle, j);
582 last_swiz = swiz;
583
584 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
585 if (swiz <= last_swiz)
586 this->uniform_vector_size[this->uniforms]++;
587 }
588 this->uniforms++;
589 }
590 }
591
592 dst_reg *
593 vec4_visitor::variable_storage(ir_variable *var)
594 {
595 return (dst_reg *)hash_table_find(this->variable_ht, var);
596 }
597
598 void
599 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
600 {
601 ir_expression *expr = ir->as_expression();
602
603 *predicate = BRW_PREDICATE_NORMAL;
604
605 if (expr) {
606 src_reg op[2];
607 vec4_instruction *inst;
608
609 assert(expr->get_num_operands() <= 2);
610 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
611 expr->operands[i]->accept(this);
612 op[i] = this->result;
613
614 resolve_ud_negate(&op[i]);
615 }
616
617 switch (expr->operation) {
618 case ir_unop_logic_not:
619 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
620 inst->conditional_mod = BRW_CONDITIONAL_Z;
621 break;
622
623 case ir_binop_logic_xor:
624 inst = emit(XOR(dst_null_d(), op[0], op[1]));
625 inst->conditional_mod = BRW_CONDITIONAL_NZ;
626 break;
627
628 case ir_binop_logic_or:
629 inst = emit(OR(dst_null_d(), op[0], op[1]));
630 inst->conditional_mod = BRW_CONDITIONAL_NZ;
631 break;
632
633 case ir_binop_logic_and:
634 inst = emit(AND(dst_null_d(), op[0], op[1]));
635 inst->conditional_mod = BRW_CONDITIONAL_NZ;
636 break;
637
638 case ir_unop_f2b:
639 if (intel->gen >= 6) {
640 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
641 } else {
642 inst = emit(MOV(dst_null_f(), op[0]));
643 inst->conditional_mod = BRW_CONDITIONAL_NZ;
644 }
645 break;
646
647 case ir_unop_i2b:
648 if (intel->gen >= 6) {
649 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
650 } else {
651 inst = emit(MOV(dst_null_d(), op[0]));
652 inst->conditional_mod = BRW_CONDITIONAL_NZ;
653 }
654 break;
655
656 case ir_binop_all_equal:
657 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
658 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
659 break;
660
661 case ir_binop_any_nequal:
662 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
663 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
664 break;
665
666 case ir_unop_any:
667 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
668 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
669 break;
670
671 case ir_binop_greater:
672 case ir_binop_gequal:
673 case ir_binop_less:
674 case ir_binop_lequal:
675 case ir_binop_equal:
676 case ir_binop_nequal:
677 emit(CMP(dst_null_d(), op[0], op[1],
678 brw_conditional_for_comparison(expr->operation)));
679 break;
680
681 default:
682 assert(!"not reached");
683 break;
684 }
685 return;
686 }
687
688 ir->accept(this);
689
690 resolve_ud_negate(&this->result);
691
692 if (intel->gen >= 6) {
693 vec4_instruction *inst = emit(AND(dst_null_d(),
694 this->result, src_reg(1)));
695 inst->conditional_mod = BRW_CONDITIONAL_NZ;
696 } else {
697 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
698 inst->conditional_mod = BRW_CONDITIONAL_NZ;
699 }
700 }
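/* On return the flag register holds the result of evaluating the boolean
 * condition, and *predicate tells the caller which predication mode to put
 * on the predicated instruction: BRW_PREDICATE_NORMAL for scalar
 * conditions, or an ALIGN16 ANY4H/ALL4H predicate for the vector
 * all_equal/any_nequal/any cases.
 */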
701
702 /**
703 * Emit a gen6 IF statement with the comparison folded into the IF
704 * instruction.
705 */
706 void
707 vec4_visitor::emit_if_gen6(ir_if *ir)
708 {
709 ir_expression *expr = ir->condition->as_expression();
710
711 if (expr) {
712 src_reg op[2];
713 dst_reg temp;
714
715 assert(expr->get_num_operands() <= 2);
716 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
717 expr->operands[i]->accept(this);
718 op[i] = this->result;
719 }
720
721 switch (expr->operation) {
722 case ir_unop_logic_not:
723 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
724 return;
725
726 case ir_binop_logic_xor:
727 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
728 return;
729
730 case ir_binop_logic_or:
731 temp = dst_reg(this, glsl_type::bool_type);
732 emit(OR(temp, op[0], op[1]));
733 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
734 return;
735
736 case ir_binop_logic_and:
737 temp = dst_reg(this, glsl_type::bool_type);
738 emit(AND(temp, op[0], op[1]));
739 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
740 return;
741
742 case ir_unop_f2b:
743 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
744 return;
745
746 case ir_unop_i2b:
747 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
748 return;
749
750 case ir_binop_greater:
751 case ir_binop_gequal:
752 case ir_binop_less:
753 case ir_binop_lequal:
754 case ir_binop_equal:
755 case ir_binop_nequal:
756 emit(IF(op[0], op[1],
757 brw_conditional_for_comparison(expr->operation)));
758 return;
759
760 case ir_binop_all_equal:
761 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
762 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
763 return;
764
765 case ir_binop_any_nequal:
766 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
767 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
768 return;
769
770 case ir_unop_any:
771 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
772 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
773 return;
774
775 default:
776 assert(!"not reached");
777 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
778 return;
779 }
780 return;
781 }
782
783 ir->condition->accept(this);
784
785 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
786 }
787
788 void
789 vec4_visitor::visit(ir_variable *ir)
790 {
791 dst_reg *reg = NULL;
792
793 if (variable_storage(ir))
794 return;
795
796 switch (ir->mode) {
797 case ir_var_in:
798 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
799
800 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
801 * come in as floating point conversions of the integer values.
802 */
803 for (int i = ir->location; i < ir->location + type_size(ir->type); i++) {
804 if (!c->key.gl_fixed_input_size[i])
805 continue;
806
807 dst_reg dst = *reg;
808 dst.type = brw_type_for_base_type(ir->type);
809 dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1;
810 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
811 }
812 break;
813
814 case ir_var_out:
815 reg = new(mem_ctx) dst_reg(this, ir->type);
816
817 for (int i = 0; i < type_size(ir->type); i++) {
818 output_reg[ir->location + i] = *reg;
819 output_reg[ir->location + i].reg_offset = i;
820 output_reg[ir->location + i].type =
821 brw_type_for_base_type(ir->type->get_scalar_type());
822 output_reg_annotation[ir->location + i] = ir->name;
823 }
824 break;
825
826 case ir_var_auto:
827 case ir_var_temporary:
828 reg = new(mem_ctx) dst_reg(this, ir->type);
829 break;
830
831 case ir_var_uniform:
832 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
833
834 /* Thanks to the lower_ubo_reference pass, we will see only
835 * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
836 * variables, so no need for them to be in variable_ht.
837 */
838 if (ir->uniform_block != -1)
839 return;
840
841 /* Track how big the whole uniform variable is, in case we need to put a
842 * copy of its data into pull constants for array access.
843 */
844 this->uniform_size[this->uniforms] = type_size(ir->type);
845
846 if (!strncmp(ir->name, "gl_", 3)) {
847 setup_builtin_uniform_values(ir);
848 } else {
849 setup_uniform_values(ir->location, ir->type);
850 }
851 break;
852
853 case ir_var_system_value:
854 /* VertexID is stored by the VF as the last vertex element, but
855 * we don't represent it with a flag in inputs_read, so we call
856 * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
857 */
858 reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
859 prog_data->uses_vertexid = true;
860
861 switch (ir->location) {
862 case SYSTEM_VALUE_VERTEX_ID:
863 reg->writemask = WRITEMASK_X;
864 break;
865 case SYSTEM_VALUE_INSTANCE_ID:
866 reg->writemask = WRITEMASK_Y;
867 break;
868 default:
869 assert(!"not reached");
870 break;
871 }
872 break;
873
874 default:
875 assert(!"not reached");
876 }
877
878 reg->type = brw_type_for_base_type(ir->type);
879 hash_table_insert(this->variable_ht, reg, ir);
880 }
881
882 void
883 vec4_visitor::visit(ir_loop *ir)
884 {
885 dst_reg counter;
886
887 /* We don't want debugging output to print the whole body of the
888 * loop as the annotation.
889 */
890 this->base_ir = NULL;
891
892 if (ir->counter != NULL) {
893 this->base_ir = ir->counter;
894 ir->counter->accept(this);
895 counter = *(variable_storage(ir->counter));
896
897 if (ir->from != NULL) {
898 this->base_ir = ir->from;
899 ir->from->accept(this);
900
901 emit(MOV(counter, this->result));
902 }
903 }
904
905 emit(BRW_OPCODE_DO);
906
907 if (ir->to) {
908 this->base_ir = ir->to;
909 ir->to->accept(this);
910
911 emit(CMP(dst_null_d(), src_reg(counter), this->result,
912 brw_conditional_for_comparison(ir->cmp)));
913
914 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
915 inst->predicate = BRW_PREDICATE_NORMAL;
916 }
917
918 visit_instructions(&ir->body_instructions);
919
920
921 if (ir->increment) {
922 this->base_ir = ir->increment;
923 ir->increment->accept(this);
924 emit(ADD(counter, src_reg(counter), this->result));
925 }
926
927 emit(BRW_OPCODE_WHILE);
928 }
929
930 void
931 vec4_visitor::visit(ir_loop_jump *ir)
932 {
933 switch (ir->mode) {
934 case ir_loop_jump::jump_break:
935 emit(BRW_OPCODE_BREAK);
936 break;
937 case ir_loop_jump::jump_continue:
938 emit(BRW_OPCODE_CONTINUE);
939 break;
940 }
941 }
942
943
944 void
945 vec4_visitor::visit(ir_function_signature *ir)
946 {
947 assert(0);
948 (void)ir;
949 }
950
951 void
952 vec4_visitor::visit(ir_function *ir)
953 {
954 /* Ignore function bodies other than main() -- we shouldn't see calls to
955 * them since they should all be inlined.
956 */
957 if (strcmp(ir->name, "main") == 0) {
958 const ir_function_signature *sig;
959 exec_list empty;
960
961 sig = ir->matching_signature(&empty);
962
963 assert(sig);
964
965 visit_instructions(&sig->body);
966 }
967 }
968
969 bool
970 vec4_visitor::try_emit_sat(ir_expression *ir)
971 {
972 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
973 if (!sat_src)
974 return false;
975
976 sat_src->accept(this);
977 src_reg src = this->result;
978
979 this->result = src_reg(this, ir->type);
980 vec4_instruction *inst;
981 inst = emit(MOV(dst_reg(this->result), src));
982 inst->saturate = true;
983
984 return true;
985 }
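/* When the IR marks an rvalue as something to saturate, the value is
 * computed normally and then copied through a saturating MOV into a fresh
 * temporary, so everything downstream sees the clamped result.
 */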
986
987 void
988 vec4_visitor::emit_bool_comparison(unsigned int op,
989 dst_reg dst, src_reg src0, src_reg src1)
990 {
991 /* original gen4 does destination conversion before comparison. */
992 if (intel->gen < 5)
993 dst.type = src0.type;
994
995 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
996
997 dst.type = BRW_REGISTER_TYPE_D;
998 emit(AND(dst, src_reg(dst), src_reg(0x1)));
999 }
1000
1001 void
1002 vec4_visitor::visit(ir_expression *ir)
1003 {
1004 unsigned int operand;
1005 src_reg op[Elements(ir->operands)];
1006 src_reg result_src;
1007 dst_reg result_dst;
1008 vec4_instruction *inst;
1009
1010 if (try_emit_sat(ir))
1011 return;
1012
1013 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1014 this->result.file = BAD_FILE;
1015 ir->operands[operand]->accept(this);
1016 if (this->result.file == BAD_FILE) {
1017 printf("Failed to get tree for expression operand:\n");
1018 ir->operands[operand]->print();
1019 exit(1);
1020 }
1021 op[operand] = this->result;
1022
1023 /* Matrix expression operands should have been broken down to vector
1024 * operations already.
1025 */
1026 assert(!ir->operands[operand]->type->is_matrix());
1027 }
1028
1029 int vector_elements = ir->operands[0]->type->vector_elements;
1030 if (ir->operands[1]) {
1031 vector_elements = MAX2(vector_elements,
1032 ir->operands[1]->type->vector_elements);
1033 }
1034
1035 this->result.file = BAD_FILE;
1036
1037 /* Storage for our result. Ideally for an assignment we'd be using
1038 * the actual storage for the result here, instead.
1039 */
1040 result_src = src_reg(this, ir->type);
1041 /* convenience for the emit functions below. */
1042 result_dst = dst_reg(result_src);
1043 /* If nothing special happens, this is the result. */
1044 this->result = result_src;
1045 /* Limit writes to the channels that will be used by result_src later.
1046 * This does limit this temp's use as a temporary for multi-instruction
1047 * sequences.
1048 */
1049 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1050
1051 switch (ir->operation) {
1052 case ir_unop_logic_not:
1053 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
1054 * ones complement of the whole register, not just bit 0.
1055 */
1056 emit(XOR(result_dst, op[0], src_reg(1)));
1057 break;
1058 case ir_unop_neg:
1059 op[0].negate = !op[0].negate;
1060 this->result = op[0];
1061 break;
1062 case ir_unop_abs:
1063 op[0].abs = true;
1064 op[0].negate = false;
1065 this->result = op[0];
1066 break;
1067
1068 case ir_unop_sign:
1069 emit(MOV(result_dst, src_reg(0.0f)));
1070
1071 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1072 inst = emit(MOV(result_dst, src_reg(1.0f)));
1073 inst->predicate = BRW_PREDICATE_NORMAL;
1074
1075 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1076 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1077 inst->predicate = BRW_PREDICATE_NORMAL;
1078
1079 break;
1080
1081 case ir_unop_rcp:
1082 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1083 break;
1084
1085 case ir_unop_exp2:
1086 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1087 break;
1088 case ir_unop_log2:
1089 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1090 break;
1091 case ir_unop_exp:
1092 case ir_unop_log:
1093 assert(!"not reached: should be handled by ir_explog_to_explog2");
1094 break;
1095 case ir_unop_sin:
1096 case ir_unop_sin_reduced:
1097 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1098 break;
1099 case ir_unop_cos:
1100 case ir_unop_cos_reduced:
1101 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1102 break;
1103
1104 case ir_unop_dFdx:
1105 case ir_unop_dFdy:
1106 assert(!"derivatives not valid in vertex shader");
1107 break;
1108
1109 case ir_unop_noise:
1110 assert(!"not reached: should be handled by lower_noise");
1111 break;
1112
1113 case ir_binop_add:
1114 emit(ADD(result_dst, op[0], op[1]));
1115 break;
1116 case ir_binop_sub:
1117 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1118 break;
1119
1120 case ir_binop_mul:
1121 if (ir->type->is_integer()) {
1122 /* For integer multiplication, the MUL uses the low 16 bits
1123 * of one of the operands (src0 on gen6, src1 on gen7). The
1124 * MACH accumulates in the contribution of the upper 16 bits
1125 * of that operand.
1126 *
1127 * FINISHME: Emit just the MUL if we know an operand is small
1128 * enough.
1129 */
1130 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1131
1132 emit(MUL(acc, op[0], op[1]));
1133 emit(MACH(dst_null_d(), op[0], op[1]));
1134 emit(MOV(result_dst, src_reg(acc)));
1135 } else {
1136 emit(MUL(result_dst, op[0], op[1]));
1137 }
1138 break;
1139 case ir_binop_div:
1140 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1141 assert(ir->type->is_integer());
1142 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1143 break;
1144 case ir_binop_mod:
1145 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1146 assert(ir->type->is_integer());
1147 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1148 break;
1149
1150 case ir_binop_less:
1151 case ir_binop_greater:
1152 case ir_binop_lequal:
1153 case ir_binop_gequal:
1154 case ir_binop_equal:
1155 case ir_binop_nequal: {
1156 emit(CMP(result_dst, op[0], op[1],
1157 brw_conditional_for_comparison(ir->operation)));
1158 emit(AND(result_dst, result_src, src_reg(0x1)));
1159 break;
1160 }
1161
1162 case ir_binop_all_equal:
1163 /* "==" operator producing a scalar boolean. */
1164 if (ir->operands[0]->type->is_vector() ||
1165 ir->operands[1]->type->is_vector()) {
1166 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1167 emit(MOV(result_dst, src_reg(0)));
1168 inst = emit(MOV(result_dst, src_reg(1)));
1169 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1170 } else {
1171 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1172 emit(AND(result_dst, result_src, src_reg(0x1)));
1173 }
1174 break;
1175 case ir_binop_any_nequal:
1176 /* "!=" operator producing a scalar boolean. */
1177 if (ir->operands[0]->type->is_vector() ||
1178 ir->operands[1]->type->is_vector()) {
1179 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1180
1181 emit(MOV(result_dst, src_reg(0)));
1182 inst = emit(MOV(result_dst, src_reg(1)));
1183 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1184 } else {
1185 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1186 emit(AND(result_dst, result_src, src_reg(0x1)));
1187 }
1188 break;
1189
1190 case ir_unop_any:
1191 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1192 emit(MOV(result_dst, src_reg(0)));
1193
1194 inst = emit(MOV(result_dst, src_reg(1)));
1195 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1196 break;
1197
1198 case ir_binop_logic_xor:
1199 emit(XOR(result_dst, op[0], op[1]));
1200 break;
1201
1202 case ir_binop_logic_or:
1203 emit(OR(result_dst, op[0], op[1]));
1204 break;
1205
1206 case ir_binop_logic_and:
1207 emit(AND(result_dst, op[0], op[1]));
1208 break;
1209
1210 case ir_binop_dot:
1211 assert(ir->operands[0]->type->is_vector());
1212 assert(ir->operands[0]->type == ir->operands[1]->type);
1213 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1214 break;
1215
1216 case ir_unop_sqrt:
1217 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1218 break;
1219 case ir_unop_rsq:
1220 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1221 break;
1222
1223 case ir_unop_bitcast_i2f:
1224 case ir_unop_bitcast_u2f:
1225 this->result = op[0];
1226 this->result.type = BRW_REGISTER_TYPE_F;
1227 break;
1228
1229 case ir_unop_bitcast_f2i:
1230 this->result = op[0];
1231 this->result.type = BRW_REGISTER_TYPE_D;
1232 break;
1233
1234 case ir_unop_bitcast_f2u:
1235 this->result = op[0];
1236 this->result.type = BRW_REGISTER_TYPE_UD;
1237 break;
1238
1239 case ir_unop_i2f:
1240 case ir_unop_i2u:
1241 case ir_unop_u2i:
1242 case ir_unop_u2f:
1243 case ir_unop_b2f:
1244 case ir_unop_b2i:
1245 case ir_unop_f2i:
1246 case ir_unop_f2u:
1247 emit(MOV(result_dst, op[0]));
1248 break;
1249 case ir_unop_f2b:
1250 case ir_unop_i2b: {
1251 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1252 emit(AND(result_dst, result_src, src_reg(1)));
1253 break;
1254 }
1255
1256 case ir_unop_trunc:
1257 emit(RNDZ(result_dst, op[0]));
1258 break;
1259 case ir_unop_ceil:
1260 op[0].negate = !op[0].negate;
1261 inst = emit(RNDD(result_dst, op[0]));
1262 this->result.negate = true;
1263 break;
1264 case ir_unop_floor:
1265 inst = emit(RNDD(result_dst, op[0]));
1266 break;
1267 case ir_unop_fract:
1268 inst = emit(FRC(result_dst, op[0]));
1269 break;
1270 case ir_unop_round_even:
1271 emit(RNDE(result_dst, op[0]));
1272 break;
1273
1274 case ir_binop_min:
1275 if (intel->gen >= 6) {
1276 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1277 inst->conditional_mod = BRW_CONDITIONAL_L;
1278 } else {
1279 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_L));
1280
1281 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1282 inst->predicate = BRW_PREDICATE_NORMAL;
1283 }
1284 break;
1285 case ir_binop_max:
1286 if (intel->gen >= 6) {
1287 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1288 inst->conditional_mod = BRW_CONDITIONAL_G;
1289 } else {
1290 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_G));
1291
1292 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1293 inst->predicate = BRW_PREDICATE_NORMAL;
1294 }
1295 break;
1296
1297 case ir_binop_pow:
1298 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1299 break;
1300
1301 case ir_unop_bit_not:
1302 inst = emit(NOT(result_dst, op[0]));
1303 break;
1304 case ir_binop_bit_and:
1305 inst = emit(AND(result_dst, op[0], op[1]));
1306 break;
1307 case ir_binop_bit_xor:
1308 inst = emit(XOR(result_dst, op[0], op[1]));
1309 break;
1310 case ir_binop_bit_or:
1311 inst = emit(OR(result_dst, op[0], op[1]));
1312 break;
1313
1314 case ir_binop_lshift:
1315 inst = emit(BRW_OPCODE_SHL, result_dst, op[0], op[1]);
1316 break;
1317
1318 case ir_binop_rshift:
1319 if (ir->type->base_type == GLSL_TYPE_INT)
1320 inst = emit(BRW_OPCODE_ASR, result_dst, op[0], op[1]);
1321 else
1322 inst = emit(BRW_OPCODE_SHR, result_dst, op[0], op[1]);
1323 break;
1324
1325 case ir_binop_ubo_load: {
1326 ir_constant *uniform_block = ir->operands[0]->as_constant();
1327 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1328 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
1329 src_reg offset = op[1];
1330
1331 /* Now, load the vector from that offset. */
1332 assert(ir->type->is_vector() || ir->type->is_scalar());
1333
1334 src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
1335 packed_consts.type = result.type;
1336 src_reg surf_index =
1337 src_reg(SURF_INDEX_VS_UBO(uniform_block->value.u[0]));
1338 if (const_offset_ir) {
1339 offset = src_reg(const_offset / 16);
1340 } else {
1341 emit(BRW_OPCODE_SHR, dst_reg(offset), offset, src_reg(4));
1342 }
1343
1344 vec4_instruction *pull =
1345 emit(new(mem_ctx) vec4_instruction(this,
1346 VS_OPCODE_PULL_CONSTANT_LOAD,
1347 dst_reg(packed_consts),
1348 surf_index,
1349 offset));
1350 pull->base_mrf = 14;
1351 pull->mlen = 1;
1352
1353 packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements);
1354 packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
1355 const_offset % 16 / 4,
1356 const_offset % 16 / 4,
1357 const_offset % 16 / 4);
1358
1359 /* UBO bools are any nonzero int. We store bools as either 0 or 1. */
1360 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1361 emit(CMP(result_dst, packed_consts, src_reg(0u),
1362 BRW_CONDITIONAL_NZ));
1363 emit(AND(result_dst, result, src_reg(0x1)));
1364 } else {
1365 emit(MOV(result_dst, packed_consts));
1366 }
1367 break;
1368 }
1369
1370 case ir_quadop_vector:
1371 assert(!"not reached: should be handled by lower_quadop_vector");
1372 break;
1373 }
1374 }
1375
1376
1377 void
1378 vec4_visitor::visit(ir_swizzle *ir)
1379 {
1380 src_reg src;
1381 int i = 0;
1382 int swizzle[4];
1383
1384 /* Note that this is only swizzles in expressions, not those on the left
1385 * hand side of an assignment, which do write masking. See ir_assignment
1386 * for that.
1387 */
1388
1389 ir->val->accept(this);
1390 src = this->result;
1391 assert(src.file != BAD_FILE);
1392
1393 for (i = 0; i < ir->type->vector_elements; i++) {
1394 switch (i) {
1395 case 0:
1396 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1397 break;
1398 case 1:
1399 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1400 break;
1401 case 2:
1402 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1403 break;
1404 case 3:
1405 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1406 break;
1407 }
1408 }
1409 for (; i < 4; i++) {
1410 /* Replicate the last channel out. */
1411 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1412 }
1413
1414 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1415
1416 this->result = src;
1417 }
1418
1419 void
1420 vec4_visitor::visit(ir_dereference_variable *ir)
1421 {
1422 const struct glsl_type *type = ir->type;
1423 dst_reg *reg = variable_storage(ir->var);
1424
1425 if (!reg) {
1426 fail("Failed to find variable storage for %s\n", ir->var->name);
1427 this->result = src_reg(brw_null_reg());
1428 return;
1429 }
1430
1431 this->result = src_reg(*reg);
1432
1433 /* System values get their swizzle from the dst_reg writemask */
1434 if (ir->var->mode == ir_var_system_value)
1435 return;
1436
1437 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1438 this->result.swizzle = swizzle_for_size(type->vector_elements);
1439 }
1440
1441 void
1442 vec4_visitor::visit(ir_dereference_array *ir)
1443 {
1444 ir_constant *constant_index;
1445 src_reg src;
1446 int element_size = type_size(ir->type);
1447
1448 constant_index = ir->array_index->constant_expression_value();
1449
1450 ir->array->accept(this);
1451 src = this->result;
1452
1453 if (constant_index) {
1454 src.reg_offset += constant_index->value.i[0] * element_size;
1455 } else {
1456 /* Variable index array dereference. It eats the "vec4" of the
1457 * base of the array and an index that offsets the Mesa register
1458 * index.
1459 */
1460 ir->array_index->accept(this);
1461
1462 src_reg index_reg;
1463
1464 if (element_size == 1) {
1465 index_reg = this->result;
1466 } else {
1467 index_reg = src_reg(this, glsl_type::int_type);
1468
1469 emit(MUL(dst_reg(index_reg), this->result, src_reg(element_size)));
1470 }
1471
1472 if (src.reladdr) {
1473 src_reg temp = src_reg(this, glsl_type::int_type);
1474
1475 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1476
1477 index_reg = temp;
1478 }
1479
1480 src.reladdr = ralloc(mem_ctx, src_reg);
1481 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1482 }
1483
1484 /* If the type is smaller than a vec4, replicate the last channel out. */
1485 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1486 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1487 else
1488 src.swizzle = BRW_SWIZZLE_NOOP;
1489 src.type = brw_type_for_base_type(ir->type);
1490
1491 this->result = src;
1492 }
1493
1494 void
1495 vec4_visitor::visit(ir_dereference_record *ir)
1496 {
1497 unsigned int i;
1498 const glsl_type *struct_type = ir->record->type;
1499 int offset = 0;
1500
1501 ir->record->accept(this);
1502
1503 for (i = 0; i < struct_type->length; i++) {
1504 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1505 break;
1506 offset += type_size(struct_type->fields.structure[i].type);
1507 }
1508
1509 /* If the type is smaller than a vec4, replicate the last channel out. */
1510 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1511 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1512 else
1513 this->result.swizzle = BRW_SWIZZLE_NOOP;
1514 this->result.type = brw_type_for_base_type(ir->type);
1515
1516 this->result.reg_offset += offset;
1517 }
1518
1519 /**
1520 * We want to be careful in assignment setup to hit the actual storage
1521 * instead of potentially using a temporary like we might with the
1522 * ir_dereference handler.
1523 */
1524 static dst_reg
1525 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1526 {
1527 /* The LHS must be a dereference. If the LHS is a variable indexed array
1528 * access of a vector, it must be separated into a series of conditional moves
1529 * before reaching this point (see ir_vec_index_to_cond_assign).
1530 */
1531 assert(ir->as_dereference());
1532 ir_dereference_array *deref_array = ir->as_dereference_array();
1533 if (deref_array) {
1534 assert(!deref_array->array->type->is_vector());
1535 }
1536
1537 /* Use the rvalue deref handler for the most part. We'll ignore
1538 * swizzles in it and write swizzles using writemask, though.
1539 */
1540 ir->accept(v);
1541 return dst_reg(v->result);
1542 }
1543
1544 void
1545 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1546 const struct glsl_type *type, uint32_t predicate)
1547 {
1548 if (type->base_type == GLSL_TYPE_STRUCT) {
1549 for (unsigned int i = 0; i < type->length; i++) {
1550 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1551 }
1552 return;
1553 }
1554
1555 if (type->is_array()) {
1556 for (unsigned int i = 0; i < type->length; i++) {
1557 emit_block_move(dst, src, type->fields.array, predicate);
1558 }
1559 return;
1560 }
1561
1562 if (type->is_matrix()) {
1563 const struct glsl_type *vec_type;
1564
1565 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1566 type->vector_elements, 1);
1567
1568 for (int i = 0; i < type->matrix_columns; i++) {
1569 emit_block_move(dst, src, vec_type, predicate);
1570 }
1571 return;
1572 }
1573
1574 assert(type->is_scalar() || type->is_vector());
1575
1576 dst->type = brw_type_for_base_type(type);
1577 src->type = dst->type;
1578
1579 dst->writemask = (1 << type->vector_elements) - 1;
1580
1581 src->swizzle = swizzle_for_size(type->vector_elements);
1582
1583 vec4_instruction *inst = emit(MOV(*dst, *src));
1584 inst->predicate = predicate;
1585
1586 dst->reg_offset++;
1587 src->reg_offset++;
1588 }
1589
1590
1591 /* If the RHS processing resulted in an instruction generating a
1592 * temporary value, and it would be easy to rewrite the instruction to
1593 * generate its result right into the LHS instead, do so. This ends
1594 * up reliably removing instructions where it can be tricky to do so
1595 * later without real UD chain information.
1596 */
1597 bool
1598 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1599 dst_reg dst,
1600 src_reg src,
1601 vec4_instruction *pre_rhs_inst,
1602 vec4_instruction *last_rhs_inst)
1603 {
1604 /* This could be supported, but it would take more smarts. */
1605 if (ir->condition)
1606 return false;
1607
1608 if (pre_rhs_inst == last_rhs_inst)
1609 return false; /* No instructions generated to work with. */
1610
1611 /* Make sure the last instruction generated our source reg. */
1612 if (src.file != GRF ||
1613 src.file != last_rhs_inst->dst.file ||
1614 src.reg != last_rhs_inst->dst.reg ||
1615 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1616 src.reladdr ||
1617 src.abs ||
1618 src.negate ||
1619 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1620 return false;
1621
1622 /* Check that that last instruction fully initialized the channels
1623 * we want to use, in the order we want to use them. We could
1624 * potentially reswizzle the operands of many instructions so that
1625 * we could handle out of order channels, but don't yet.
1626 */
1627
1628 for (unsigned i = 0; i < 4; i++) {
1629 if (dst.writemask & (1 << i)) {
1630 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1631 return false;
1632
1633 if (BRW_GET_SWZ(src.swizzle, i) != i)
1634 return false;
1635 }
1636 }
1637
1638 /* Success! Rewrite the instruction. */
1639 last_rhs_inst->dst.file = dst.file;
1640 last_rhs_inst->dst.reg = dst.reg;
1641 last_rhs_inst->dst.reg_offset = dst.reg_offset;
1642 last_rhs_inst->dst.reladdr = dst.reladdr;
1643 last_rhs_inst->dst.writemask &= dst.writemask;
1644
1645 return true;
1646 }
1647
1648 void
1649 vec4_visitor::visit(ir_assignment *ir)
1650 {
1651 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1652 uint32_t predicate = BRW_PREDICATE_NONE;
1653
1654 if (!ir->lhs->type->is_scalar() &&
1655 !ir->lhs->type->is_vector()) {
1656 ir->rhs->accept(this);
1657 src_reg src = this->result;
1658
1659 if (ir->condition) {
1660 emit_bool_to_cond_code(ir->condition, &predicate);
1661 }
1662
1663 /* emit_block_move doesn't account for swizzles in the source register.
1664 * This should be ok, since the source register is a structure or an
1665 * array, and those can't be swizzled. But double-check to be sure.
1666 */
1667 assert(src.swizzle ==
1668 (ir->rhs->type->is_matrix()
1669 ? swizzle_for_size(ir->rhs->type->vector_elements)
1670 : BRW_SWIZZLE_NOOP));
1671
1672 emit_block_move(&dst, &src, ir->rhs->type, predicate);
1673 return;
1674 }
1675
1676 /* Now we're down to just a scalar/vector with writemasks. */
1677 int i;
1678
1679 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1680 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1681
1682 ir->rhs->accept(this);
1683
1684 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1685
1686 src_reg src = this->result;
1687
1688 int swizzles[4];
1689 int first_enabled_chan = 0;
1690 int src_chan = 0;
1691
1692 assert(ir->lhs->type->is_vector() ||
1693 ir->lhs->type->is_scalar());
1694 dst.writemask = ir->write_mask;
1695
1696 for (int i = 0; i < 4; i++) {
1697 if (dst.writemask & (1 << i)) {
1698 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1699 break;
1700 }
1701 }
1702
1703 /* Swizzle a small RHS vector into the channels being written.
1704 *
1705 * glsl ir treats write_mask as dictating how many channels are
1706 * present on the RHS while in our instructions we need to make
1707 * those channels appear in the slots of the vec4 they're written to.
1708 */
1709 for (int i = 0; i < 4; i++) {
1710 if (dst.writemask & (1 << i))
1711 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1712 else
1713 swizzles[i] = first_enabled_chan;
1714 }
1715 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1716 swizzles[2], swizzles[3]);
1717
1718 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1719 return;
1720 }
1721
1722 if (ir->condition) {
1723 emit_bool_to_cond_code(ir->condition, &predicate);
1724 }
1725
1726 for (i = 0; i < type_size(ir->lhs->type); i++) {
1727 vec4_instruction *inst = emit(MOV(dst, src));
1728 inst->predicate = predicate;
1729
1730 dst.reg_offset++;
1731 src.reg_offset++;
1732 }
1733 }
1734
1735 void
1736 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1737 {
1738 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1739 foreach_list(node, &ir->components) {
1740 ir_constant *field_value = (ir_constant *)node;
1741
1742 emit_constant_values(dst, field_value);
1743 }
1744 return;
1745 }
1746
1747 if (ir->type->is_array()) {
1748 for (unsigned int i = 0; i < ir->type->length; i++) {
1749 emit_constant_values(dst, ir->array_elements[i]);
1750 }
1751 return;
1752 }
1753
1754 if (ir->type->is_matrix()) {
1755 for (int i = 0; i < ir->type->matrix_columns; i++) {
1756 float *vec = &ir->value.f[i * ir->type->vector_elements];
1757
1758 for (int j = 0; j < ir->type->vector_elements; j++) {
1759 dst->writemask = 1 << j;
1760 dst->type = BRW_REGISTER_TYPE_F;
1761
1762 emit(MOV(*dst, src_reg(vec[j])));
1763 }
1764 dst->reg_offset++;
1765 }
1766 return;
1767 }
1768
1769 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
1770
1771 for (int i = 0; i < ir->type->vector_elements; i++) {
1772 if (!(remaining_writemask & (1 << i)))
1773 continue;
1774
1775 dst->writemask = 1 << i;
1776 dst->type = brw_type_for_base_type(ir->type);
1777
1778 /* Find other components that match the one we're about to
1779 * write. Emits fewer instructions for things like vec4(0.5,
1780 * 1.5, 1.5, 1.5).
1781 */
1782 for (int j = i + 1; j < ir->type->vector_elements; j++) {
1783 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1784 if (ir->value.b[i] == ir->value.b[j])
1785 dst->writemask |= (1 << j);
1786 } else {
1787 /* u, i, and f storage all line up, so no need for a
1788 * switch case for comparing each type.
1789 */
1790 if (ir->value.u[i] == ir->value.u[j])
1791 dst->writemask |= (1 << j);
1792 }
1793 }
1794
1795 switch (ir->type->base_type) {
1796 case GLSL_TYPE_FLOAT:
1797 emit(MOV(*dst, src_reg(ir->value.f[i])));
1798 break;
1799 case GLSL_TYPE_INT:
1800 emit(MOV(*dst, src_reg(ir->value.i[i])));
1801 break;
1802 case GLSL_TYPE_UINT:
1803 emit(MOV(*dst, src_reg(ir->value.u[i])));
1804 break;
1805 case GLSL_TYPE_BOOL:
1806 emit(MOV(*dst, src_reg(ir->value.b[i])));
1807 break;
1808 default:
1809 assert(!"Non-float/uint/int/bool constant");
1810 break;
1811 }
1812
1813 remaining_writemask &= ~dst->writemask;
1814 }
1815 dst->reg_offset++;
1816 }
1817
1818 void
1819 vec4_visitor::visit(ir_constant *ir)
1820 {
1821 dst_reg dst = dst_reg(this, ir->type);
1822 this->result = src_reg(dst);
1823
1824 emit_constant_values(&dst, ir);
1825 }
1826
1827 void
1828 vec4_visitor::visit(ir_call *ir)
1829 {
1830 assert(!"not reached");
1831 }
1832
1833 void
1834 vec4_visitor::visit(ir_texture *ir)
1835 {
1836 int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &vp->Base);
1837
1838 /* Should be lowered by do_lower_texture_projection */
1839 assert(!ir->projector);
1840
1841 /* Generate code to compute all the subexpression trees. This has to be
1842 * done before loading any values into MRFs for the sampler message since
1843 * generating these values may involve SEND messages that need the MRFs.
1844 */
1845 src_reg coordinate;
1846 if (ir->coordinate) {
1847 ir->coordinate->accept(this);
1848 coordinate = this->result;
1849 }
1850
1851 src_reg shadow_comparitor;
1852 if (ir->shadow_comparitor) {
1853 ir->shadow_comparitor->accept(this);
1854 shadow_comparitor = this->result;
1855 }
1856
1857 src_reg lod, dPdx, dPdy;
1858 switch (ir->op) {
1859 case ir_txf:
1860 case ir_txl:
1861 case ir_txs:
1862 ir->lod_info.lod->accept(this);
1863 lod = this->result;
1864 break;
1865 case ir_txd:
1866 ir->lod_info.grad.dPdx->accept(this);
1867 dPdx = this->result;
1868
1869 ir->lod_info.grad.dPdy->accept(this);
1870 dPdy = this->result;
1871 break;
1872 case ir_tex:
1873 case ir_txb:
1874 break;
1875 }
1876
1877 vec4_instruction *inst = NULL;
1878 switch (ir->op) {
1879 case ir_tex:
1880 case ir_txl:
1881 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
1882 break;
1883 case ir_txd:
1884 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
1885 break;
1886 case ir_txf:
1887 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
1888 break;
1889 case ir_txs:
1890 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
1891 break;
1892 case ir_txb:
1893 assert(!"TXB is not valid for vertex shaders.");
1894 }
1895
1896 /* Texel offsets go in the message header; Gen4 also requires headers. */
1897 inst->header_present = ir->offset || intel->gen < 5;
1898 inst->base_mrf = 2;
1899 inst->mlen = inst->header_present + 1; /* always at least one */
1900 inst->sampler = sampler;
1901 inst->dst = dst_reg(this, ir->type);
1902 inst->shadow_compare = ir->shadow_comparitor != NULL;
1903
1904 if (ir->offset != NULL && ir->op != ir_txf)
1905 inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
1906
1907 /* MRF for the first parameter */
1908 int param_base = inst->base_mrf + inst->header_present;
1909
1910 if (ir->op == ir_txs) {
1911 int writemask = intel->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
1912 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, writemask),
1913 lod));
1914 } else {
1915 int i, coord_mask = 0, zero_mask = 0;
1916 /* Load the coordinate */
1917 /* FINISHME: gl_clamp_mask and saturate */
1918 for (i = 0; i < ir->coordinate->type->vector_elements; i++)
1919 coord_mask |= (1 << i);
1920 for (; i < 4; i++)
1921 zero_mask |= (1 << i);
1922
1923 if (ir->offset && ir->op == ir_txf) {
1924 /* It appears that the ld instruction used for txf does its
1925 * address bounds check before adding in the offset. To work
1926 * around this, just add the integer offset to the integer
1927 * texel coordinate, and don't put the offset in the header.
1928 */
1929 ir_constant *offset = ir->offset->as_constant();
1930 assert(offset);
1931
1932 for (int j = 0; j < ir->coordinate->type->vector_elements; j++) {
1933 src_reg src = coordinate;
1934 src.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(src.swizzle, j),
1935 BRW_GET_SWZ(src.swizzle, j),
1936 BRW_GET_SWZ(src.swizzle, j),
1937 BRW_GET_SWZ(src.swizzle, j));
1938 emit(ADD(dst_reg(MRF, param_base, ir->coordinate->type, 1 << j),
1939 src, offset->value.i[j]));
1940 }
1941 } else {
1942 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
1943 coordinate));
1944 }
1945 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
1946 src_reg(0)));
1947 /* Load the shadow comparitor */
1948 if (ir->shadow_comparitor) {
1949 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
1950 WRITEMASK_X),
1951 shadow_comparitor));
1952 inst->mlen++;
1953 }
1954
1955 /* Load the LOD info */
1956 if (ir->op == ir_txl) {
1957 int mrf, writemask;
1958 if (intel->gen >= 5) {
1959 mrf = param_base + 1;
1960 if (ir->shadow_comparitor) {
1961 writemask = WRITEMASK_Y;
1962 /* mlen already incremented */
1963 } else {
1964 writemask = WRITEMASK_X;
1965 inst->mlen++;
1966 }
1967 } else /* intel->gen == 4 */ {
1968 mrf = param_base;
1969 writemask = WRITEMASK_Z;
1970 }
1971 emit(MOV(dst_reg(MRF, mrf, ir->lod_info.lod->type, writemask), lod));
1972 } else if (ir->op == ir_txf) {
1973 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, WRITEMASK_W),
1974 lod));
1975 } else if (ir->op == ir_txd) {
1976 const glsl_type *type = ir->lod_info.grad.dPdx->type;
1977
1978 if (intel->gen >= 5) {
1979 dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1980 dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1981 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
1982 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
1983 inst->mlen++;
1984
1985 if (ir->type->vector_elements == 3) {
1986 dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
1987 dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
1988 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
1989 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
1990 inst->mlen++;
1991 }
1992 } else /* intel->gen == 4 */ {
1993 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
1994 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
1995 inst->mlen += 2;
1996 }
1997 }
1998 }
1999
2000 emit(inst);
2001
2002 swizzle_result(ir, src_reg(inst->dst), sampler);
2003 }
2004
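/**
 * Apply the per-sampler result swizzle from the program key (e.g. from
 * EXT_texture_swizzle or legacy depth texture modes) to a texture result,
 * emitting MOVs for the copied, zeroed and one-filled channels as needed.
 */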
2005 void
2006 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
2007 {
2008 this->result = orig_val;
2009
2010 int s = c->key.tex.swizzles[sampler];
2011
2012 if (ir->op == ir_txs || ir->type == glsl_type::float_type
2013 || s == SWIZZLE_NOOP)
2014 return;
2015
2016 int zero_mask = 0, one_mask = 0, copy_mask = 0;
2017 int swizzle[4];
2018
2019 for (int i = 0; i < 4; i++) {
2020 switch (GET_SWZ(s, i)) {
2021 case SWIZZLE_ZERO:
2022 zero_mask |= (1 << i);
2023 break;
2024 case SWIZZLE_ONE:
2025 one_mask |= (1 << i);
2026 break;
2027 default:
2028 copy_mask |= (1 << i);
2029 swizzle[i] = GET_SWZ(s, i);
2030 break;
2031 }
2032 }
2033
2034 this->result = src_reg(this, ir->type);
2035 dst_reg swizzled_result(this->result);
2036
2037 if (copy_mask) {
2038 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2039 swizzled_result.writemask = copy_mask;
2040 emit(MOV(swizzled_result, orig_val));
2041 }
2042
2043 if (zero_mask) {
2044 swizzled_result.writemask = zero_mask;
2045 emit(MOV(swizzled_result, src_reg(0.0f)));
2046 }
2047
2048 if (one_mask) {
2049 swizzled_result.writemask = one_mask;
2050 emit(MOV(swizzled_result, src_reg(1.0f)));
2051 }
2052 }
2053
2054 void
2055 vec4_visitor::visit(ir_return *ir)
2056 {
2057 assert(!"not reached");
2058 }
2059
2060 void
2061 vec4_visitor::visit(ir_discard *ir)
2062 {
2063 assert(!"not reached");
2064 }
2065
2066 void
2067 vec4_visitor::visit(ir_if *ir)
2068 {
2069 /* Don't point the annotation at the if statement, because then the
2070 * printed annotation would cover the then and else blocks as well.
2071 */
2072 this->base_ir = ir->condition;
2073
2074 if (intel->gen == 6) {
2075 emit_if_gen6(ir);
2076 } else {
2077 uint32_t predicate;
2078 emit_bool_to_cond_code(ir->condition, &predicate);
2079 emit(IF(predicate));
2080 }
2081
2082 visit_instructions(&ir->then_instructions);
2083
2084 if (!ir->else_instructions.is_empty()) {
2085 this->base_ir = ir->condition;
2086 emit(BRW_OPCODE_ELSE);
2087
2088 visit_instructions(&ir->else_instructions);
2089 }
2090
2091 this->base_ir = ir->condition;
2092 emit(BRW_OPCODE_ENDIF);
2093 }
2094
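/**
 * Compute the NDC output (x/w, y/w, z/w, 1/w) from the clip-space position;
 * only needed for the pre-gen6 VUE layout (see the caller in
 * emit_urb_writes()).
 */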
2095 void
2096 vec4_visitor::emit_ndc_computation()
2097 {
2098 /* Get the position */
2099 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
2100
2101 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
2102 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2103 output_reg[BRW_VERT_RESULT_NDC] = ndc;
2104
2105 current_annotation = "NDC";
2106 dst_reg ndc_w = ndc;
2107 ndc_w.writemask = WRITEMASK_W;
2108 src_reg pos_w = pos;
2109 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2110 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2111
2112 dst_reg ndc_xyz = ndc;
2113 ndc_xyz.writemask = WRITEMASK_XYZ;
2114
2115 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
2116 }
2117
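/**
 * Fill the URB header register that carries the point size: on pre-gen6 the
 * .w dword also packs the user clip flags (and, in principle, the
 * negative-rhw workaround bit); on gen6+ only the point size is written.
 */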
2118 void
2119 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2120 {
2121 if (intel->gen < 6 &&
2122 ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
2123 c->key.userclip_active || brw->has_negative_rhw_bug)) {
2124 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2125 dst_reg header1_w = header1;
2126 header1_w.writemask = WRITEMASK_W;
2127 GLuint i;
2128
2129 emit(MOV(header1, 0u));
2130
2131 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2132 src_reg psiz = src_reg(output_reg[VERT_RESULT_PSIZ]);
2133
2134 current_annotation = "Point size";
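 /* Scale the float point size by 2^11 and mask off the 11 bits that land
  * at bit 8 of the header dword, i.e. store it as an unsigned fixed-point
  * value in the header's point-width field.
  */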
2135 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2136 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
2137 }
2138
2139 current_annotation = "Clipping flags";
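 /* For each enabled user clip plane, take dot(hpos, plane) and, if the
  * result is negative, set the corresponding clip flag bit in the header
  * word (the DP4 sets the flag register, the OR is predicated on it).
  */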
2140 for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
2141 vec4_instruction *inst;
2142
2143 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
2144 src_reg(this->userplane[i])));
2145 inst->conditional_mod = BRW_CONDITIONAL_L;
2146
2147 inst = emit(OR(header1_w, src_reg(header1_w), 1u << i));
2148 inst->predicate = BRW_PREDICATE_NORMAL;
2149 }
2150
2151 /* i965 clipping workaround:
2152 * 1) Test for -ve rhw
2153 * 2) If set,
2154 * set ndc = (0,0,0,0)
2155 * set ucp[6] = 1
2156 *
2157 * Later, clipping will detect ucp[6] and ensure the primitive is
2158 * clipped against all fixed planes.
2159 */
2160 if (brw->has_negative_rhw_bug) {
2161 #if 0
2162 /* FINISHME */
2163 brw_CMP(p,
2164 vec8(brw_null_reg()),
2165 BRW_CONDITIONAL_L,
2166 brw_swizzle1(output_reg[BRW_VERT_RESULT_NDC], 3),
2167 brw_imm_f(0));
2168
2169 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
2170 brw_MOV(p, output_reg[BRW_VERT_RESULT_NDC], brw_imm_f(0));
2171 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2172 #endif
2173 }
2174
2175 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2176 } else if (intel->gen < 6) {
2177 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2178 } else {
2179 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2180 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2181 emit(MOV(brw_writemask(reg, WRITEMASK_W),
2182 src_reg(output_reg[VERT_RESULT_PSIZ])));
2183 }
2184 }
2185 }
2186
2187 void
2188 vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
2189 {
2190 if (intel->gen < 6) {
2191 /* Clip distance slots are set aside in gen5, but they are not used. It
2192 * is not clear whether we actually need to set aside space for them,
2193 * but the performance cost is negligible.
2194 */
2195 return;
2196 }
2197
2198 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2199 *
2200 * "If a linked set of shaders forming the vertex stage contains no
2201 * static write to gl_ClipVertex or gl_ClipDistance, but the
2202 * application has requested clipping against user clip planes through
2203 * the API, then the coordinate written to gl_Position is used for
2204 * comparison against the user clip planes."
2205 *
2206 * This function is only called if the shader didn't write to
2207 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
2208 * if the user wrote to it; otherwise we use gl_Position.
2209 */
2210 gl_vert_result clip_vertex = VERT_RESULT_CLIP_VERTEX;
2211 if (!(c->prog_data.outputs_written
2212 & BITFIELD64_BIT(VERT_RESULT_CLIP_VERTEX))) {
2213 clip_vertex = VERT_RESULT_HPOS;
2214 }
2215
2216 for (int i = 0; i + offset < c->key.nr_userclip_plane_consts && i < 4;
2217 ++i) {
2218 emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
2219 src_reg(output_reg[clip_vertex]),
2220 src_reg(this->userplane[i + offset])));
2221 }
2222 }
2223
2224 void
2225 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int vert_result)
2226 {
2227 assert (vert_result < VERT_RESULT_MAX);
2228 reg.type = output_reg[vert_result].type;
2229 current_annotation = output_reg_annotation[vert_result];
2230 /* Copy the register, saturating if necessary */
2231 vec4_instruction *inst = emit(MOV(reg,
2232 src_reg(output_reg[vert_result])));
2233 if ((vert_result == VERT_RESULT_COL0 ||
2234 vert_result == VERT_RESULT_COL1 ||
2235 vert_result == VERT_RESULT_BFC0 ||
2236 vert_result == VERT_RESULT_BFC1) &&
2237 c->key.clamp_vertex_color) {
2238 inst->saturate = true;
2239 }
2240 }
2241
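/**
 * Write one VUE slot's worth of data into the given MRF, handling the
 * special slots (header, NDC, position, clip distances, edge flag, padding)
 * and falling back to emit_generic_urb_slot() for ordinary varyings.
 */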
2242 void
2243 vec4_visitor::emit_urb_slot(int mrf, int vert_result)
2244 {
2245 struct brw_reg hw_reg = brw_message_reg(mrf);
2246 dst_reg reg = dst_reg(MRF, mrf);
2247 reg.type = BRW_REGISTER_TYPE_F;
2248
2249 switch (vert_result) {
2250 case VERT_RESULT_PSIZ:
2251 /* PSIZ is always in slot 0, and is coupled with other flags. */
2252 current_annotation = "indices, point width, clip flags";
2253 emit_psiz_and_flags(hw_reg);
2254 break;
2255 case BRW_VERT_RESULT_NDC:
2256 current_annotation = "NDC";
2257 emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
2258 break;
2259 case BRW_VERT_RESULT_HPOS_DUPLICATE:
2260 case VERT_RESULT_HPOS:
2261 current_annotation = "gl_Position";
2262 emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
2263 break;
2264 case VERT_RESULT_CLIP_DIST0:
2265 case VERT_RESULT_CLIP_DIST1:
2266 if (this->c->key.uses_clip_distance) {
2267 emit_generic_urb_slot(reg, vert_result);
2268 } else {
2269 current_annotation = "user clip distances";
2270 emit_clip_distances(hw_reg, (vert_result - VERT_RESULT_CLIP_DIST0) * 4);
2271 }
2272 break;
2273 case VERT_RESULT_EDGE:
2274 /* This is present when doing unfilled polygons. We're supposed to copy
2275 * the edge flag from the user-provided vertex array
2276 * (glEdgeFlagPointer), or otherwise we'll copy from the current value
2277 * of that attribute (starts as 1.0f). This is then used in clipping to
2278 * determine which edges should be drawn as wireframe.
2279 */
2280 current_annotation = "edge flag";
2281 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
2282 glsl_type::float_type, WRITEMASK_XYZW))));
2283 break;
2284 case BRW_VERT_RESULT_PAD:
2285 /* No need to write to this slot */
2286 break;
2287 default:
2288 emit_generic_urb_slot(reg, vert_result);
2289 break;
2290 }
2291 }
2292
2293 static int
2294 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2295 {
2296 struct intel_context *intel = &brw->intel;
2297
2298 if (intel->gen >= 6) {
2299 /* URB data written (does not include the message header reg) must
2300 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
2301 * section 5.4.3.2.2: URB_INTERLEAVED.
2302 *
2303 * URB entries are allocated on a multiple of 1024 bits, so an
2304 * extra 128 bits written here to make the end align to 256 is
2305 * no problem.
2306 */
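 /* mlen here still includes the header reg, so it must end up odd: e.g. a
  * header plus 8 data MRFs (mlen == 9) is left alone, while a header plus
  * 9 data MRFs (mlen == 10) is padded out to 11.
  */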
2307 if ((mlen % 2) != 1)
2308 mlen++;
2309 }
2310
2311 return mlen;
2312 }
2313
2314 /**
2315 * Generates the VUE payload plus the 1 or 2 URB write instructions to
2316 * complete the VS thread.
2317 *
2318 * The VUE layout is documented in Volume 2a.
2319 */
2320 void
2321 vec4_visitor::emit_urb_writes()
2322 {
2323 /* MRF 0 is reserved for the debugger, so start with message header
2324 * in MRF 1.
2325 */
2326 int base_mrf = 1;
2327 int mrf = base_mrf;
2328 /* In the process of generating our URB write message contents, we
2329 * may need to unspill a register or load from an array. Those
2330 * reads would use MRFs 14-15.
2331 */
2332 int max_usable_mrf = 13;
2333
2334 /* The following assertion verifies that max_usable_mrf yields an
2335 * even number of URB write data registers, which meets gen6's
2336 * requirements for length alignment.
2337 */
2338 assert ((max_usable_mrf - base_mrf) % 2 == 0);
2339
2340 /* First mrf is the g0-based message header containing URB handles and such,
2341 * which is implied in VS_OPCODE_URB_WRITE.
2342 */
2343 mrf++;
2344
2345 if (intel->gen < 6) {
2346 emit_ndc_computation();
2347 }
2348
2349 /* Set up the VUE data for the first URB write */
2350 int slot;
2351 for (slot = 0; slot < c->prog_data.vue_map.num_slots; ++slot) {
2352 emit_urb_slot(mrf++, c->prog_data.vue_map.slot_to_vert_result[slot]);
2353
2354 /* If this was max_usable_mrf, we can't fit anything more into this URB
2355 * WRITE.
2356 */
2357 if (mrf > max_usable_mrf) {
2358 slot++;
2359 break;
2360 }
2361 }
2362
2363 current_annotation = "URB write";
2364 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
2365 inst->base_mrf = base_mrf;
2366 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2367 inst->eot = (slot >= c->prog_data.vue_map.num_slots);
2368
2369 /* Optional second URB write */
2370 if (!inst->eot) {
2371 mrf = base_mrf + 1;
2372
2373 for (; slot < c->prog_data.vue_map.num_slots; ++slot) {
2374 assert(mrf < max_usable_mrf);
2375
2376 emit_urb_slot(mrf++, c->prog_data.vue_map.slot_to_vert_result[slot]);
2377 }
2378
2379 current_annotation = "URB write";
2380 inst = emit(VS_OPCODE_URB_WRITE);
2381 inst->base_mrf = base_mrf;
2382 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2383 inst->eot = true;
2384 /* URB destination offset. The previous write used MRFs 2-13 for
2385 * data (MRF 1 held the header), i.e. 12 regs. URB offset is in
2386 * URB row increments, and each of our MRFs is half a row, since
2387 * we're doing interleaved writes.
2388 */
2389 inst->offset = (max_usable_mrf - base_mrf) / 2;
2390 }
2391 }
2392
2393 src_reg
2394 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2395 src_reg *reladdr, int reg_offset)
2396 {
2397 /* Because we store the values to scratch interleaved like our
2398 * vertex data, we need to scale the vec4 index by 2.
2399 */
2400 int message_header_scale = 2;
2401
2402 /* Pre-gen6, the message header uses byte offsets instead of vec4
2403 * (16-byte) offset units.
2404 */
2405 if (intel->gen < 6)
2406 message_header_scale *= 16;
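 /* So with no reladdr, e.g. reg_offset 3 becomes a constant offset of 6
  * vec4 units on gen6+, or 96 bytes on older generations.
  */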
2407
2408 if (reladdr) {
2409 src_reg index = src_reg(this, glsl_type::int_type);
2410
2411 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2412 emit_before(inst, MUL(dst_reg(index),
2413 index, src_reg(message_header_scale)));
2414
2415 return index;
2416 } else {
2417 return src_reg(reg_offset * message_header_scale);
2418 }
2419 }
2420
2421 src_reg
2422 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2423 src_reg *reladdr, int reg_offset)
2424 {
2425 if (reladdr) {
2426 src_reg index = src_reg(this, glsl_type::int_type);
2427
2428 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2429
2430 /* Pre-gen6, the message header uses byte offsets instead of vec4
2431 * (16-byte) offset units.
2432 */
2433 if (intel->gen < 6) {
2434 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2435 }
2436
2437 return index;
2438 } else {
2439 int message_header_scale = intel->gen < 6 ? 16 : 1;
2440 return src_reg(reg_offset * message_header_scale);
2441 }
2442 }
2443
2444 /**
2445 * Emits an instruction before @inst to load the value named by @orig_src
2446 * from scratch space at @base_offset to @temp.
2447 *
2448 * @base_offset is measured in 32-byte units (the size of a register).
2449 */
2450 void
2451 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2452 dst_reg temp, src_reg orig_src,
2453 int base_offset)
2454 {
2455 int reg_offset = base_offset + orig_src.reg_offset;
2456 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2457
2458 emit_before(inst, SCRATCH_READ(temp, index));
2459 }
2460
2461 /**
2462 * Emits an instruction after @inst to store the value to be written
2463 * to @orig_dst to scratch space at @base_offset, from @temp.
2464 *
2465 * @base_offset is measured in 32-byte units (the size of a register).
2466 */
2467 void
2468 vec4_visitor::emit_scratch_write(vec4_instruction *inst, int base_offset)
2469 {
2470 int reg_offset = base_offset + inst->dst.reg_offset;
2471 src_reg index = get_scratch_offset(inst, inst->dst.reladdr, reg_offset);
2472
2473 /* Create a temporary register to store *inst's result in.
2474 *
2475 * We have to be careful in MOVing from our temporary result register in
2476 * the scratch write. If we swizzle from channels of the temporary that
2477 * weren't initialized, it will confuse live interval analysis, which will
2478 * make spilling fail to make progress.
2479 */
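 /* For example, a destination writemask of .xz yields a read swizzle of
  * .xxzx below, so the uninitialized y/w channels of the temporary are
  * never sourced.
  */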
2480 src_reg temp = src_reg(this, glsl_type::vec4_type);
2481 temp.type = inst->dst.type;
2482 int first_writemask_chan = ffs(inst->dst.writemask) - 1;
2483 int swizzles[4];
2484 for (int i = 0; i < 4; i++)
2485 if (inst->dst.writemask & (1 << i))
2486 swizzles[i] = i;
2487 else
2488 swizzles[i] = first_writemask_chan;
2489 temp.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2490 swizzles[2], swizzles[3]);
2491
2492 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2493 inst->dst.writemask));
2494 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
2495 write->predicate = inst->predicate;
2496 write->ir = inst->ir;
2497 write->annotation = inst->annotation;
2498 inst->insert_after(write);
2499
2500 inst->dst.file = temp.file;
2501 inst->dst.reg = temp.reg;
2502 inst->dst.reg_offset = temp.reg_offset;
2503 inst->dst.reladdr = NULL;
2504 }
2505
2506 /**
2507 * We can't generally support array access in GRF space, because a
2508 * single instruction's destination can only span 2 contiguous
2509 * registers. So, we send all GRF arrays that get variable index
2510 * access to scratch space.
2511 */
2512 void
2513 vec4_visitor::move_grf_array_access_to_scratch()
2514 {
2515 int scratch_loc[this->virtual_grf_count];
2516
2517 for (int i = 0; i < this->virtual_grf_count; i++) {
2518 scratch_loc[i] = -1;
2519 }
2520
2521 /* First, calculate the set of virtual GRFs that need to be punted
2522 * to scratch due to having any array access on them, and where in
2523 * scratch each of them will live.
2524 */
2525 foreach_list(node, &this->instructions) {
2526 vec4_instruction *inst = (vec4_instruction *)node;
2527
2528 if (inst->dst.file == GRF && inst->dst.reladdr &&
2529 scratch_loc[inst->dst.reg] == -1) {
2530 scratch_loc[inst->dst.reg] = c->last_scratch;
2531 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
2532 }
2533
2534 for (int i = 0 ; i < 3; i++) {
2535 src_reg *src = &inst->src[i];
2536
2537 if (src->file == GRF && src->reladdr &&
2538 scratch_loc[src->reg] == -1) {
2539 scratch_loc[src->reg] = c->last_scratch;
2540 c->last_scratch += this->virtual_grf_sizes[src->reg];
2541 }
2542 }
2543 }
2544
2545 /* Now, for anything that will be accessed through scratch, rewrite
2546 * it to load/store. Note that this is a _safe list walk, because
2547 * we may generate a new scratch_write instruction after the one
2548 * we're processing.
2549 */
2550 foreach_list_safe(node, &this->instructions) {
2551 vec4_instruction *inst = (vec4_instruction *)node;
2552
2553 /* Set up the annotation tracking for newly generated instructions. */
2554 base_ir = inst->ir;
2555 current_annotation = inst->annotation;
2556
2557 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2558 emit_scratch_write(inst, scratch_loc[inst->dst.reg]);
2559 }
2560
2561 for (int i = 0 ; i < 3; i++) {
2562 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2563 continue;
2564
2565 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2566
2567 emit_scratch_read(inst, temp, inst->src[i],
2568 scratch_loc[inst->src[i].reg]);
2569
2570 inst->src[i].file = temp.file;
2571 inst->src[i].reg = temp.reg;
2572 inst->src[i].reg_offset = temp.reg_offset;
2573 inst->src[i].reladdr = NULL;
2574 }
2575 }
2576 }
2577
2578 /**
2579 * Emits an instruction before @inst to load the value named by @orig_src
2580 * from the pull constant buffer (surface) at @base_offset to @temp.
2581 */
2582 void
2583 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
2584 dst_reg temp, src_reg orig_src,
2585 int base_offset)
2586 {
2587 int reg_offset = base_offset + orig_src.reg_offset;
2588 src_reg index = src_reg((unsigned)SURF_INDEX_VERT_CONST_BUFFER);
2589 src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
2590 vec4_instruction *load;
2591
2592 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
2593 temp, index, offset);
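 /* MRFs 14-15 are kept free for these loads and for scratch access; see
  * the note in emit_urb_writes().
  */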
2594 load->base_mrf = 14;
2595 load->mlen = 1;
2596 emit_before(inst, load);
2597 }
2598
2599 /**
2600 * Implements array access of uniforms by inserting a
2601 * PULL_CONSTANT_LOAD instruction.
2602 *
2603 * Unlike temporary GRF array access (which we don't support, due to
2604 * the difficulty of doing relative addressing on instruction
2605 * destinations), we could potentially do array access of uniforms
2606 * that were loaded in GRF space as push constants. In real-world
2607 * usage we've seen, though, the arrays being used are always larger
2608 * than we could load as push constants, so just always move all
2609 * uniform array access out to a pull constant buffer.
2610 */
2611 void
2612 vec4_visitor::move_uniform_array_access_to_pull_constants()
2613 {
2614 int pull_constant_loc[this->uniforms];
2615
2616 for (int i = 0; i < this->uniforms; i++) {
2617 pull_constant_loc[i] = -1;
2618 }
2619
2620 /* Walk through and find array access of uniforms. Put a copy of that
2621 * uniform in the pull constant buffer.
2622 *
2623 * Note that we don't move constant-indexed accesses to arrays. No
2624 * testing has been done of the performance impact of this choice.
2625 */
2626 foreach_list_safe(node, &this->instructions) {
2627 vec4_instruction *inst = (vec4_instruction *)node;
2628
2629 for (int i = 0 ; i < 3; i++) {
2630 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
2631 continue;
2632
2633 int uniform = inst->src[i].reg;
2634
2635 /* If this array isn't already present in the pull constant buffer,
2636 * add it.
2637 */
2638 if (pull_constant_loc[uniform] == -1) {
2639 const float **values = &prog_data->param[uniform * 4];
2640
2641 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
2642
2643 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
2644 prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
2645 }
2646 }
2647
2648 /* Set up the annotation tracking for newly generated instructions. */
2649 base_ir = inst->ir;
2650 current_annotation = inst->annotation;
2651
2652 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2653
2654 emit_pull_constant_load(inst, temp, inst->src[i],
2655 pull_constant_loc[uniform]);
2656
2657 inst->src[i].file = temp.file;
2658 inst->src[i].reg = temp.reg;
2659 inst->src[i].reg_offset = temp.reg_offset;
2660 inst->src[i].reladdr = NULL;
2661 }
2662 }
2663
2664 /* Now there are no accesses of the UNIFORM file with a reladdr, so
2665 * no need to track them as larger-than-vec4 objects. This will be
2666 * relied on in cutting out unused uniform vectors from push
2667 * constants.
2668 */
2669 split_uniform_registers();
2670 }
2671
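/**
 * If an unsigned (UD-typed) source has the negate modifier set, resolve it
 * by emitting a MOV into a fresh unsigned temporary and using that instead,
 * so later code never sees a negate on an unsigned operand.
 */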
2672 void
2673 vec4_visitor::resolve_ud_negate(src_reg *reg)
2674 {
2675 if (reg->type != BRW_REGISTER_TYPE_UD ||
2676 !reg->negate)
2677 return;
2678
2679 src_reg temp = src_reg(this, glsl_type::uvec4_type);
2680 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
2681 *reg = temp;
2682 }
2683
2684 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
2685 struct gl_shader_program *prog,
2686 struct brw_shader *shader)
2687 {
2688 this->c = c;
2689 this->p = &c->func;
2690 this->brw = p->brw;
2691 this->intel = &brw->intel;
2692 this->ctx = &intel->ctx;
2693 this->prog = prog;
2694 this->shader = shader;
2695
2696 this->mem_ctx = ralloc_context(NULL);
2697 this->failed = false;
2698
2699 this->base_ir = NULL;
2700 this->current_annotation = NULL;
2701
2702 this->c = c;
2703 this->vp = (struct gl_vertex_program *)
2704 prog->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
2705 this->prog_data = &c->prog_data;
2706
2707 this->variable_ht = hash_table_ctor(0,
2708 hash_table_pointer_hash,
2709 hash_table_pointer_compare);
2710
2711 this->virtual_grf_def = NULL;
2712 this->virtual_grf_use = NULL;
2713 this->virtual_grf_sizes = NULL;
2714 this->virtual_grf_count = 0;
2715 this->virtual_grf_reg_map = NULL;
2716 this->virtual_grf_reg_count = 0;
2717 this->virtual_grf_array_size = 0;
2718 this->live_intervals_valid = false;
2719
2720 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
2721
2722 this->uniforms = 0;
2723 }
2724
2725 vec4_visitor::~vec4_visitor()
2726 {
2727 ralloc_free(this->mem_ctx);
2728 hash_table_dtor(this->variable_ht);
2729 }
2730
2731
2732 void
2733 vec4_visitor::fail(const char *format, ...)
2734 {
2735 va_list va;
2736 char *msg;
2737
2738 if (failed)
2739 return;
2740
2741 failed = true;
2742
2743 va_start(va, format);
2744 msg = ralloc_vasprintf(mem_ctx, format, va);
2745 va_end(va);
2746 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2747
2748 this->fail_msg = msg;
2749
2750 if (INTEL_DEBUG & DEBUG_VS) {
2751 fprintf(stderr, "%s", msg);
2752 }
2753 }
2754
2755 } /* namespace brw */