i965/vs: Replace brw_vs_emit.c with dumping code into the vec4_visitor.
[mesa.git] src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 #include "program/sampler.h"
29 }
30
31 namespace brw {
32
33 vec4_instruction::vec4_instruction(vec4_visitor *v,
34 enum opcode opcode, dst_reg dst,
35 src_reg src0, src_reg src1, src_reg src2)
36 {
37 this->opcode = opcode;
38 this->dst = dst;
39 this->src[0] = src0;
40 this->src[1] = src1;
41 this->src[2] = src2;
42 this->ir = v->base_ir;
43 this->annotation = v->current_annotation;
44 }
45
46 vec4_instruction *
47 vec4_visitor::emit(vec4_instruction *inst)
48 {
49 this->instructions.push_tail(inst);
50
51 return inst;
52 }
53
54 vec4_instruction *
55 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
56 {
57 new_inst->ir = inst->ir;
58 new_inst->annotation = inst->annotation;
59
60 inst->insert_before(new_inst);
61
62 return inst;
63 }
64
65 vec4_instruction *
66 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
67 src_reg src0, src_reg src1, src_reg src2)
68 {
69 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
70 src0, src1, src2));
71 }
72
73
74 vec4_instruction *
75 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
76 {
77 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
78 }
79
80 vec4_instruction *
81 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
82 {
83 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
84 }
85
86 vec4_instruction *
87 vec4_visitor::emit(enum opcode opcode)
88 {
89 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
90 }
91
92 #define ALU1(op) \
93 vec4_instruction * \
94 vec4_visitor::op(dst_reg dst, src_reg src0) \
95 { \
96 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
97 src0); \
98 }
99
100 #define ALU2(op) \
101 vec4_instruction * \
102 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
103 { \
104 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
105 src0, src1); \
106 }
107
108 ALU1(NOT)
109 ALU1(MOV)
110 ALU1(FRC)
111 ALU1(RNDD)
112 ALU1(RNDE)
113 ALU1(RNDZ)
114 ALU2(ADD)
115 ALU2(MUL)
116 ALU2(MACH)
117 ALU2(AND)
118 ALU2(OR)
119 ALU2(XOR)
120 ALU2(DP3)
121 ALU2(DP4)
122 ALU2(DPH)
123
124 /** Gen4 predicated IF. */
125 vec4_instruction *
126 vec4_visitor::IF(uint32_t predicate)
127 {
128 vec4_instruction *inst;
129
130 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
131 inst->predicate = predicate;
132
133 return inst;
134 }
135
136 /** Gen6+ IF with embedded comparison. */
137 vec4_instruction *
138 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
139 {
140 assert(intel->gen >= 6);
141
142 vec4_instruction *inst;
143
144 resolve_ud_negate(&src0);
145 resolve_ud_negate(&src1);
146
147 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
148 src0, src1);
149 inst->conditional_mod = condition;
150
151 return inst;
152 }
153
154 /**
155 * CMP: Sets the low bit of the destination channels with the result
156 * of the comparison, while the upper bits are undefined, and updates
157 * the flag register with the packed 16 bits of the result.
158 */
159 vec4_instruction *
160 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
161 {
162 vec4_instruction *inst;
163
164 /* original gen4 does type conversion to the destination type
165 * before comparison, producing garbage results for floating
166 * point comparisons.
167 */
168 if (intel->gen == 4) {
169 dst.type = src0.type;
170 if (dst.file == HW_REG)
171 dst.fixed_hw_reg.type = dst.type;
172 }
173
174 resolve_ud_negate(&src0);
175 resolve_ud_negate(&src1);
176
177 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
178 inst->conditional_mod = condition;
179
180 return inst;
181 }
182
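/* Illustrative use (a rough sketch following this file's own idiom):
 * callers that want a clean 0/1 boolean follow the CMP with an AND
 * against 1, e.g.
 *
 *    emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_L));
 *    emit(AND(result_dst, result_src, src_reg(0x1)));
 *
 * while callers that only need the flag result (for a predicated SEL,
 * IF, or MOV) pass a null destination such as dst_null_d().
 */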
183 vec4_instruction *
184 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
185 {
186 vec4_instruction *inst;
187
188 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
189 dst, index);
190 inst->base_mrf = 14;
191 inst->mlen = 2;
192
193 return inst;
194 }
195
196 vec4_instruction *
197 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
198 {
199 vec4_instruction *inst;
200
201 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
202 dst, src, index);
203 inst->base_mrf = 13;
204 inst->mlen = 3;
205
206 return inst;
207 }
208
209 void
210 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
211 {
212 static enum opcode dot_opcodes[] = {
213 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
214 };
215
216 emit(dot_opcodes[elements - 2], dst, src0, src1);
217 }
218
219 void
220 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
221 {
222 /* The gen6 math instruction ignores the source modifiers --
223 * swizzle, abs, negate, and at least some parts of the register
224 * region description.
225 *
226 * While it would seem that this MOV could be avoided at this point
227 * in the case that the swizzle is matched up with the destination
228 * writemask, note that uniform packing and register allocation
229 * could rearrange our swizzle, so let's leave this matter up to
230 * copy propagation later.
231 */
232 src_reg temp_src = src_reg(this, glsl_type::vec4_type);
233 emit(MOV(dst_reg(temp_src), src));
234
235 if (dst.writemask != WRITEMASK_XYZW) {
236 /* The gen6 math instruction must be align1, so we can't do
237 * writemasks.
238 */
239 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
240
241 emit(opcode, temp_dst, temp_src);
242
243 emit(MOV(dst, src_reg(temp_dst)));
244 } else {
245 emit(opcode, dst, temp_src);
246 }
247 }
248
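/* Sketch of the expansion above for a gen6 RSQ into dst.x only --
 * roughly:
 *
 *    MOV tmp_src, src        strip swizzle/abs/negate modifiers
 *    RSQ tmp_dst, tmp_src    align1 math, writes all four channels
 *    MOV dst.x, tmp_dst      apply the real writemask
 *
 * A full .xyzw destination skips the trailing MOV and the math writes
 * dst directly.
 */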
249 void
250 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
251 {
252 vec4_instruction *inst = emit(opcode, dst, src);
253 inst->base_mrf = 1;
254 inst->mlen = 1;
255 }
256
257 void
258 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
259 {
260 switch (opcode) {
261 case SHADER_OPCODE_RCP:
262 case SHADER_OPCODE_RSQ:
263 case SHADER_OPCODE_SQRT:
264 case SHADER_OPCODE_EXP2:
265 case SHADER_OPCODE_LOG2:
266 case SHADER_OPCODE_SIN:
267 case SHADER_OPCODE_COS:
268 break;
269 default:
270 assert(!"not reached: bad math opcode");
271 return;
272 }
273
274 if (intel->gen >= 7) {
275 emit(opcode, dst, src);
276 } else if (intel->gen == 6) {
277 return emit_math1_gen6(opcode, dst, src);
278 } else {
279 return emit_math1_gen4(opcode, dst, src);
280 }
281 }
282
283 void
284 vec4_visitor::emit_math2_gen6(enum opcode opcode,
285 dst_reg dst, src_reg src0, src_reg src1)
286 {
287 src_reg expanded;
288
289 /* The gen6 math instruction ignores the source modifiers --
290 * swizzle, abs, negate, and at least some parts of the register
291 * region description. Move the sources to temporaries to make it
292 * generally work.
293 */
294
295 expanded = src_reg(this, glsl_type::vec4_type);
296 expanded.type = src0.type;
297 emit(MOV(dst_reg(expanded), src0));
298 src0 = expanded;
299
300 expanded = src_reg(this, glsl_type::vec4_type);
301 expanded.type = src1.type;
302 emit(MOV(dst_reg(expanded), src1));
303 src1 = expanded;
304
305 if (dst.writemask != WRITEMASK_XYZW) {
306 /* The gen6 math instruction must be align1, so we can't do
307 * writemasks.
308 */
309 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
310 temp_dst.type = dst.type;
311
312 emit(opcode, temp_dst, src0, src1);
313
314 emit(MOV(dst, src_reg(temp_dst)));
315 } else {
316 emit(opcode, dst, src0, src1);
317 }
318 }
319
320 void
321 vec4_visitor::emit_math2_gen4(enum opcode opcode,
322 dst_reg dst, src_reg src0, src_reg src1)
323 {
324 vec4_instruction *inst = emit(opcode, dst, src0, src1);
325 inst->base_mrf = 1;
326 inst->mlen = 2;
327 }
328
329 void
330 vec4_visitor::emit_math(enum opcode opcode,
331 dst_reg dst, src_reg src0, src_reg src1)
332 {
333 switch (opcode) {
334 case SHADER_OPCODE_POW:
335 case SHADER_OPCODE_INT_QUOTIENT:
336 case SHADER_OPCODE_INT_REMAINDER:
337 break;
338 default:
339 assert(!"not reached: unsupported binary math opcode");
340 return;
341 }
342
343 if (intel->gen >= 7) {
344 emit(opcode, dst, src0, src1);
345 } else if (intel->gen == 6) {
346 return emit_math2_gen6(opcode, dst, src0, src1);
347 } else {
348 return emit_math2_gen4(opcode, dst, src0, src1);
349 }
350 }
351
352 void
353 vec4_visitor::visit_instructions(const exec_list *list)
354 {
355 foreach_list(node, list) {
356 ir_instruction *ir = (ir_instruction *)node;
357
358 base_ir = ir;
359 ir->accept(this);
360 }
361 }
362
363
364 static int
365 type_size(const struct glsl_type *type)
366 {
367 unsigned int i;
368 int size;
369
370 switch (type->base_type) {
371 case GLSL_TYPE_UINT:
372 case GLSL_TYPE_INT:
373 case GLSL_TYPE_FLOAT:
374 case GLSL_TYPE_BOOL:
375 if (type->is_matrix()) {
376 return type->matrix_columns;
377 } else {
378 /* Regardless of size of vector, it gets a vec4. This is bad
379 * packing for things like floats, but otherwise arrays become a
380 * mess. Hopefully a later pass over the code can pack scalars
381 * down if appropriate.
382 */
383 return 1;
384 }
385 case GLSL_TYPE_ARRAY:
386 assert(type->length > 0);
387 return type_size(type->fields.array) * type->length;
388 case GLSL_TYPE_STRUCT:
389 size = 0;
390 for (i = 0; i < type->length; i++) {
391 size += type_size(type->fields.structure[i].type);
392 }
393 return size;
394 case GLSL_TYPE_SAMPLER:
395 /* Samplers take up one slot in UNIFORMS[], but they're baked in
396 * at link time.
397 */
398 return 1;
399 default:
400 assert(0);
401 return 0;
402 }
403 }
404
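/* Worked examples for type_size(): a float, bool, vec2 or vec4 all
 * count as 1 (every scalar/vector gets a whole vec4 slot), mat4 counts
 * as 4 (one slot per column), float[10] counts as 10, and
 * struct { vec3 a; float b; } counts as 2. A sampler also counts as 1
 * even though its value is baked in at link time.
 */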
405 int
406 vec4_visitor::virtual_grf_alloc(int size)
407 {
408 if (virtual_grf_array_size <= virtual_grf_count) {
409 if (virtual_grf_array_size == 0)
410 virtual_grf_array_size = 16;
411 else
412 virtual_grf_array_size *= 2;
413 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
414 virtual_grf_array_size);
415 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
416 virtual_grf_array_size);
417 }
418 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
419 virtual_grf_reg_count += size;
420 virtual_grf_sizes[virtual_grf_count] = size;
421 return virtual_grf_count++;
422 }
423
424 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
425 {
426 init();
427
428 this->file = GRF;
429 this->reg = v->virtual_grf_alloc(type_size(type));
430
431 if (type->is_array() || type->is_record()) {
432 this->swizzle = BRW_SWIZZLE_NOOP;
433 } else {
434 this->swizzle = swizzle_for_size(type->vector_elements);
435 }
436
437 this->type = brw_type_for_base_type(type);
438 }
439
440 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
441 {
442 init();
443
444 this->file = GRF;
445 this->reg = v->virtual_grf_alloc(type_size(type));
446
447 if (type->is_array() || type->is_record()) {
448 this->writemask = WRITEMASK_XYZW;
449 } else {
450 this->writemask = (1 << type->vector_elements) - 1;
451 }
452
453 this->type = brw_type_for_base_type(type);
454 }
455
456 /* Our support for uniforms is piggy-backed on the struct
457 * gl_fragment_program, because that's where the values actually
458 * get stored, rather than in some global gl_shader_program uniform
459 * store.
460 */
461 int
462 vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
463 {
464 unsigned int offset = 0;
465 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
466
467 if (type->is_matrix()) {
468 const glsl_type *column = type->column_type();
469
470 for (unsigned int i = 0; i < type->matrix_columns; i++) {
471 offset += setup_uniform_values(loc + offset, column);
472 }
473
474 return offset;
475 }
476
477 switch (type->base_type) {
478 case GLSL_TYPE_FLOAT:
479 case GLSL_TYPE_UINT:
480 case GLSL_TYPE_INT:
481 case GLSL_TYPE_BOOL:
482 for (unsigned int i = 0; i < type->vector_elements; i++) {
483 c->prog_data.param[this->uniforms * 4 + i] = &values[i];
484 }
485
486 /* Set up pad elements to get things aligned to a vec4 boundary. */
487 for (unsigned int i = type->vector_elements; i < 4; i++) {
488 static float zero = 0;
489
490 c->prog_data.param[this->uniforms * 4 + i] = &zero;
491 }
492
493 /* Track the size of this uniform vector, for future packing of
494 * uniforms.
495 */
496 this->uniform_vector_size[this->uniforms] = type->vector_elements;
497 this->uniforms++;
498
499 return 1;
500
501 case GLSL_TYPE_STRUCT:
502 for (unsigned int i = 0; i < type->length; i++) {
503 offset += setup_uniform_values(loc + offset,
504 type->fields.structure[i].type);
505 }
506 return offset;
507
508 case GLSL_TYPE_ARRAY:
509 for (unsigned int i = 0; i < type->length; i++) {
510 offset += setup_uniform_values(loc + offset, type->fields.array);
511 }
512 return offset;
513
514 case GLSL_TYPE_SAMPLER:
515 /* The sampler takes up a slot, but we don't use any values from it. */
516 return 1;
517
518 default:
519 assert(!"not reached");
520 return 0;
521 }
522 }
523
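/* Layout sketch: a vec3 uniform landing in slot u ends up as
 *
 *    c->prog_data.param[u*4 + 0..2]  -> its three float components
 *    c->prog_data.param[u*4 + 3]     -> &zero (vec4 padding)
 *    uniform_vector_size[u]          =  3
 *
 * so each uniform occupies a full vec4 worth of param[] pointers
 * regardless of its GLSL size, until some later pass packs them.
 */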
524 void
525 vec4_visitor::setup_uniform_clipplane_values()
526 {
527 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
528
529 /* Pre-Gen6, we compact clip planes. For example, if the user
530 * enables just clip planes 0, 1, and 3, we will enable clip planes
531 * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
532 * plane 2. This simplifies the implementation of the Gen6 clip
533 * thread.
534 *
535 * In Gen6 and later, we don't compact clip planes, because this
536 * simplifies the implementation of gl_ClipDistance.
537 */
538 int compacted_clipplane_index = 0;
539 for (int i = 0; i < c->key.nr_userclip_plane_consts; ++i) {
540 if (intel->gen < 6 &&
541 !(c->key.userclip_planes_enabled_gen_4_5 & (1 << i))) {
542 continue;
543 }
544 this->uniform_vector_size[this->uniforms] = 4;
545 this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
546 this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
547 for (int j = 0; j < 4; ++j) {
548 c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
549 }
550 ++compacted_clipplane_index;
551 ++this->uniforms;
552 }
553 }
554
555 /* Our support for builtin uniforms is even scarier than non-builtin.
556 * It sits on top of the PROG_STATE_VAR parameters that are
557 * automatically updated from GL context state.
558 */
559 void
560 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
561 {
562 const ir_state_slot *const slots = ir->state_slots;
563 assert(ir->state_slots != NULL);
564
565 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
566 /* This state reference has already been setup by ir_to_mesa,
567 * but we'll get the same index back here. We can reference
568 * ParameterValues directly, since unlike brw_fs.cpp, we never
569 * add new state references during compile.
570 */
571 int index = _mesa_add_state_reference(this->vp->Base.Parameters,
572 (gl_state_index *)slots[i].tokens);
573 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
574
575 this->uniform_vector_size[this->uniforms] = 0;
576 /* Add each of the unique swizzled channels of the element.
577 * This will end up matching the size of the glsl_type of this field.
578 */
579 int last_swiz = -1;
580 for (unsigned int j = 0; j < 4; j++) {
581 int swiz = GET_SWZ(slots[i].swizzle, j);
582 last_swiz = swiz;
583
584 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
585 if (swiz <= last_swiz)
586 this->uniform_vector_size[this->uniforms]++;
587 }
588 this->uniforms++;
589 }
590 }
591
592 dst_reg *
593 vec4_visitor::variable_storage(ir_variable *var)
594 {
595 return (dst_reg *)hash_table_find(this->variable_ht, var);
596 }
597
598 void
599 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
600 {
601 ir_expression *expr = ir->as_expression();
602
603 *predicate = BRW_PREDICATE_NORMAL;
604
605 if (expr) {
606 src_reg op[2];
607 vec4_instruction *inst;
608
609 assert(expr->get_num_operands() <= 2);
610 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
611 expr->operands[i]->accept(this);
612 op[i] = this->result;
613
614 resolve_ud_negate(&op[i]);
615 }
616
617 switch (expr->operation) {
618 case ir_unop_logic_not:
619 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
620 inst->conditional_mod = BRW_CONDITIONAL_Z;
621 break;
622
623 case ir_binop_logic_xor:
624 inst = emit(XOR(dst_null_d(), op[0], op[1]));
625 inst->conditional_mod = BRW_CONDITIONAL_NZ;
626 break;
627
628 case ir_binop_logic_or:
629 inst = emit(OR(dst_null_d(), op[0], op[1]));
630 inst->conditional_mod = BRW_CONDITIONAL_NZ;
631 break;
632
633 case ir_binop_logic_and:
634 inst = emit(AND(dst_null_d(), op[0], op[1]));
635 inst->conditional_mod = BRW_CONDITIONAL_NZ;
636 break;
637
638 case ir_unop_f2b:
639 if (intel->gen >= 6) {
640 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
641 } else {
642 inst = emit(MOV(dst_null_f(), op[0]));
643 inst->conditional_mod = BRW_CONDITIONAL_NZ;
644 }
645 break;
646
647 case ir_unop_i2b:
648 if (intel->gen >= 6) {
649 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
650 } else {
651 inst = emit(MOV(dst_null_d(), op[0]));
652 inst->conditional_mod = BRW_CONDITIONAL_NZ;
653 }
654 break;
655
656 case ir_binop_all_equal:
657 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
658 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
659 break;
660
661 case ir_binop_any_nequal:
662 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
663 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
664 break;
665
666 case ir_unop_any:
667 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
668 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
669 break;
670
671 case ir_binop_greater:
672 case ir_binop_gequal:
673 case ir_binop_less:
674 case ir_binop_lequal:
675 case ir_binop_equal:
676 case ir_binop_nequal:
677 emit(CMP(dst_null_d(), op[0], op[1],
678 brw_conditional_for_comparison(expr->operation)));
679 break;
680
681 default:
682 assert(!"not reached");
683 break;
684 }
685 return;
686 }
687
688 ir->accept(this);
689
690 resolve_ud_negate(&this->result);
691
692 if (intel->gen >= 6) {
693 vec4_instruction *inst = emit(AND(dst_null_d(),
694 this->result, src_reg(1)));
695 inst->conditional_mod = BRW_CONDITIONAL_NZ;
696 } else {
697 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
698 inst->conditional_mod = BRW_CONDITIONAL_NZ;
699 }
700 }
701
702 /**
703 * Emit a gen6 IF statement with the comparison folded into the IF
704 * instruction.
705 */
706 void
707 vec4_visitor::emit_if_gen6(ir_if *ir)
708 {
709 ir_expression *expr = ir->condition->as_expression();
710
711 if (expr) {
712 src_reg op[2];
713 dst_reg temp;
714
715 assert(expr->get_num_operands() <= 2);
716 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
717 expr->operands[i]->accept(this);
718 op[i] = this->result;
719 }
720
721 switch (expr->operation) {
722 case ir_unop_logic_not:
723 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
724 return;
725
726 case ir_binop_logic_xor:
727 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
728 return;
729
730 case ir_binop_logic_or:
731 temp = dst_reg(this, glsl_type::bool_type);
732 emit(OR(temp, op[0], op[1]));
733 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
734 return;
735
736 case ir_binop_logic_and:
737 temp = dst_reg(this, glsl_type::bool_type);
738 emit(AND(temp, op[0], op[1]));
739 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
740 return;
741
742 case ir_unop_f2b:
743 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
744 return;
745
746 case ir_unop_i2b:
747 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
748 return;
749
750 case ir_binop_greater:
751 case ir_binop_gequal:
752 case ir_binop_less:
753 case ir_binop_lequal:
754 case ir_binop_equal:
755 case ir_binop_nequal:
756 emit(IF(op[0], op[1],
757 brw_conditional_for_comparison(expr->operation)));
758 return;
759
760 case ir_binop_all_equal:
761 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
762 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
763 return;
764
765 case ir_binop_any_nequal:
766 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
767 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
768 return;
769
770 case ir_unop_any:
771 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
772 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
773 return;
774
775 default:
776 assert(!"not reached");
777 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
778 return;
779 }
780 return;
781 }
782
783 ir->condition->accept(this);
784
785 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
786 }
787
788 void
789 vec4_visitor::visit(ir_variable *ir)
790 {
791 dst_reg *reg = NULL;
792
793 if (variable_storage(ir))
794 return;
795
796 switch (ir->mode) {
797 case ir_var_in:
798 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
799
800 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
801 * come in as floating point conversions of the integer values.
802 */
803 for (int i = ir->location; i < ir->location + type_size(ir->type); i++) {
804 if (!c->key.gl_fixed_input_size[i])
805 continue;
806
807 dst_reg dst = *reg;
808 dst.type = brw_type_for_base_type(ir->type);
809 dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1;
810 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
811 }
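/* For example, a GL_FIXED attribute holding 0x00010000 (1.0 in 16.16
 * fixed point) arrives from the VF as the float 65536.0; the MUL above
 * by 1.0f/65536.0f rescales it back to 1.0, and only the components
 * named by gl_fixed_input_size[i] are rewritten.
 */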
812 break;
813
814 case ir_var_out:
815 reg = new(mem_ctx) dst_reg(this, ir->type);
816
817 for (int i = 0; i < type_size(ir->type); i++) {
818 output_reg[ir->location + i] = *reg;
819 output_reg[ir->location + i].reg_offset = i;
820 output_reg[ir->location + i].type =
821 brw_type_for_base_type(ir->type->get_scalar_type());
822 output_reg_annotation[ir->location + i] = ir->name;
823 }
824 break;
825
826 case ir_var_auto:
827 case ir_var_temporary:
828 reg = new(mem_ctx) dst_reg(this, ir->type);
829 break;
830
831 case ir_var_uniform:
832 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
833
834 /* Thanks to the lower_ubo_reference pass, we will see only
835 * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
836 * variables, so no need for them to be in variable_ht.
837 */
838 if (ir->uniform_block != -1)
839 return;
840
841 /* Track how big the whole uniform variable is, in case we need to put a
842 * copy of its data into pull constants for array access.
843 */
844 this->uniform_size[this->uniforms] = type_size(ir->type);
845
846 if (!strncmp(ir->name, "gl_", 3)) {
847 setup_builtin_uniform_values(ir);
848 } else {
849 setup_uniform_values(ir->location, ir->type);
850 }
851 break;
852
853 case ir_var_system_value:
854 /* VertexID is stored by the VF as the last vertex element, but
855 * we don't represent it with a flag in inputs_read, so we call
856 * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
857 */
858 reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
859 prog_data->uses_vertexid = true;
860
861 switch (ir->location) {
862 case SYSTEM_VALUE_VERTEX_ID:
863 reg->writemask = WRITEMASK_X;
864 break;
865 case SYSTEM_VALUE_INSTANCE_ID:
866 reg->writemask = WRITEMASK_Y;
867 break;
868 default:
869 assert(!"not reached");
870 break;
871 }
872 break;
873
874 default:
875 assert(!"not reached");
876 }
877
878 reg->type = brw_type_for_base_type(ir->type);
879 hash_table_insert(this->variable_ht, reg, ir);
880 }
881
882 void
883 vec4_visitor::visit(ir_loop *ir)
884 {
885 dst_reg counter;
886
887 /* We don't want debugging output to print the whole body of the
888 * loop as the annotation.
889 */
890 this->base_ir = NULL;
891
892 if (ir->counter != NULL) {
893 this->base_ir = ir->counter;
894 ir->counter->accept(this);
895 counter = *(variable_storage(ir->counter));
896
897 if (ir->from != NULL) {
898 this->base_ir = ir->from;
899 ir->from->accept(this);
900
901 emit(MOV(counter, this->result));
902 }
903 }
904
905 emit(BRW_OPCODE_DO);
906
907 if (ir->to) {
908 this->base_ir = ir->to;
909 ir->to->accept(this);
910
911 emit(CMP(dst_null_d(), src_reg(counter), this->result,
912 brw_conditional_for_comparison(ir->cmp)));
913
914 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
915 inst->predicate = BRW_PREDICATE_NORMAL;
916 }
917
918 visit_instructions(&ir->body_instructions);
919
920
921 if (ir->increment) {
922 this->base_ir = ir->increment;
923 ir->increment->accept(this);
924 emit(ADD(counter, src_reg(counter), this->result));
925 }
926
927 emit(BRW_OPCODE_WHILE);
928 }
929
930 void
931 vec4_visitor::visit(ir_loop_jump *ir)
932 {
933 switch (ir->mode) {
934 case ir_loop_jump::jump_break:
935 emit(BRW_OPCODE_BREAK);
936 break;
937 case ir_loop_jump::jump_continue:
938 emit(BRW_OPCODE_CONTINUE);
939 break;
940 }
941 }
942
943
944 void
945 vec4_visitor::visit(ir_function_signature *ir)
946 {
947 assert(0);
948 (void)ir;
949 }
950
951 void
952 vec4_visitor::visit(ir_function *ir)
953 {
954 /* Ignore function bodies other than main() -- we shouldn't see calls to
955 * them since they should all be inlined.
956 */
957 if (strcmp(ir->name, "main") == 0) {
958 const ir_function_signature *sig;
959 exec_list empty;
960
961 sig = ir->matching_signature(&empty);
962
963 assert(sig);
964
965 visit_instructions(&sig->body);
966 }
967 }
968
969 bool
970 vec4_visitor::try_emit_sat(ir_expression *ir)
971 {
972 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
973 if (!sat_src)
974 return false;
975
976 sat_src->accept(this);
977 src_reg src = this->result;
978
979 this->result = src_reg(this, ir->type);
980 vec4_instruction *inst;
981 inst = emit(MOV(dst_reg(this->result), src));
982 inst->saturate = true;
983
984 return true;
985 }
986
987 void
988 vec4_visitor::emit_bool_comparison(unsigned int op,
989 dst_reg dst, src_reg src0, src_reg src1)
990 {
991 /* original gen4 does destination conversion before comparison. */
992 if (intel->gen < 5)
993 dst.type = src0.type;
994
995 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
996
997 dst.type = BRW_REGISTER_TYPE_D;
998 emit(AND(dst, src_reg(dst), src_reg(0x1)));
999 }
1000
1001 void
1002 vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
1003 src_reg src0, src_reg src1)
1004 {
1005 vec4_instruction *inst;
1006
1007 if (intel->gen >= 6) {
1008 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1009 inst->conditional_mod = conditionalmod;
1010 } else {
1011 emit(CMP(dst, src0, src1, conditionalmod));
1012
1013 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1014 inst->predicate = BRW_PREDICATE_NORMAL;
1015 }
1016 }
1017
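/* Sketch of the two paths above for a MIN (BRW_CONDITIONAL_L): gen6+
 * emits a single conditional-mod SEL,
 *
 *    SEL.L dst, src0, src1
 *
 * while gen4/5 emits a CMP that only matters for its flag write,
 * followed by a predicated SEL:
 *
 *    CMP.L dst, src0, src1
 *    (+f0) SEL dst, src0, src1
 */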
1018 void
1019 vec4_visitor::visit(ir_expression *ir)
1020 {
1021 unsigned int operand;
1022 src_reg op[Elements(ir->operands)];
1023 src_reg result_src;
1024 dst_reg result_dst;
1025 vec4_instruction *inst;
1026
1027 if (try_emit_sat(ir))
1028 return;
1029
1030 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1031 this->result.file = BAD_FILE;
1032 ir->operands[operand]->accept(this);
1033 if (this->result.file == BAD_FILE) {
1034 printf("Failed to get tree for expression operand:\n");
1035 ir->operands[operand]->print();
1036 exit(1);
1037 }
1038 op[operand] = this->result;
1039
1040 /* Matrix expression operands should have been broken down to vector
1041 * operations already.
1042 */
1043 assert(!ir->operands[operand]->type->is_matrix());
1044 }
1045
1046 int vector_elements = ir->operands[0]->type->vector_elements;
1047 if (ir->operands[1]) {
1048 vector_elements = MAX2(vector_elements,
1049 ir->operands[1]->type->vector_elements);
1050 }
1051
1052 this->result.file = BAD_FILE;
1053
1054 /* Storage for our result. Ideally for an assignment we'd be using
1055 * the actual storage for the result here, instead.
1056 */
1057 result_src = src_reg(this, ir->type);
1058 /* convenience for the emit functions below. */
1059 result_dst = dst_reg(result_src);
1060 /* If nothing special happens, this is the result. */
1061 this->result = result_src;
1062 /* Limit writes to the channels that will be used by result_src later.
1063 * This does limit this temp's use as a temporary for multi-instruction
1064 * sequences.
1065 */
1066 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1067
1068 switch (ir->operation) {
1069 case ir_unop_logic_not:
1070 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
1071 * the one's complement of the whole register, not just bit 0.
1072 */
1073 emit(XOR(result_dst, op[0], src_reg(1)));
1074 break;
1075 case ir_unop_neg:
1076 op[0].negate = !op[0].negate;
1077 this->result = op[0];
1078 break;
1079 case ir_unop_abs:
1080 op[0].abs = true;
1081 op[0].negate = false;
1082 this->result = op[0];
1083 break;
1084
1085 case ir_unop_sign:
1086 emit(MOV(result_dst, src_reg(0.0f)));
1087
1088 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1089 inst = emit(MOV(result_dst, src_reg(1.0f)));
1090 inst->predicate = BRW_PREDICATE_NORMAL;
1091
1092 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1093 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1094 inst->predicate = BRW_PREDICATE_NORMAL;
1095
1096 break;
1097
1098 case ir_unop_rcp:
1099 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1100 break;
1101
1102 case ir_unop_exp2:
1103 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1104 break;
1105 case ir_unop_log2:
1106 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1107 break;
1108 case ir_unop_exp:
1109 case ir_unop_log:
1110 assert(!"not reached: should be handled by ir_explog_to_explog2");
1111 break;
1112 case ir_unop_sin:
1113 case ir_unop_sin_reduced:
1114 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1115 break;
1116 case ir_unop_cos:
1117 case ir_unop_cos_reduced:
1118 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1119 break;
1120
1121 case ir_unop_dFdx:
1122 case ir_unop_dFdy:
1123 assert(!"derivatives not valid in vertex shader");
1124 break;
1125
1126 case ir_unop_noise:
1127 assert(!"not reached: should be handled by lower_noise");
1128 break;
1129
1130 case ir_binop_add:
1131 emit(ADD(result_dst, op[0], op[1]));
1132 break;
1133 case ir_binop_sub:
1134 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1135 break;
1136
1137 case ir_binop_mul:
1138 if (ir->type->is_integer()) {
1139 /* For integer multiplication, the MUL uses the low 16 bits
1140 * of one of the operands (src0 on gen6, src1 on gen7). The
1141 * MACH accumulates in the contribution of the upper 16 bits
1142 * of that operand.
1143 *
1144 * FINISHME: Emit just the MUL if we know an operand is small
1145 * enough.
1146 */
1147 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1148
1149 emit(MUL(acc, op[0], op[1]));
1150 emit(MACH(dst_null_d(), op[0], op[1]));
1151 emit(MOV(result_dst, src_reg(acc)));
1152 } else {
1153 emit(MUL(result_dst, op[0], op[1]));
1154 }
1155 break;
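/* Illustrative expansion of the integer path above:
 *
 *    MUL  acc0, op[0], op[1]    low 16 bits of one source x the other
 *    MACH null, op[0], op[1]    folds in the high 16-bit contribution
 *    MOV  dst,  acc0            full 32-bit product from the accumulator
 *
 * The float path is just the single MUL.
 */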
1156 case ir_binop_div:
1157 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1158 assert(ir->type->is_integer());
1159 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1160 break;
1161 case ir_binop_mod:
1162 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1163 assert(ir->type->is_integer());
1164 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1165 break;
1166
1167 case ir_binop_less:
1168 case ir_binop_greater:
1169 case ir_binop_lequal:
1170 case ir_binop_gequal:
1171 case ir_binop_equal:
1172 case ir_binop_nequal: {
1173 emit(CMP(result_dst, op[0], op[1],
1174 brw_conditional_for_comparison(ir->operation)));
1175 emit(AND(result_dst, result_src, src_reg(0x1)));
1176 break;
1177 }
1178
1179 case ir_binop_all_equal:
1180 /* "==" operator producing a scalar boolean. */
1181 if (ir->operands[0]->type->is_vector() ||
1182 ir->operands[1]->type->is_vector()) {
1183 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1184 emit(MOV(result_dst, src_reg(0)));
1185 inst = emit(MOV(result_dst, src_reg(1)));
1186 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1187 } else {
1188 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1189 emit(AND(result_dst, result_src, src_reg(0x1)));
1190 }
1191 break;
1192 case ir_binop_any_nequal:
1193 /* "!=" operator producing a scalar boolean. */
1194 if (ir->operands[0]->type->is_vector() ||
1195 ir->operands[1]->type->is_vector()) {
1196 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1197
1198 emit(MOV(result_dst, src_reg(0)));
1199 inst = emit(MOV(result_dst, src_reg(1)));
1200 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1201 } else {
1202 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1203 emit(AND(result_dst, result_src, src_reg(0x1)));
1204 }
1205 break;
1206
1207 case ir_unop_any:
1208 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1209 emit(MOV(result_dst, src_reg(0)));
1210
1211 inst = emit(MOV(result_dst, src_reg(1)));
1212 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1213 break;
1214
1215 case ir_binop_logic_xor:
1216 emit(XOR(result_dst, op[0], op[1]));
1217 break;
1218
1219 case ir_binop_logic_or:
1220 emit(OR(result_dst, op[0], op[1]));
1221 break;
1222
1223 case ir_binop_logic_and:
1224 emit(AND(result_dst, op[0], op[1]));
1225 break;
1226
1227 case ir_binop_dot:
1228 assert(ir->operands[0]->type->is_vector());
1229 assert(ir->operands[0]->type == ir->operands[1]->type);
1230 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1231 break;
1232
1233 case ir_unop_sqrt:
1234 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1235 break;
1236 case ir_unop_rsq:
1237 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1238 break;
1239
1240 case ir_unop_bitcast_i2f:
1241 case ir_unop_bitcast_u2f:
1242 this->result = op[0];
1243 this->result.type = BRW_REGISTER_TYPE_F;
1244 break;
1245
1246 case ir_unop_bitcast_f2i:
1247 this->result = op[0];
1248 this->result.type = BRW_REGISTER_TYPE_D;
1249 break;
1250
1251 case ir_unop_bitcast_f2u:
1252 this->result = op[0];
1253 this->result.type = BRW_REGISTER_TYPE_UD;
1254 break;
1255
1256 case ir_unop_i2f:
1257 case ir_unop_i2u:
1258 case ir_unop_u2i:
1259 case ir_unop_u2f:
1260 case ir_unop_b2f:
1261 case ir_unop_b2i:
1262 case ir_unop_f2i:
1263 case ir_unop_f2u:
1264 emit(MOV(result_dst, op[0]));
1265 break;
1266 case ir_unop_f2b:
1267 case ir_unop_i2b: {
1268 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1269 emit(AND(result_dst, result_src, src_reg(1)));
1270 break;
1271 }
1272
1273 case ir_unop_trunc:
1274 emit(RNDZ(result_dst, op[0]));
1275 break;
1276 case ir_unop_ceil:
1277 op[0].negate = !op[0].negate;
1278 inst = emit(RNDD(result_dst, op[0]));
1279 this->result.negate = true;
1280 break;
1281 case ir_unop_floor:
1282 inst = emit(RNDD(result_dst, op[0]));
1283 break;
1284 case ir_unop_fract:
1285 inst = emit(FRC(result_dst, op[0]));
1286 break;
1287 case ir_unop_round_even:
1288 emit(RNDE(result_dst, op[0]));
1289 break;
1290
1291 case ir_binop_min:
1292 emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
1293 break;
1294 case ir_binop_max:
1295 emit_minmax(BRW_CONDITIONAL_G, result_dst, op[0], op[1]);
1296 break;
1297
1298 case ir_binop_pow:
1299 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1300 break;
1301
1302 case ir_unop_bit_not:
1303 inst = emit(NOT(result_dst, op[0]));
1304 break;
1305 case ir_binop_bit_and:
1306 inst = emit(AND(result_dst, op[0], op[1]));
1307 break;
1308 case ir_binop_bit_xor:
1309 inst = emit(XOR(result_dst, op[0], op[1]));
1310 break;
1311 case ir_binop_bit_or:
1312 inst = emit(OR(result_dst, op[0], op[1]));
1313 break;
1314
1315 case ir_binop_lshift:
1316 inst = emit(BRW_OPCODE_SHL, result_dst, op[0], op[1]);
1317 break;
1318
1319 case ir_binop_rshift:
1320 if (ir->type->base_type == GLSL_TYPE_INT)
1321 inst = emit(BRW_OPCODE_ASR, result_dst, op[0], op[1]);
1322 else
1323 inst = emit(BRW_OPCODE_SHR, result_dst, op[0], op[1]);
1324 break;
1325
1326 case ir_binop_ubo_load: {
1327 ir_constant *uniform_block = ir->operands[0]->as_constant();
1328 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1329 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
1330 src_reg offset = op[1];
1331
1332 /* Now, load the vector from that offset. */
1333 assert(ir->type->is_vector() || ir->type->is_scalar());
1334
1335 src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
1336 packed_consts.type = result.type;
1337 src_reg surf_index =
1338 src_reg(SURF_INDEX_VS_UBO(uniform_block->value.u[0]));
1339 if (const_offset_ir) {
1340 offset = src_reg(const_offset / 16);
1341 } else {
1342 emit(BRW_OPCODE_SHR, dst_reg(offset), offset, src_reg(4));
1343 }
1344
1345 vec4_instruction *pull =
1346 emit(new(mem_ctx) vec4_instruction(this,
1347 VS_OPCODE_PULL_CONSTANT_LOAD,
1348 dst_reg(packed_consts),
1349 surf_index,
1350 offset));
1351 pull->base_mrf = 14;
1352 pull->mlen = 1;
1353
1354 packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements);
1355 packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
1356 const_offset % 16 / 4,
1357 const_offset % 16 / 4,
1358 const_offset % 16 / 4);
1359
1360 /* UBO bools are any nonzero int. We store bools as either 0 or 1. */
1361 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1362 emit(CMP(result_dst, packed_consts, src_reg(0u),
1363 BRW_CONDITIONAL_NZ));
1364 emit(AND(result_dst, result, src_reg(0x1)));
1365 } else {
1366 emit(MOV(result_dst, packed_consts));
1367 }
1368 break;
1369 }
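/* Worked example of the offset/swizzle math above: a constant byte
 * offset of 20 gives offset = 20/16 = 1 (the second vec4-aligned slot
 * pulled by the constant load) and (20 % 16)/4 = 1, which shifts every
 * swizzle channel up by one, so a float read at that offset ends up
 * broadcasting the .y component of the fetched vec4.
 */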
1370
1371 case ir_quadop_vector:
1372 assert(!"not reached: should be handled by lower_quadop_vector");
1373 break;
1374 }
1375 }
1376
1377
1378 void
1379 vec4_visitor::visit(ir_swizzle *ir)
1380 {
1381 src_reg src;
1382 int i = 0;
1383 int swizzle[4];
1384
1385 /* Note that this is only swizzles in expressions, not those on the left
1386 * hand side of an assignment, which do write masking. See ir_assignment
1387 * for that.
1388 */
1389
1390 ir->val->accept(this);
1391 src = this->result;
1392 assert(src.file != BAD_FILE);
1393
1394 for (i = 0; i < ir->type->vector_elements; i++) {
1395 switch (i) {
1396 case 0:
1397 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1398 break;
1399 case 1:
1400 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1401 break;
1402 case 2:
1403 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1404 break;
1405 case 3:
1406 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1407 break;
1408 }
1409 }
1410 for (; i < 4; i++) {
1411 /* Replicate the last channel out. */
1412 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1413 }
1414
1415 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1416
1417 this->result = src;
1418 }
1419
1420 void
1421 vec4_visitor::visit(ir_dereference_variable *ir)
1422 {
1423 const struct glsl_type *type = ir->type;
1424 dst_reg *reg = variable_storage(ir->var);
1425
1426 if (!reg) {
1427 fail("Failed to find variable storage for %s\n", ir->var->name);
1428 this->result = src_reg(brw_null_reg());
1429 return;
1430 }
1431
1432 this->result = src_reg(*reg);
1433
1434 /* System values get their swizzle from the dst_reg writemask */
1435 if (ir->var->mode == ir_var_system_value)
1436 return;
1437
1438 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1439 this->result.swizzle = swizzle_for_size(type->vector_elements);
1440 }
1441
1442 void
1443 vec4_visitor::visit(ir_dereference_array *ir)
1444 {
1445 ir_constant *constant_index;
1446 src_reg src;
1447 int element_size = type_size(ir->type);
1448
1449 constant_index = ir->array_index->constant_expression_value();
1450
1451 ir->array->accept(this);
1452 src = this->result;
1453
1454 if (constant_index) {
1455 src.reg_offset += constant_index->value.i[0] * element_size;
1456 } else {
1457 /* Variable index array dereference. It eats the "vec4" of the
1458 * base of the array and an index that offsets the Mesa register
1459 * index.
1460 */
1461 ir->array_index->accept(this);
1462
1463 src_reg index_reg;
1464
1465 if (element_size == 1) {
1466 index_reg = this->result;
1467 } else {
1468 index_reg = src_reg(this, glsl_type::int_type);
1469
1470 emit(MUL(dst_reg(index_reg), this->result, src_reg(element_size)));
1471 }
1472
1473 if (src.reladdr) {
1474 src_reg temp = src_reg(this, glsl_type::int_type);
1475
1476 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1477
1478 index_reg = temp;
1479 }
1480
1481 src.reladdr = ralloc(mem_ctx, src_reg);
1482 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1483 }
1484
1485 /* If the type is smaller than a vec4, replicate the last channel out. */
1486 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1487 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1488 else
1489 src.swizzle = BRW_SWIZZLE_NOOP;
1490 src.type = brw_type_for_base_type(ir->type);
1491
1492 this->result = src;
1493 }
1494
1495 void
1496 vec4_visitor::visit(ir_dereference_record *ir)
1497 {
1498 unsigned int i;
1499 const glsl_type *struct_type = ir->record->type;
1500 int offset = 0;
1501
1502 ir->record->accept(this);
1503
1504 for (i = 0; i < struct_type->length; i++) {
1505 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1506 break;
1507 offset += type_size(struct_type->fields.structure[i].type);
1508 }
1509
1510 /* If the type is smaller than a vec4, replicate the last channel out. */
1511 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1512 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1513 else
1514 this->result.swizzle = BRW_SWIZZLE_NOOP;
1515 this->result.type = brw_type_for_base_type(ir->type);
1516
1517 this->result.reg_offset += offset;
1518 }
1519
1520 /**
1521 * We want to be careful in assignment setup to hit the actual storage
1522 * instead of potentially using a temporary like we might with the
1523 * ir_dereference handler.
1524 */
1525 static dst_reg
1526 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1527 {
1528 /* The LHS must be a dereference. If the LHS is a variable indexed array
1529 * access of a vector, it must be separated into a series of conditional moves
1530 * before reaching this point (see ir_vec_index_to_cond_assign).
1531 */
1532 assert(ir->as_dereference());
1533 ir_dereference_array *deref_array = ir->as_dereference_array();
1534 if (deref_array) {
1535 assert(!deref_array->array->type->is_vector());
1536 }
1537
1538 /* Use the rvalue deref handler for the most part. We'll ignore
1539 * swizzles in it and write swizzles using writemask, though.
1540 */
1541 ir->accept(v);
1542 return dst_reg(v->result);
1543 }
1544
1545 void
1546 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1547 const struct glsl_type *type, uint32_t predicate)
1548 {
1549 if (type->base_type == GLSL_TYPE_STRUCT) {
1550 for (unsigned int i = 0; i < type->length; i++) {
1551 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1552 }
1553 return;
1554 }
1555
1556 if (type->is_array()) {
1557 for (unsigned int i = 0; i < type->length; i++) {
1558 emit_block_move(dst, src, type->fields.array, predicate);
1559 }
1560 return;
1561 }
1562
1563 if (type->is_matrix()) {
1564 const struct glsl_type *vec_type;
1565
1566 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1567 type->vector_elements, 1);
1568
1569 for (int i = 0; i < type->matrix_columns; i++) {
1570 emit_block_move(dst, src, vec_type, predicate);
1571 }
1572 return;
1573 }
1574
1575 assert(type->is_scalar() || type->is_vector());
1576
1577 dst->type = brw_type_for_base_type(type);
1578 src->type = dst->type;
1579
1580 dst->writemask = (1 << type->vector_elements) - 1;
1581
1582 src->swizzle = swizzle_for_size(type->vector_elements);
1583
1584 vec4_instruction *inst = emit(MOV(*dst, *src));
1585 inst->predicate = predicate;
1586
1587 dst->reg_offset++;
1588 src->reg_offset++;
1589 }
1590
1591
1592 /* If the RHS processing resulted in an instruction generating a
1593 * temporary value, and it would be easy to rewrite the instruction to
1594 * generate its result right into the LHS instead, do so. This ends
1595 * up reliably removing instructions where it can be tricky to do so
1596 * later without real UD chain information.
1597 */
1598 bool
1599 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1600 dst_reg dst,
1601 src_reg src,
1602 vec4_instruction *pre_rhs_inst,
1603 vec4_instruction *last_rhs_inst)
1604 {
1605 /* This could be supported, but it would take more smarts. */
1606 if (ir->condition)
1607 return false;
1608
1609 if (pre_rhs_inst == last_rhs_inst)
1610 return false; /* No instructions generated to work with. */
1611
1612 /* Make sure the last instruction generated our source reg. */
1613 if (src.file != GRF ||
1614 src.file != last_rhs_inst->dst.file ||
1615 src.reg != last_rhs_inst->dst.reg ||
1616 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1617 src.reladdr ||
1618 src.abs ||
1619 src.negate ||
1620 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1621 return false;
1622
1623 /* Check that that last instruction fully initialized the channels
1624 * we want to use, in the order we want to use them. We could
1625 * potentially reswizzle the operands of many instructions so that
1626 * we could handle out of order channels, but don't yet.
1627 */
1628
1629 for (unsigned i = 0; i < 4; i++) {
1630 if (dst.writemask & (1 << i)) {
1631 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1632 return false;
1633
1634 if (BRW_GET_SWZ(src.swizzle, i) != i)
1635 return false;
1636 }
1637 }
1638
1639 /* Success! Rewrite the instruction. */
1640 last_rhs_inst->dst.file = dst.file;
1641 last_rhs_inst->dst.reg = dst.reg;
1642 last_rhs_inst->dst.reg_offset = dst.reg_offset;
1643 last_rhs_inst->dst.reladdr = dst.reladdr;
1644 last_rhs_inst->dst.writemask &= dst.writemask;
1645
1646 return true;
1647 }
1648
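/* Example of the rewrite: if the RHS of "dst = a + b" ended in
 * "ADD tmp, a, b", the ADD's destination is redirected straight at dst
 * (with its writemask narrowed to the channels actually assigned), so
 * visit(ir_assignment) can skip the copy MOV it would otherwise emit.
 */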
1649 void
1650 vec4_visitor::visit(ir_assignment *ir)
1651 {
1652 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1653 uint32_t predicate = BRW_PREDICATE_NONE;
1654
1655 if (!ir->lhs->type->is_scalar() &&
1656 !ir->lhs->type->is_vector()) {
1657 ir->rhs->accept(this);
1658 src_reg src = this->result;
1659
1660 if (ir->condition) {
1661 emit_bool_to_cond_code(ir->condition, &predicate);
1662 }
1663
1664 /* emit_block_move doesn't account for swizzles in the source register.
1665 * This should be ok, since the source register is a structure or an
1666 * array, and those can't be swizzled. But double-check to be sure.
1667 */
1668 assert(src.swizzle ==
1669 (ir->rhs->type->is_matrix()
1670 ? swizzle_for_size(ir->rhs->type->vector_elements)
1671 : BRW_SWIZZLE_NOOP));
1672
1673 emit_block_move(&dst, &src, ir->rhs->type, predicate);
1674 return;
1675 }
1676
1677 /* Now we're down to just a scalar/vector with writemasks. */
1678 int i;
1679
1680 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1681 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1682
1683 ir->rhs->accept(this);
1684
1685 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1686
1687 src_reg src = this->result;
1688
1689 int swizzles[4];
1690 int first_enabled_chan = 0;
1691 int src_chan = 0;
1692
1693 assert(ir->lhs->type->is_vector() ||
1694 ir->lhs->type->is_scalar());
1695 dst.writemask = ir->write_mask;
1696
1697 for (int i = 0; i < 4; i++) {
1698 if (dst.writemask & (1 << i)) {
1699 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1700 break;
1701 }
1702 }
1703
1704 /* Swizzle a small RHS vector into the channels being written.
1705 *
1706 * glsl ir treats write_mask as dictating how many channels are
1707 * present on the RHS while in our instructions we need to make
1708 * those channels appear in the slots of the vec4 they're written to.
1709 */
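/* Concrete example: writing a vec2 RHS into dst.yz (write_mask = 0x6)
 * with an incoming swizzle of .xyyy makes the loop below produce
 * swizzles[] = { Y, X, Y, Y }, i.e. src.x lands in channel 1 and src.y
 * in channel 2; the unwritten channels are just padding and are masked
 * off by the writemask anyway.
 */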
1710 for (int i = 0; i < 4; i++) {
1711 if (dst.writemask & (1 << i))
1712 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1713 else
1714 swizzles[i] = first_enabled_chan;
1715 }
1716 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1717 swizzles[2], swizzles[3]);
1718
1719 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1720 return;
1721 }
1722
1723 if (ir->condition) {
1724 emit_bool_to_cond_code(ir->condition, &predicate);
1725 }
1726
1727 for (i = 0; i < type_size(ir->lhs->type); i++) {
1728 vec4_instruction *inst = emit(MOV(dst, src));
1729 inst->predicate = predicate;
1730
1731 dst.reg_offset++;
1732 src.reg_offset++;
1733 }
1734 }
1735
1736 void
1737 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1738 {
1739 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1740 foreach_list(node, &ir->components) {
1741 ir_constant *field_value = (ir_constant *)node;
1742
1743 emit_constant_values(dst, field_value);
1744 }
1745 return;
1746 }
1747
1748 if (ir->type->is_array()) {
1749 for (unsigned int i = 0; i < ir->type->length; i++) {
1750 emit_constant_values(dst, ir->array_elements[i]);
1751 }
1752 return;
1753 }
1754
1755 if (ir->type->is_matrix()) {
1756 for (int i = 0; i < ir->type->matrix_columns; i++) {
1757 float *vec = &ir->value.f[i * ir->type->vector_elements];
1758
1759 for (int j = 0; j < ir->type->vector_elements; j++) {
1760 dst->writemask = 1 << j;
1761 dst->type = BRW_REGISTER_TYPE_F;
1762
1763 emit(MOV(*dst, src_reg(vec[j])));
1764 }
1765 dst->reg_offset++;
1766 }
1767 return;
1768 }
1769
1770 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
1771
1772 for (int i = 0; i < ir->type->vector_elements; i++) {
1773 if (!(remaining_writemask & (1 << i)))
1774 continue;
1775
1776 dst->writemask = 1 << i;
1777 dst->type = brw_type_for_base_type(ir->type);
1778
1779 /* Find other components that match the one we're about to
1780 * write. Emits fewer instructions for things like vec4(0.5,
1781 * 1.5, 1.5, 1.5).
1782 */
1783 for (int j = i + 1; j < ir->type->vector_elements; j++) {
1784 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1785 if (ir->value.b[i] == ir->value.b[j])
1786 dst->writemask |= (1 << j);
1787 } else {
1788 /* u, i, and f storage all line up, so no need for a
1789 * switch case for comparing each type.
1790 */
1791 if (ir->value.u[i] == ir->value.u[j])
1792 dst->writemask |= (1 << j);
1793 }
1794 }
1795
1796 switch (ir->type->base_type) {
1797 case GLSL_TYPE_FLOAT:
1798 emit(MOV(*dst, src_reg(ir->value.f[i])));
1799 break;
1800 case GLSL_TYPE_INT:
1801 emit(MOV(*dst, src_reg(ir->value.i[i])));
1802 break;
1803 case GLSL_TYPE_UINT:
1804 emit(MOV(*dst, src_reg(ir->value.u[i])));
1805 break;
1806 case GLSL_TYPE_BOOL:
1807 emit(MOV(*dst, src_reg(ir->value.b[i])));
1808 break;
1809 default:
1810 assert(!"Non-float/uint/int/bool constant");
1811 break;
1812 }
1813
1814 remaining_writemask &= ~dst->writemask;
1815 }
1816 dst->reg_offset++;
1817 }
1818
1819 void
1820 vec4_visitor::visit(ir_constant *ir)
1821 {
1822 dst_reg dst = dst_reg(this, ir->type);
1823 this->result = src_reg(dst);
1824
1825 emit_constant_values(&dst, ir);
1826 }
1827
1828 void
1829 vec4_visitor::visit(ir_call *ir)
1830 {
1831 assert(!"not reached");
1832 }
1833
1834 void
1835 vec4_visitor::visit(ir_texture *ir)
1836 {
1837 int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &vp->Base);
1838
1839 /* Should be lowered by do_lower_texture_projection */
1840 assert(!ir->projector);
1841
1842 /* Generate code to compute all the subexpression trees. This has to be
1843 * done before loading any values into MRFs for the sampler message since
1844 * generating these values may involve SEND messages that need the MRFs.
1845 */
1846 src_reg coordinate;
1847 if (ir->coordinate) {
1848 ir->coordinate->accept(this);
1849 coordinate = this->result;
1850 }
1851
1852 src_reg shadow_comparitor;
1853 if (ir->shadow_comparitor) {
1854 ir->shadow_comparitor->accept(this);
1855 shadow_comparitor = this->result;
1856 }
1857
1858 src_reg lod, dPdx, dPdy;
1859 switch (ir->op) {
1860 case ir_txf:
1861 case ir_txl:
1862 case ir_txs:
1863 ir->lod_info.lod->accept(this);
1864 lod = this->result;
1865 break;
1866 case ir_txd:
1867 ir->lod_info.grad.dPdx->accept(this);
1868 dPdx = this->result;
1869
1870 ir->lod_info.grad.dPdy->accept(this);
1871 dPdy = this->result;
1872 break;
1873 case ir_tex:
1874 case ir_txb:
1875 break;
1876 }
1877
1878 vec4_instruction *inst = NULL;
1879 switch (ir->op) {
1880 case ir_tex:
1881 case ir_txl:
1882 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
1883 break;
1884 case ir_txd:
1885 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
1886 break;
1887 case ir_txf:
1888 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
1889 break;
1890 case ir_txs:
1891 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
1892 break;
1893 case ir_txb:
1894 assert(!"TXB is not valid for vertex shaders.");
1895 }
1896
1897 /* Texel offsets go in the message header; Gen4 also requires headers. */
1898 inst->header_present = ir->offset || intel->gen < 5;
1899 inst->base_mrf = 2;
1900 inst->mlen = inst->header_present + 1; /* always at least one */
1901 inst->sampler = sampler;
1902 inst->dst = dst_reg(this, ir->type);
1903 inst->shadow_compare = ir->shadow_comparitor != NULL;
1904
1905 if (ir->offset != NULL && ir->op != ir_txf)
1906 inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
1907
1908 /* MRF for the first parameter */
1909 int param_base = inst->base_mrf + inst->header_present;
1910
1911 if (ir->op == ir_txs) {
1912 int writemask = intel->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
1913 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, writemask),
1914 lod));
1915 } else {
1916 int i, coord_mask = 0, zero_mask = 0;
1917 /* Load the coordinate */
1918 /* FINISHME: gl_clamp_mask and saturate */
1919 for (i = 0; i < ir->coordinate->type->vector_elements; i++)
1920 coord_mask |= (1 << i);
1921 for (; i < 4; i++)
1922 zero_mask |= (1 << i);
1923
1924 if (ir->offset && ir->op == ir_txf) {
1925 /* It appears that the ld instruction used for txf does its
1926 * address bounds check before adding in the offset. To work
1927 * around this, just add the integer offset to the integer
1928 * texel coordinate, and don't put the offset in the header.
1929 */
1930 ir_constant *offset = ir->offset->as_constant();
1931 assert(offset);
1932
1933 for (int j = 0; j < ir->coordinate->type->vector_elements; j++) {
1934 src_reg src = coordinate;
1935 src.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(src.swizzle, j),
1936 BRW_GET_SWZ(src.swizzle, j),
1937 BRW_GET_SWZ(src.swizzle, j),
1938 BRW_GET_SWZ(src.swizzle, j));
1939 emit(ADD(dst_reg(MRF, param_base, ir->coordinate->type, 1 << j),
1940 src, offset->value.i[j]));
1941 }
1942 } else {
1943 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
1944 coordinate));
1945 }
1946 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
1947 src_reg(0)));
1948 /* Load the shadow comparitor */
1949 if (ir->shadow_comparitor) {
1950 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
1951 WRITEMASK_X),
1952 shadow_comparitor));
1953 inst->mlen++;
1954 }
1955
1956 /* Load the LOD info */
1957 if (ir->op == ir_txl) {
1958 int mrf, writemask;
1959 if (intel->gen >= 5) {
1960 mrf = param_base + 1;
1961 if (ir->shadow_comparitor) {
1962 writemask = WRITEMASK_Y;
1963 /* mlen already incremented */
1964 } else {
1965 writemask = WRITEMASK_X;
1966 inst->mlen++;
1967 }
1968 } else /* intel->gen == 4 */ {
1969 mrf = param_base;
1970 writemask = WRITEMASK_Z;
1971 }
1972 emit(MOV(dst_reg(MRF, mrf, ir->lod_info.lod->type, writemask), lod));
1973 } else if (ir->op == ir_txf) {
1974 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, WRITEMASK_W),
1975 lod));
1976 } else if (ir->op == ir_txd) {
1977 const glsl_type *type = ir->lod_info.grad.dPdx->type;
1978
1979 if (intel->gen >= 5) {
1980 dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1981 dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1982 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
1983 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
1984 inst->mlen++;
1985
1986 if (ir->type->vector_elements == 3) {
1987 dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
1988 dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
1989 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
1990 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
1991 inst->mlen++;
1992 }
1993 } else /* intel->gen == 4 */ {
1994 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
1995 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
1996 inst->mlen += 2;
1997 }
1998 }
1999 }
2000
2001 emit(inst);
2002
2003 swizzle_result(ir, src_reg(inst->dst), sampler);
2004 }
2005
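/*
 * Apply the per-sampler result swizzle from the program key (e.g. GL
 * texture swizzle state) to the value returned by the sampler message.
 *
 * For illustration (hypothetical key): a swizzle of (R, G, B, ONE) for a
 * three-component texture gives copy_mask = 0x7 and one_mask = 0x8 below,
 * so .xyz is MOVed from the sampler result and .w is written as 1.0f.
 */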
2006 void
2007 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
2008 {
2009 this->result = orig_val;
2010
2011 int s = c->key.tex.swizzles[sampler];
2012
2013 if (ir->op == ir_txs || ir->type == glsl_type::float_type
2014 || s == SWIZZLE_NOOP)
2015 return;
2016
2017 int zero_mask = 0, one_mask = 0, copy_mask = 0;
2018 int swizzle[4];
2019
2020 for (int i = 0; i < 4; i++) {
2021 switch (GET_SWZ(s, i)) {
2022 case SWIZZLE_ZERO:
2023 zero_mask |= (1 << i);
2024 break;
2025 case SWIZZLE_ONE:
2026 one_mask |= (1 << i);
2027 break;
2028 default:
2029 copy_mask |= (1 << i);
2030 swizzle[i] = GET_SWZ(s, i);
2031 break;
2032 }
2033 }
2034
2035 this->result = src_reg(this, ir->type);
2036 dst_reg swizzled_result(this->result);
2037
2038 if (copy_mask) {
2039 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2040 swizzled_result.writemask = copy_mask;
2041 emit(MOV(swizzled_result, orig_val));
2042 }
2043
2044 if (zero_mask) {
2045 swizzled_result.writemask = zero_mask;
2046 emit(MOV(swizzled_result, src_reg(0.0f)));
2047 }
2048
2049 if (one_mask) {
2050 swizzled_result.writemask = one_mask;
2051 emit(MOV(swizzled_result, src_reg(1.0f)));
2052 }
2053 }
2054
2055 void
2056 vec4_visitor::visit(ir_return *ir)
2057 {
2058 assert(!"not reached");
2059 }
2060
2061 void
2062 vec4_visitor::visit(ir_discard *ir)
2063 {
2064 assert(!"not reached");
2065 }
2066
2067 void
2068 vec4_visitor::visit(ir_if *ir)
2069 {
2070 /* Don't point the annotation at the if statement, because then it plus
2071 * the then and else blocks get printed.
2072 */
2073 this->base_ir = ir->condition;
2074
2075 if (intel->gen == 6) {
2076 emit_if_gen6(ir);
2077 } else {
2078 uint32_t predicate;
2079 emit_bool_to_cond_code(ir->condition, &predicate);
2080 emit(IF(predicate));
2081 }
2082
2083 visit_instructions(&ir->then_instructions);
2084
2085 if (!ir->else_instructions.is_empty()) {
2086 this->base_ir = ir->condition;
2087 emit(BRW_OPCODE_ELSE);
2088
2089 visit_instructions(&ir->else_instructions);
2090 }
2091
2092 this->base_ir = ir->condition;
2093 emit(BRW_OPCODE_ENDIF);
2094 }
2095
2096 void
2097 vec4_visitor::emit_ndc_computation()
2098 {
2099 /* Get the position */
2100 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
2101
2102 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
2103 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2104 output_reg[BRW_VERT_RESULT_NDC] = ndc;
2105
2106 current_annotation = "NDC";
2107 dst_reg ndc_w = ndc;
2108 ndc_w.writemask = WRITEMASK_W;
2109 src_reg pos_w = pos;
2110 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2111 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2112
2113 dst_reg ndc_xyz = ndc;
2114 ndc_xyz.writemask = WRITEMASK_XYZ;
2115
2116 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
2117 }
2118
2119 void
2120 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2121 {
2122 if (intel->gen < 6 &&
2123 ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
2124 c->key.userclip_active || brw->has_negative_rhw_bug)) {
2125 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2126 dst_reg header1_w = header1;
2127 header1_w.writemask = WRITEMASK_W;
2128 GLuint i;
2129
2130 emit(MOV(header1, 0u));
2131
2132 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2133 src_reg psiz = src_reg(output_reg[VERT_RESULT_PSIZ]);
2134
2135 current_annotation = "Point size";
2136 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2137 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
2138 }
2139
2140 current_annotation = "Clipping flags";
2141 for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
2142 vec4_instruction *inst;
2143
2144 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
2145 src_reg(this->userplane[i])));
2146 inst->conditional_mod = BRW_CONDITIONAL_L;
2147
2148 inst = emit(OR(header1_w, src_reg(header1_w), 1u << i));
2149 inst->predicate = BRW_PREDICATE_NORMAL;
2150 }
2151
2152 /* i965 clipping workaround:
2153 * 1) Test for -ve rhw
2154 * 2) If set,
2155 * set ndc = (0,0,0,0)
2156 * set ucp[6] = 1
2157 *
2158 * Later, clipping will detect ucp[6] and ensure the primitive is
2159 * clipped against all fixed planes.
2160 */
2161 if (brw->has_negative_rhw_bug) {
2162 #if 0
2163 /* FINISHME */
2164 brw_CMP(p,
2165 vec8(brw_null_reg()),
2166 BRW_CONDITIONAL_L,
2167 brw_swizzle1(output_reg[BRW_VERT_RESULT_NDC], 3),
2168 brw_imm_f(0));
2169
2170 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
2171 brw_MOV(p, output_reg[BRW_VERT_RESULT_NDC], brw_imm_f(0));
2172 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2173 #endif
2174 }
2175
2176 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2177 } else if (intel->gen < 6) {
2178 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2179 } else {
2180 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2181 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2182 emit(MOV(brw_writemask(reg, WRITEMASK_W),
2183 src_reg(output_reg[VERT_RESULT_PSIZ])));
2184 }
2185 }
2186 }
2187
2188 void
2189 vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
2190 {
2191 if (intel->gen < 6) {
2192 /* Clip distance slots are set aside in gen5, but they are not used. It
2193 * is not clear whether we actually need to set aside space for them,
2194 * but the performance cost is negligible.
2195 */
2196 return;
2197 }
2198
2199 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2200 *
2201 * "If a linked set of shaders forming the vertex stage contains no
2202 * static write to gl_ClipVertex or gl_ClipDistance, but the
2203 * application has requested clipping against user clip planes through
2204 * the API, then the coordinate written to gl_Position is used for
2205 * comparison against the user clip planes."
2206 *
2207 * This function is only called if the shader didn't write to
2208 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
2209 * if the user wrote to it; otherwise we use gl_Position.
2210 */
2211 gl_vert_result clip_vertex = VERT_RESULT_CLIP_VERTEX;
2212 if (!(c->prog_data.outputs_written
2213 & BITFIELD64_BIT(VERT_RESULT_CLIP_VERTEX))) {
2214 clip_vertex = VERT_RESULT_HPOS;
2215 }
2216
2217 for (int i = 0; i + offset < c->key.nr_userclip_plane_consts && i < 4;
2218 ++i) {
2219 emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
2220 src_reg(output_reg[clip_vertex]),
2221 src_reg(this->userplane[i + offset])));
2222 }
2223 }
2224
2225 void
2226 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int vert_result)
2227 {
2228 assert (vert_result < VERT_RESULT_MAX);
2229 reg.type = output_reg[vert_result].type;
2230 current_annotation = output_reg_annotation[vert_result];
2231 /* Copy the register, saturating if necessary */
2232 vec4_instruction *inst = emit(MOV(reg,
2233 src_reg(output_reg[vert_result])));
2234 if ((vert_result == VERT_RESULT_COL0 ||
2235 vert_result == VERT_RESULT_COL1 ||
2236 vert_result == VERT_RESULT_BFC0 ||
2237 vert_result == VERT_RESULT_BFC1) &&
2238 c->key.clamp_vertex_color) {
2239 inst->saturate = true;
2240 }
2241 }
2242
2243 void
2244 vec4_visitor::emit_urb_slot(int mrf, int vert_result)
2245 {
2246 struct brw_reg hw_reg = brw_message_reg(mrf);
2247 dst_reg reg = dst_reg(MRF, mrf);
2248 reg.type = BRW_REGISTER_TYPE_F;
2249
2250 switch (vert_result) {
2251 case VERT_RESULT_PSIZ:
2252 /* PSIZ is always in slot 0, and is coupled with other flags. */
2253 current_annotation = "indices, point width, clip flags";
2254 emit_psiz_and_flags(hw_reg);
2255 break;
2256 case BRW_VERT_RESULT_NDC:
2257 current_annotation = "NDC";
2258 emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
2259 break;
2260 case BRW_VERT_RESULT_HPOS_DUPLICATE:
2261 case VERT_RESULT_HPOS:
2262 current_annotation = "gl_Position";
2263 emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
2264 break;
2265 case VERT_RESULT_CLIP_DIST0:
2266 case VERT_RESULT_CLIP_DIST1:
2267 if (this->c->key.uses_clip_distance) {
2268 emit_generic_urb_slot(reg, vert_result);
2269 } else {
2270 current_annotation = "user clip distances";
2271 emit_clip_distances(hw_reg, (vert_result - VERT_RESULT_CLIP_DIST0) * 4);
2272 }
2273 break;
2274 case VERT_RESULT_EDGE:
2275 /* This is present when doing unfilled polygons. We're supposed to copy
2276 * the edge flag from the user-provided vertex array
2277 * (glEdgeFlagPointer); otherwise we copy the current value of that
2278 * attribute (which starts as 1.0f). This is then used in clipping to
2279 * determine which edges should be drawn as wireframe.
2280 */
2281 current_annotation = "edge flag";
2282 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
2283 glsl_type::float_type, WRITEMASK_XYZW))));
2284 break;
2285 case BRW_VERT_RESULT_PAD:
2286 /* No need to write to this slot */
2287 break;
2288 default:
2289 emit_generic_urb_slot(reg, vert_result);
2290 break;
2291 }
2292 }
2293
2294 static int
2295 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2296 {
2297 struct intel_context *intel = &brw->intel;
2298
2299 if (intel->gen >= 6) {
2300 /* URB data written (does not include the message header reg) must
2301 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
2302 * section 5.4.3.2.2: URB_INTERLEAVED.
2303 *
2304 * URB entries are allocated on a multiple of 1024 bits, so an
2305 * extra 128 bits written here to make the end align to 256 is
2306 * no problem.
2307 */
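      /* Worked example (illustrative): a payload of 1 header + 7 data regs
       * gives mlen == 8; since 8 % 2 != 1, mlen is bumped to 9, padding the
       * data portion to 8 regs (a multiple of 2 regs, i.e. 256 bits).  An
       * mlen of 7 (6 data regs) is already aligned and is left alone.
       */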
2308 if ((mlen % 2) != 1)
2309 mlen++;
2310 }
2311
2312 return mlen;
2313 }
2314
2315 /**
2316 * Generates the VUE payload plus the 1 or 2 URB write instructions to
2317 * complete the VS thread.
2318 *
2319 * The VUE layout is documented in Volume 2a.
2320 */
2321 void
2322 vec4_visitor::emit_urb_writes()
2323 {
2324 /* MRF 0 is reserved for the debugger, so start with message header
2325 * in MRF 1.
2326 */
2327 int base_mrf = 1;
2328 int mrf = base_mrf;
2329 /* In the process of generating our URB write message contents, we
2330 * may need to unspill a register or load from an array. Those
2331 * reads would use MRFs 14-15.
2332 */
2333 int max_usable_mrf = 13;
2334
2335 /* The following assertion verifies that max_usable_mrf causes an
2336 * even-numbered amount of URB write data, which will meet gen6's
2337 * requirements for length alignment.
2338 */
2339 assert ((max_usable_mrf - base_mrf) % 2 == 0);
2340
2341 /* First mrf is the g0-based message header containing URB handles and such,
2342 * which is implied in VS_OPCODE_URB_WRITE.
2343 */
2344 mrf++;
2345
2346 if (intel->gen < 6) {
2347 emit_ndc_computation();
2348 }
2349
2350 /* Set up the VUE data for the first URB write */
2351 int slot;
2352 for (slot = 0; slot < c->prog_data.vue_map.num_slots; ++slot) {
2353 emit_urb_slot(mrf++, c->prog_data.vue_map.slot_to_vert_result[slot]);
2354
2355 /* If this was max_usable_mrf, we can't fit anything more into this URB
2356 * WRITE.
2357 */
2358 if (mrf > max_usable_mrf) {
2359 slot++;
2360 break;
2361 }
2362 }
2363
2364 current_annotation = "URB write";
2365 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
2366 inst->base_mrf = base_mrf;
2367 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2368 inst->eot = (slot >= c->prog_data.vue_map.num_slots);
2369
2370 /* Optional second URB write */
2371 if (!inst->eot) {
2372 mrf = base_mrf + 1;
2373
2374 for (; slot < c->prog_data.vue_map.num_slots; ++slot) {
2375 assert(mrf < max_usable_mrf);
2376
2377 emit_urb_slot(mrf++, c->prog_data.vue_map.slot_to_vert_result[slot]);
2378 }
2379
2380 current_annotation = "URB write";
2381 inst = emit(VS_OPCODE_URB_WRITE);
2382 inst->base_mrf = base_mrf;
2383 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2384 inst->eot = true;
2385 /* URB destination offset. The previous write used MRFs 1-13: the one
2386 * header MRF plus 12 data regs. URB offsets are in URB row increments,
2387 * and each of our MRFs is half of one of those, since we're doing
2388 * interleaved writes.
2389 */
2390 inst->offset = (max_usable_mrf - base_mrf) / 2;
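      /* With base_mrf == 1 and max_usable_mrf == 13 as set up above, that is
       * 12 data MRFs / 2 == an offset of 6 URB rows for this second write.
       */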
2391 }
2392 }
2393
2394 src_reg
2395 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2396 src_reg *reladdr, int reg_offset)
2397 {
2398 /* Because we store the values to scratch interleaved like our
2399 * vertex data, we need to scale the vec4 index by 2.
2400 */
2401 int message_header_scale = 2;
2402
2403 /* Pre-gen6, the message header uses byte offsets instead of vec4
2404 * (16-byte) offset units.
2405 */
2406 if (intel->gen < 6)
2407 message_header_scale *= 16;
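   /* Illustrative values: with reg_offset == 3, the non-relative case below
    * returns 6 on gen6+ (vec4 units, scaled by 2 for the interleaved
    * layout) and 6 * 16 == 96 on earlier parts, where the message header
    * wants byte offsets.
    */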
2408
2409 if (reladdr) {
2410 src_reg index = src_reg(this, glsl_type::int_type);
2411
2412 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2413 emit_before(inst, MUL(dst_reg(index),
2414 index, src_reg(message_header_scale)));
2415
2416 return index;
2417 } else {
2418 return src_reg(reg_offset * message_header_scale);
2419 }
2420 }
2421
2422 src_reg
2423 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2424 src_reg *reladdr, int reg_offset)
2425 {
2426 if (reladdr) {
2427 src_reg index = src_reg(this, glsl_type::int_type);
2428
2429 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2430
2431 /* Pre-gen6, the message header uses byte offsets instead of vec4
2432 * (16-byte) offset units.
2433 */
2434 if (intel->gen < 6) {
2435 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2436 }
2437
2438 return index;
2439 } else {
2440 int message_header_scale = intel->gen < 6 ? 16 : 1;
2441 return src_reg(reg_offset * message_header_scale);
2442 }
2443 }
2444
2445 /**
2446 * Emits an instruction before @inst to load the value named by @orig_src
2447 * from scratch space at @base_offset to @temp.
2448 *
2449 * @base_offset is measured in 32-byte units (the size of a register).
2450 */
2451 void
2452 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2453 dst_reg temp, src_reg orig_src,
2454 int base_offset)
2455 {
2456 int reg_offset = base_offset + orig_src.reg_offset;
2457 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2458
2459 emit_before(inst, SCRATCH_READ(temp, index));
2460 }
2461
2462 /**
2463 * Emits an instruction after @inst to store the value to be written
2464 * to @orig_dst to scratch space at @base_offset, from @temp.
2465 *
2466 * @base_offset is measured in 32-byte units (the size of a register).
2467 */
2468 void
2469 vec4_visitor::emit_scratch_write(vec4_instruction *inst, int base_offset)
2470 {
2471 int reg_offset = base_offset + inst->dst.reg_offset;
2472 src_reg index = get_scratch_offset(inst, inst->dst.reladdr, reg_offset);
2473
2474 /* Create a temporary register to store *inst's result in.
2475 *
2476 * We have to be careful in MOVing from our temporary result register in
2477 * the scratch write. If we swizzle from channels of the temporary that
2478 * weren't initialized, it will confuse live interval analysis, which will
2479 * make spilling fail to make progress.
2480 */
2481 src_reg temp = src_reg(this, glsl_type::vec4_type);
2482 temp.type = inst->dst.type;
2483 int first_writemask_chan = ffs(inst->dst.writemask) - 1;
2484 int swizzles[4];
2485 for (int i = 0; i < 4; i++)
2486 if (inst->dst.writemask & (1 << i))
2487 swizzles[i] = i;
2488 else
2489 swizzles[i] = first_writemask_chan;
2490 temp.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2491 swizzles[2], swizzles[3]);
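   /* For example (hypothetical writemask): a dst writemask of .xz gives
    * swizzles[] == {0, 0, 2, 0}, i.e. BRW_SWIZZLE4(x, x, z, x), so the
    * unwritten .y and .w channels read a defined channel instead of
    * uninitialized data.
    */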
2492
2493 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2494 inst->dst.writemask));
2495 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
2496 write->predicate = inst->predicate;
2497 write->ir = inst->ir;
2498 write->annotation = inst->annotation;
2499 inst->insert_after(write);
2500
2501 inst->dst.file = temp.file;
2502 inst->dst.reg = temp.reg;
2503 inst->dst.reg_offset = temp.reg_offset;
2504 inst->dst.reladdr = NULL;
2505 }
2506
2507 /**
2508 * We can't generally support array access in GRF space, because a
2509 * single instruction's destination can only span 2 contiguous
2510 * registers. So, we send all GRF arrays that get variable index
2511 * access to scratch space.
2512 */
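/*
 * For illustration (hypothetical shader): given
 *    vec4 a[4];  ...  a[i] = v;  ...  x = a[2];
 * the single variable index on a[] marks the whole array, so both the store
 * and the constant-indexed read are rewritten below: writes go out through
 * SCRATCH_WRITE and reads come back through SCRATCH_READ temporaries.
 */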
2513 void
2514 vec4_visitor::move_grf_array_access_to_scratch()
2515 {
2516 int scratch_loc[this->virtual_grf_count];
2517
2518 for (int i = 0; i < this->virtual_grf_count; i++) {
2519 scratch_loc[i] = -1;
2520 }
2521
2522 /* First, calculate the set of virtual GRFs that need to be punted
2523 * to scratch due to having any array access on them, and where in
2524 * scratch.
2525 */
2526 foreach_list(node, &this->instructions) {
2527 vec4_instruction *inst = (vec4_instruction *)node;
2528
2529 if (inst->dst.file == GRF && inst->dst.reladdr &&
2530 scratch_loc[inst->dst.reg] == -1) {
2531 scratch_loc[inst->dst.reg] = c->last_scratch;
2532 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
2533 }
2534
2535 for (int i = 0 ; i < 3; i++) {
2536 src_reg *src = &inst->src[i];
2537
2538 if (src->file == GRF && src->reladdr &&
2539 scratch_loc[src->reg] == -1) {
2540 scratch_loc[src->reg] = c->last_scratch;
2541 c->last_scratch += this->virtual_grf_sizes[src->reg];
2542 }
2543 }
2544 }
2545
2546 /* Now, for anything that will be accessed through scratch, rewrite
2547 * it to load/store. Note that this is a _safe list walk, because
2548 * we may generate a new scratch_write instruction after the one
2549 * we're processing.
2550 */
2551 foreach_list_safe(node, &this->instructions) {
2552 vec4_instruction *inst = (vec4_instruction *)node;
2553
2554 /* Set up the annotation tracking for new generated instructions. */
2555 base_ir = inst->ir;
2556 current_annotation = inst->annotation;
2557
2558 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2559 emit_scratch_write(inst, scratch_loc[inst->dst.reg]);
2560 }
2561
2562 for (int i = 0 ; i < 3; i++) {
2563 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2564 continue;
2565
2566 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2567
2568 emit_scratch_read(inst, temp, inst->src[i],
2569 scratch_loc[inst->src[i].reg]);
2570
2571 inst->src[i].file = temp.file;
2572 inst->src[i].reg = temp.reg;
2573 inst->src[i].reg_offset = temp.reg_offset;
2574 inst->src[i].reladdr = NULL;
2575 }
2576 }
2577 }
2578
2579 /**
2580 * Emits an instruction before @inst to load the value named by @orig_src
2581 * from the pull constant buffer (surface) at @base_offset to @temp.
2582 */
2583 void
2584 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
2585 dst_reg temp, src_reg orig_src,
2586 int base_offset)
2587 {
2588 int reg_offset = base_offset + orig_src.reg_offset;
2589 src_reg index = src_reg((unsigned)SURF_INDEX_VERT_CONST_BUFFER);
2590 src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
2591 vec4_instruction *load;
2592
2593 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
2594 temp, index, offset);
2595 load->base_mrf = 14;
2596 load->mlen = 1;
2597 emit_before(inst, load);
2598 }
2599
2600 /**
2601 * Implements array access of uniforms by inserting a
2602 * PULL_CONSTANT_LOAD instruction.
2603 *
2604 * Unlike temporary GRF array access (where we don't support it due to
2605 * the difficulty of doing relative addressing on instruction
2606 * destinations), we could potentially do array access of uniforms
2607 * that were loaded in GRF space as push constants. In real-world
2608 * usage we've seen, though, the arrays being used are always larger
2609 * than we could load as push constants, so just always move all
2610 * uniform array access out to a pull constant buffer.
2611 */
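/*
 * For illustration (hypothetical shader): for
 *    uniform vec4 colors[64];  ...  c = colors[i];
 * the reladdr source is replaced with a temporary filled by a
 * VS_OPCODE_PULL_CONSTANT_LOAD from the constant buffer, and the array's
 * values are appended to prog_data->pull_param so the surface contains them.
 */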
2612 void
2613 vec4_visitor::move_uniform_array_access_to_pull_constants()
2614 {
2615 int pull_constant_loc[this->uniforms];
2616
2617 for (int i = 0; i < this->uniforms; i++) {
2618 pull_constant_loc[i] = -1;
2619 }
2620
2621 /* Walk through and find array access of uniforms. Put a copy of that
2622 * uniform in the pull constant buffer.
2623 *
2624 * Note that we don't move constant-indexed accesses to arrays. No
2625 * testing has been done of the performance impact of this choice.
2626 */
2627 foreach_list_safe(node, &this->instructions) {
2628 vec4_instruction *inst = (vec4_instruction *)node;
2629
2630 for (int i = 0 ; i < 3; i++) {
2631 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
2632 continue;
2633
2634 int uniform = inst->src[i].reg;
2635
2636 /* If this array isn't already present in the pull constant buffer,
2637 * add it.
2638 */
2639 if (pull_constant_loc[uniform] == -1) {
2640 const float **values = &prog_data->param[uniform * 4];
2641
2642 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
2643
2644 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
2645 prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
2646 }
2647 }
2648
2649 /* Set up the annotation tracking for new generated instructions. */
2650 base_ir = inst->ir;
2651 current_annotation = inst->annotation;
2652
2653 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2654
2655 emit_pull_constant_load(inst, temp, inst->src[i],
2656 pull_constant_loc[uniform]);
2657
2658 inst->src[i].file = temp.file;
2659 inst->src[i].reg = temp.reg;
2660 inst->src[i].reg_offset = temp.reg_offset;
2661 inst->src[i].reladdr = NULL;
2662 }
2663 }
2664
2665 /* Now there are no accesses of the UNIFORM file with a reladdr, so
2666 * no need to track them as larger-than-vec4 objects. This will be
2667 * relied on in cutting out unused uniform vectors from push
2668 * constants.
2669 */
2670 split_uniform_registers();
2671 }
2672
2673 void
2674 vec4_visitor::resolve_ud_negate(src_reg *reg)
2675 {
2676 if (reg->type != BRW_REGISTER_TYPE_UD ||
2677 !reg->negate)
2678 return;
2679
2680 src_reg temp = src_reg(this, glsl_type::uvec4_type);
2681 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
2682 *reg = temp;
2683 }
2684
2685 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
2686 struct gl_shader_program *prog,
2687 struct brw_shader *shader)
2688 {
2689 this->c = c;
2690 this->p = &c->func;
2691 this->brw = p->brw;
2692 this->intel = &brw->intel;
2693 this->ctx = &intel->ctx;
2694 this->prog = prog;
2695 this->shader = shader;
2696
2697 this->mem_ctx = ralloc_context(NULL);
2698 this->failed = false;
2699
2700 this->base_ir = NULL;
2701 this->current_annotation = NULL;
2702
2703 this->c = c;
2704 this->vp = &c->vp->program;
2705 this->prog_data = &c->prog_data;
2706
2707 this->variable_ht = hash_table_ctor(0,
2708 hash_table_pointer_hash,
2709 hash_table_pointer_compare);
2710
2711 this->virtual_grf_def = NULL;
2712 this->virtual_grf_use = NULL;
2713 this->virtual_grf_sizes = NULL;
2714 this->virtual_grf_count = 0;
2715 this->virtual_grf_reg_map = NULL;
2716 this->virtual_grf_reg_count = 0;
2717 this->virtual_grf_array_size = 0;
2718 this->live_intervals_valid = false;
2719
2720 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
2721
2722 this->uniforms = 0;
2723 }
2724
2725 vec4_visitor::~vec4_visitor()
2726 {
2727 ralloc_free(this->mem_ctx);
2728 hash_table_dtor(this->variable_ht);
2729 }
2730
2731
2732 void
2733 vec4_visitor::fail(const char *format, ...)
2734 {
2735 va_list va;
2736 char *msg;
2737
2738 if (failed)
2739 return;
2740
2741 failed = true;
2742
2743 va_start(va, format);
2744 msg = ralloc_vasprintf(mem_ctx, format, va);
2745 va_end(va);
2746 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2747
2748 this->fail_msg = msg;
2749
2750 if (INTEL_DEBUG & DEBUG_VS) {
2751 fprintf(stderr, "%s", msg);
2752 }
2753 }
2754
2755 } /* namespace brw */