i965: new VS: use the VUE map to write out vertex attributes.
[mesa.git] src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 }
29
30 namespace brw {
31
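/**
 * Converts a dst_reg into a src_reg, building a swizzle that reads back
 * the channels named by the writemask and replicates the last written
 * channel into any unwritten channels.
 */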
32 src_reg::src_reg(dst_reg reg)
33 {
34 init();
35
36 this->file = reg.file;
37 this->reg = reg.reg;
38 this->reg_offset = reg.reg_offset;
39 this->type = reg.type;
40 this->reladdr = reg.reladdr;
41 this->fixed_hw_reg = reg.fixed_hw_reg;
42
43 int swizzles[4];
44 int next_chan = 0;
45 int last = 0;
46
47 for (int i = 0; i < 4; i++) {
48 if (!(reg.writemask & (1 << i)))
49 continue;
50
51 swizzles[next_chan++] = last = i;
52 }
53
54 for (; next_chan < 4; next_chan++) {
55 swizzles[next_chan] = last;
56 }
57
58 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
59 swizzles[2], swizzles[3]);
60 }
61
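/**
 * Converts a src_reg into a dst_reg with a full XYZW writemask; the
 * source swizzle is dropped.
 */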
62 dst_reg::dst_reg(src_reg reg)
63 {
64 init();
65
66 this->file = reg.file;
67 this->reg = reg.reg;
68 this->reg_offset = reg.reg_offset;
69 this->type = reg.type;
70 this->writemask = WRITEMASK_XYZW;
71 this->reladdr = reg.reladdr;
72 this->fixed_hw_reg = reg.fixed_hw_reg;
73 }
74
75 vec4_instruction::vec4_instruction(vec4_visitor *v,
76 enum opcode opcode, dst_reg dst,
77 src_reg src0, src_reg src1, src_reg src2)
78 {
79 this->opcode = opcode;
80 this->dst = dst;
81 this->src[0] = src0;
82 this->src[1] = src1;
83 this->src[2] = src2;
84 this->ir = v->base_ir;
85 this->annotation = v->current_annotation;
86 }
87
88 vec4_instruction *
89 vec4_visitor::emit(vec4_instruction *inst)
90 {
91 this->instructions.push_tail(inst);
92
93 return inst;
94 }
95
96 vec4_instruction *
97 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
98 {
99 new_inst->ir = inst->ir;
100 new_inst->annotation = inst->annotation;
101
102 inst->insert_before(new_inst);
103
104 return inst;
105 }
106
107 vec4_instruction *
108 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
109 src_reg src0, src_reg src1, src_reg src2)
110 {
111 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
112 src0, src1, src2));
113 }
114
115
116 vec4_instruction *
117 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
118 {
119 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
120 }
121
122 vec4_instruction *
123 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
124 {
125 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
126 }
127
128 vec4_instruction *
129 vec4_visitor::emit(enum opcode opcode)
130 {
131 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
132 }
133
134 #define ALU1(op) \
135 vec4_instruction * \
136 vec4_visitor::op(dst_reg dst, src_reg src0) \
137 { \
138 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
139 src0); \
140 }
141
142 #define ALU2(op) \
143 vec4_instruction * \
144 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
145 { \
146 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
147 src0, src1); \
148 }
149
150 ALU1(NOT)
151 ALU1(MOV)
152 ALU1(FRC)
153 ALU1(RNDD)
154 ALU1(RNDE)
155 ALU1(RNDZ)
156 ALU2(ADD)
157 ALU2(MUL)
158 ALU2(MACH)
159 ALU2(AND)
160 ALU2(OR)
161 ALU2(XOR)
162 ALU2(DP3)
163 ALU2(DP4)
164
165 /** Gen4 predicated IF. */
166 vec4_instruction *
167 vec4_visitor::IF(uint32_t predicate)
168 {
169 vec4_instruction *inst;
170
171 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
172 inst->predicate = predicate;
173
174 return inst;
175 }
176
177 /** Gen6+ IF with embedded comparison. */
178 vec4_instruction *
179 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
180 {
181 assert(intel->gen >= 6);
182
183 vec4_instruction *inst;
184
185 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
186 src0, src1);
187 inst->conditional_mod = condition;
188
189 return inst;
190 }
191
192 /**
193 * CMP: Sets the low bit of the destination channels with the result
194 * of the comparison, while the upper bits are undefined, and updates
195 * the flag register with the packed 16 bits of the result.
196 */
197 vec4_instruction *
198 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
199 {
200 vec4_instruction *inst;
201
202 /* original gen4 does type conversion to the destination type
203 * before comparison, producing garbage results for floating
204 * point comparisons.
205 */
206 if (intel->gen == 4)
207 dst.type = src0.type;
208
209 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
210 inst->conditional_mod = condition;
211
212 return inst;
213 }
214
215 vec4_instruction *
216 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
217 {
218 vec4_instruction *inst;
219
220 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
221 dst, index);
222 inst->base_mrf = 14;
223 inst->mlen = 1;
224
225 return inst;
226 }
227
228 vec4_instruction *
229 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
230 {
231 vec4_instruction *inst;
232
233 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
234 dst, src, index);
235 inst->base_mrf = 13;
236 inst->mlen = 2;
237
238 return inst;
239 }
240
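/**
 * Emits a DP2, DP3 or DP4 of the two sources, depending on @elements.
 */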
241 void
242 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
243 {
244 static enum opcode dot_opcodes[] = {
245 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
246 };
247
248 emit(dot_opcodes[elements - 2], dst, src0, src1);
249 }
250
251 void
252 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
253 {
254 /* The gen6 math instruction ignores the source modifiers --
255 * swizzle, abs, negate, and at least some parts of the register
256 * region description.
257 *
258 * While it would seem that this MOV could be avoided at this point
259 * in the case that the swizzle is matched up with the destination
260 * writemask, note that uniform packing and register allocation
261 * could rearrange our swizzle, so let's leave this matter up to
262 * copy propagation later.
263 */
264 src_reg temp_src = src_reg(this, glsl_type::vec4_type);
265 emit(MOV(dst_reg(temp_src), src));
266
267 if (dst.writemask != WRITEMASK_XYZW) {
268 /* The gen6 math instruction must be align1, so we can't do
269 * writemasks.
270 */
271 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
272
273 emit(opcode, temp_dst, temp_src);
274
275 emit(MOV(dst, src_reg(temp_dst)));
276 } else {
277 emit(opcode, dst, temp_src);
278 }
279 }
280
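/* Pre-gen6, math is a send to the shared math unit with the operand
 * passed through a single MRF.
 */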
281 void
282 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
283 {
284 vec4_instruction *inst = emit(opcode, dst, src);
285 inst->base_mrf = 1;
286 inst->mlen = 1;
287 }
288
289 void
290 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
291 {
292 switch (opcode) {
293 case SHADER_OPCODE_RCP:
294 case SHADER_OPCODE_RSQ:
295 case SHADER_OPCODE_SQRT:
296 case SHADER_OPCODE_EXP2:
297 case SHADER_OPCODE_LOG2:
298 case SHADER_OPCODE_SIN:
299 case SHADER_OPCODE_COS:
300 break;
301 default:
302 assert(!"not reached: bad math opcode");
303 return;
304 }
305
306 if (intel->gen >= 6) {
307 return emit_math1_gen6(opcode, dst, src);
308 } else {
309 return emit_math1_gen4(opcode, dst, src);
310 }
311 }
312
313 void
314 vec4_visitor::emit_math2_gen6(enum opcode opcode,
315 dst_reg dst, src_reg src0, src_reg src1)
316 {
317 src_reg expanded;
318
319 /* The gen6 math instruction ignores the source modifiers --
320 * swizzle, abs, negate, and at least some parts of the register
321 * region description. Move the sources to temporaries to make it
322 * generally work.
323 */
324
325 expanded = src_reg(this, glsl_type::vec4_type);
326 emit(MOV(dst_reg(expanded), src0));
327 src0 = expanded;
328
329 expanded = src_reg(this, glsl_type::vec4_type);
330 emit(MOV(dst_reg(expanded), src1));
331 src1 = expanded;
332
333 if (dst.writemask != WRITEMASK_XYZW) {
334 /* The gen6 math instruction must be align1, so we can't do
335 * writemasks.
336 */
337 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
338
339 emit(opcode, temp_dst, src0, src1);
340
341 emit(MOV(dst, src_reg(temp_dst)));
342 } else {
343 emit(opcode, dst, src0, src1);
344 }
345 }
346
347 void
348 vec4_visitor::emit_math2_gen4(enum opcode opcode,
349 dst_reg dst, src_reg src0, src_reg src1)
350 {
351 vec4_instruction *inst = emit(opcode, dst, src0, src1);
352 inst->base_mrf = 1;
353 inst->mlen = 2;
354 }
355
356 void
357 vec4_visitor::emit_math(enum opcode opcode,
358 dst_reg dst, src_reg src0, src_reg src1)
359 {
360 assert(opcode == SHADER_OPCODE_POW);
361
362 if (intel->gen >= 6) {
363 return emit_math2_gen6(opcode, dst, src0, src1);
364 } else {
365 return emit_math2_gen4(opcode, dst, src0, src1);
366 }
367 }
368
369 void
370 vec4_visitor::visit_instructions(const exec_list *list)
371 {
372 foreach_list(node, list) {
373 ir_instruction *ir = (ir_instruction *)node;
374
375 base_ir = ir;
376 ir->accept(this);
377 }
378 }
379
380
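/**
 * Returns the size of a GLSL type in units of vec4 slots (one virtual
 * GRF register each).
 */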
381 static int
382 type_size(const struct glsl_type *type)
383 {
384 unsigned int i;
385 int size;
386
387 switch (type->base_type) {
388 case GLSL_TYPE_UINT:
389 case GLSL_TYPE_INT:
390 case GLSL_TYPE_FLOAT:
391 case GLSL_TYPE_BOOL:
392 if (type->is_matrix()) {
393 return type->matrix_columns;
394 } else {
395 /* Regardless of size of vector, it gets a vec4. This is bad
396 * packing for things like floats, but otherwise arrays become a
397 * mess. Hopefully a later pass over the code can pack scalars
398 * down if appropriate.
399 */
400 return 1;
401 }
402 case GLSL_TYPE_ARRAY:
403 assert(type->length > 0);
404 return type_size(type->fields.array) * type->length;
405 case GLSL_TYPE_STRUCT:
406 size = 0;
407 for (i = 0; i < type->length; i++) {
408 size += type_size(type->fields.structure[i].type);
409 }
410 return size;
411 case GLSL_TYPE_SAMPLER:
412 /* Samplers take up one slot in UNIFORMS[], but they're baked in
413 * at link time.
414 */
415 return 1;
416 default:
417 assert(0);
418 return 0;
419 }
420 }
421
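/**
 * Allocates a new virtual GRF of @size vec4 slots, growing the size
 * array if necessary, and returns its index.
 */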
422 int
423 vec4_visitor::virtual_grf_alloc(int size)
424 {
425 if (virtual_grf_array_size <= virtual_grf_count) {
426 if (virtual_grf_array_size == 0)
427 virtual_grf_array_size = 16;
428 else
429 virtual_grf_array_size *= 2;
430 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
431 virtual_grf_array_size);
432 }
433 virtual_grf_sizes[virtual_grf_count] = size;
434 return virtual_grf_count++;
435 }
436
437 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
438 {
439 init();
440
441 this->file = GRF;
442 this->reg = v->virtual_grf_alloc(type_size(type));
443
444 if (type->is_array() || type->is_record()) {
445 this->swizzle = BRW_SWIZZLE_NOOP;
446 } else {
447 this->swizzle = swizzle_for_size(type->vector_elements);
448 }
449
450 this->type = brw_type_for_base_type(type);
451 }
452
453 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
454 {
455 init();
456
457 this->file = GRF;
458 this->reg = v->virtual_grf_alloc(type_size(type));
459
460 if (type->is_array() || type->is_record()) {
461 this->writemask = WRITEMASK_XYZW;
462 } else {
463 this->writemask = (1 << type->vector_elements) - 1;
464 }
465
466 this->type = brw_type_for_base_type(type);
467 }
468
469 /* Our support for uniforms is piggy-backed on the struct
470 * gl_fragment_program, because that's where the values actually
471 * get stored, rather than in some global gl_shader_program uniform
472 * store.
473 */
474 int
475 vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
476 {
477 unsigned int offset = 0;
478 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
479
480 if (type->is_matrix()) {
481 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
482 type->vector_elements,
483 1);
484
485 for (unsigned int i = 0; i < type->matrix_columns; i++) {
486 offset += setup_uniform_values(loc + offset, column);
487 }
488
489 return offset;
490 }
491
492 switch (type->base_type) {
493 case GLSL_TYPE_FLOAT:
494 case GLSL_TYPE_UINT:
495 case GLSL_TYPE_INT:
496 case GLSL_TYPE_BOOL:
497 for (unsigned int i = 0; i < type->vector_elements; i++) {
498 c->prog_data.param[this->uniforms * 4 + i] = &values[i];
499 }
500
501 /* Set up pad elements to get things aligned to a vec4 boundary. */
502 for (unsigned int i = type->vector_elements; i < 4; i++) {
503 static float zero = 0;
504
505 c->prog_data.param[this->uniforms * 4 + i] = &zero;
506 }
507
508 /* Track the size of this uniform vector, for future packing of
509 * uniforms.
510 */
511 this->uniform_vector_size[this->uniforms] = type->vector_elements;
512 this->uniforms++;
513
514 return 1;
515
516 case GLSL_TYPE_STRUCT:
517 for (unsigned int i = 0; i < type->length; i++) {
518 offset += setup_uniform_values(loc + offset,
519 type->fields.structure[i].type);
520 }
521 return offset;
522
523 case GLSL_TYPE_ARRAY:
524 for (unsigned int i = 0; i < type->length; i++) {
525 offset += setup_uniform_values(loc + offset, type->fields.array);
526 }
527 return offset;
528
529 case GLSL_TYPE_SAMPLER:
530 /* The sampler takes up a slot, but we don't use any values from it. */
531 return 1;
532
533 default:
534 assert(!"not reached");
535 return 0;
536 }
537 }
538
539 /* Our support for builtin uniforms is even scarier than non-builtin.
540 * It sits on top of the PROG_STATE_VAR parameters that are
541 * automatically updated from GL context state.
542 */
543 void
544 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
545 {
546 const ir_state_slot *const slots = ir->state_slots;
547 assert(ir->state_slots != NULL);
548
549 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
550 /* This state reference has already been setup by ir_to_mesa,
551 * but we'll get the same index back here. We can reference
552 * ParameterValues directly, since unlike brw_fs.cpp, we never
553 * add new state references during compile.
554 */
555 int index = _mesa_add_state_reference(this->vp->Base.Parameters,
556 (gl_state_index *)slots[i].tokens);
557 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
558
559 this->uniform_vector_size[this->uniforms] = 0;
560 /* Add each of the unique swizzled channels of the element.
561 * This will end up matching the size of the glsl_type of this field.
562 */
563 int last_swiz = -1;
564 for (unsigned int j = 0; j < 4; j++) {
565 int swiz = GET_SWZ(slots[i].swizzle, j);
566 if (swiz <= last_swiz)
567 break;
568 last_swiz = swiz;
569 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
570 this->uniform_vector_size[this->uniforms]++;
571 }
572 this->uniforms++;
573 }
574 }
575
576 dst_reg *
577 vec4_visitor::variable_storage(ir_variable *var)
578 {
579 return (dst_reg *)hash_table_find(this->variable_ht, var);
580 }
581
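/**
 * Emits instructions that leave the flag register holding the boolean
 * value of @ir, so that a following instruction can be predicated on
 * the result.
 */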
582 void
583 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
584 {
585 ir_expression *expr = ir->as_expression();
586
587 if (expr) {
588 src_reg op[2];
589 vec4_instruction *inst;
590
591 assert(expr->get_num_operands() <= 2);
592 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
593 assert(expr->operands[i]->type->is_scalar());
594
595 expr->operands[i]->accept(this);
596 op[i] = this->result;
597 }
598
599 switch (expr->operation) {
600 case ir_unop_logic_not:
601 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
602 inst->conditional_mod = BRW_CONDITIONAL_Z;
603 break;
604
605 case ir_binop_logic_xor:
606 inst = emit(XOR(dst_null_d(), op[0], op[1]));
607 inst->conditional_mod = BRW_CONDITIONAL_NZ;
608 break;
609
610 case ir_binop_logic_or:
611 inst = emit(OR(dst_null_d(), op[0], op[1]));
612 inst->conditional_mod = BRW_CONDITIONAL_NZ;
613 break;
614
615 case ir_binop_logic_and:
616 inst = emit(AND(dst_null_d(), op[0], op[1]));
617 inst->conditional_mod = BRW_CONDITIONAL_NZ;
618 break;
619
620 case ir_unop_f2b:
621 if (intel->gen >= 6) {
622 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
623 } else {
624 inst = emit(MOV(dst_null_f(), op[0]));
625 inst->conditional_mod = BRW_CONDITIONAL_NZ;
626 }
627 break;
628
629 case ir_unop_i2b:
630 if (intel->gen >= 6) {
631 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
632 } else {
633 inst = emit(MOV(dst_null_d(), op[0]));
634 inst->conditional_mod = BRW_CONDITIONAL_NZ;
635 }
636 break;
637
638 case ir_binop_greater:
639 case ir_binop_gequal:
640 case ir_binop_less:
641 case ir_binop_lequal:
642 case ir_binop_equal:
643 case ir_binop_all_equal:
644 case ir_binop_nequal:
645 case ir_binop_any_nequal:
646 emit(CMP(dst_null_d(), op[0], op[1],
647 brw_conditional_for_comparison(expr->operation)));
648 break;
649
650 default:
651 assert(!"not reached");
652 break;
653 }
654 return;
655 }
656
657 ir->accept(this);
658
659 if (intel->gen >= 6) {
660 vec4_instruction *inst = emit(AND(dst_null_d(),
661 this->result, src_reg(1)));
662 inst->conditional_mod = BRW_CONDITIONAL_NZ;
663 } else {
664 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
665 inst->conditional_mod = BRW_CONDITIONAL_NZ;
666 }
667 }
668
669 /**
670 * Emit a gen6 IF statement with the comparison folded into the IF
671 * instruction.
672 */
673 void
674 vec4_visitor::emit_if_gen6(ir_if *ir)
675 {
676 ir_expression *expr = ir->condition->as_expression();
677
678 if (expr) {
679 src_reg op[2];
680 dst_reg temp;
681
682 assert(expr->get_num_operands() <= 2);
683 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
684 expr->operands[i]->accept(this);
685 op[i] = this->result;
686 }
687
688 switch (expr->operation) {
689 case ir_unop_logic_not:
690 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
691 return;
692
693 case ir_binop_logic_xor:
694 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
695 return;
696
697 case ir_binop_logic_or:
698 temp = dst_reg(this, glsl_type::bool_type);
699 emit(OR(temp, op[0], op[1]));
700 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
701 return;
702
703 case ir_binop_logic_and:
704 temp = dst_reg(this, glsl_type::bool_type);
705 emit(AND(temp, op[0], op[1]));
706 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
707 return;
708
709 case ir_unop_f2b:
710 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
711 return;
712
713 case ir_unop_i2b:
714 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
715 return;
716
717 case ir_binop_greater:
718 case ir_binop_gequal:
719 case ir_binop_less:
720 case ir_binop_lequal:
721 case ir_binop_equal:
722 case ir_binop_nequal:
723 emit(IF(op[0], op[1],
724 brw_conditional_for_comparison(expr->operation)));
725 return;
726
727 case ir_binop_all_equal:
728 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
729 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
730 return;
731
732 case ir_binop_any_nequal:
733 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
734 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
735 return;
736
737 case ir_unop_any:
738 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
739 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
740 return;
741
742 default:
743 assert(!"not reached");
744 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
745 return;
746 }
747 return;
748 }
749
750 ir->condition->accept(this);
751
752 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
753 }
754
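/**
 * Sets up storage for a variable the first time it is visited: ATTR
 * slots for inputs (with GL_FIXED rescaling), output_reg entries backed
 * by a fresh GRF for outputs, a GRF for temporaries, and UNIFORM slots
 * for uniform values.
 */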
755 void
756 vec4_visitor::visit(ir_variable *ir)
757 {
758 dst_reg *reg = NULL;
759
760 if (variable_storage(ir))
761 return;
762
763 switch (ir->mode) {
764 case ir_var_in:
765 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
766
767 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
768 * come in as floating point conversions of the integer values.
769 */
770 for (int i = ir->location; i < ir->location + type_size(ir->type); i++) {
771 if (!c->key.gl_fixed_input_size[i])
772 continue;
773
774 dst_reg dst = *reg;
775 dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1;
776 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
777 }
778 break;
779
780 case ir_var_out:
781 reg = new(mem_ctx) dst_reg(this, ir->type);
782
783 for (int i = 0; i < type_size(ir->type); i++) {
784 output_reg[ir->location + i] = *reg;
785 output_reg[ir->location + i].reg_offset = i;
786 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
787 }
788 break;
789
790 case ir_var_auto:
791 case ir_var_temporary:
792 reg = new(mem_ctx) dst_reg(this, ir->type);
793 break;
794
795 case ir_var_uniform:
796 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
797
798 /* Track how big the whole uniform variable is, in case we need to put a
799 * copy of its data into pull constants for array access.
800 */
801 this->uniform_size[this->uniforms] = type_size(ir->type);
802
803 if (!strncmp(ir->name, "gl_", 3)) {
804 setup_builtin_uniform_values(ir);
805 } else {
806 setup_uniform_values(ir->location, ir->type);
807 }
808 break;
809
810 default:
811 assert(!"not reached");
812 }
813
814 reg->type = brw_type_for_base_type(ir->type);
815 hash_table_insert(this->variable_ht, reg, ir);
816 }
817
818 void
819 vec4_visitor::visit(ir_loop *ir)
820 {
821 dst_reg counter;
822
823 /* We don't want debugging output to print the whole body of the
824 * loop as the annotation.
825 */
826 this->base_ir = NULL;
827
828 if (ir->counter != NULL) {
829 this->base_ir = ir->counter;
830 ir->counter->accept(this);
831 counter = *(variable_storage(ir->counter));
832
833 if (ir->from != NULL) {
834 this->base_ir = ir->from;
835 ir->from->accept(this);
836
837 emit(MOV(counter, this->result));
838 }
839 }
840
841 emit(BRW_OPCODE_DO);
842
843 if (ir->to) {
844 this->base_ir = ir->to;
845 ir->to->accept(this);
846
847 emit(CMP(dst_null_d(), src_reg(counter), this->result,
848 brw_conditional_for_comparison(ir->cmp)));
849
850 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
851 inst->predicate = BRW_PREDICATE_NORMAL;
852 }
853
854 visit_instructions(&ir->body_instructions);
855
856
857 if (ir->increment) {
858 this->base_ir = ir->increment;
859 ir->increment->accept(this);
860 emit(ADD(counter, src_reg(counter), this->result));
861 }
862
863 emit(BRW_OPCODE_WHILE);
864 }
865
866 void
867 vec4_visitor::visit(ir_loop_jump *ir)
868 {
869 switch (ir->mode) {
870 case ir_loop_jump::jump_break:
871 emit(BRW_OPCODE_BREAK);
872 break;
873 case ir_loop_jump::jump_continue:
874 emit(BRW_OPCODE_CONTINUE);
875 break;
876 }
877 }
878
879
880 void
881 vec4_visitor::visit(ir_function_signature *ir)
882 {
883 assert(0);
884 (void)ir;
885 }
886
887 void
888 vec4_visitor::visit(ir_function *ir)
889 {
890 /* Ignore function bodies other than main() -- we shouldn't see calls to
891 * them since they should all be inlined.
892 */
893 if (strcmp(ir->name, "main") == 0) {
894 const ir_function_signature *sig;
895 exec_list empty;
896
897 sig = ir->matching_signature(&empty);
898
899 assert(sig);
900
901 visit_instructions(&sig->body);
902 }
903 }
904
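/**
 * If @ir is a saturate of an rvalue, emits the rvalue into a temporary
 * with the saturate modifier set on the final MOV and returns true.
 */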
905 GLboolean
906 vec4_visitor::try_emit_sat(ir_expression *ir)
907 {
908 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
909 if (!sat_src)
910 return false;
911
912 sat_src->accept(this);
913 src_reg src = this->result;
914
915 this->result = src_reg(this, ir->type);
916 vec4_instruction *inst;
917 inst = emit(MOV(dst_reg(this->result), src));
918 inst->saturate = true;
919
920 return true;
921 }
922
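/* Emits a comparison producing a 0/1 boolean in @dst: the CMP sets the
 * low bit of each channel and the AND masks off the undefined upper bits.
 */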
923 void
924 vec4_visitor::emit_bool_comparison(unsigned int op,
925 dst_reg dst, src_reg src0, src_reg src1)
926 {
927 /* original gen4 does destination conversion before comparison. */
928 if (intel->gen < 5)
929 dst.type = src0.type;
930
931 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
932
933 dst.type = BRW_REGISTER_TYPE_D;
934 emit(AND(dst, src_reg(dst), src_reg(0x1)));
935 }
936
937 void
938 vec4_visitor::visit(ir_expression *ir)
939 {
940 unsigned int operand;
941 src_reg op[Elements(ir->operands)];
942 src_reg result_src;
943 dst_reg result_dst;
944 vec4_instruction *inst;
945
946 if (try_emit_sat(ir))
947 return;
948
949 for (operand = 0; operand < ir->get_num_operands(); operand++) {
950 this->result.file = BAD_FILE;
951 ir->operands[operand]->accept(this);
952 if (this->result.file == BAD_FILE) {
953 printf("Failed to get tree for expression operand:\n");
954 ir->operands[operand]->print();
955 exit(1);
956 }
957 op[operand] = this->result;
958
959 /* Matrix expression operands should have been broken down to vector
960 * operations already.
961 */
962 assert(!ir->operands[operand]->type->is_matrix());
963 }
964
965 int vector_elements = ir->operands[0]->type->vector_elements;
966 if (ir->operands[1]) {
967 vector_elements = MAX2(vector_elements,
968 ir->operands[1]->type->vector_elements);
969 }
970
971 this->result.file = BAD_FILE;
972
973 /* Storage for our result. Ideally for an assignment we'd be using
974 * the actual storage for the result here, instead.
975 */
976 result_src = src_reg(this, ir->type);
977 /* convenience for the emit functions below. */
978 result_dst = dst_reg(result_src);
979 /* If nothing special happens, this is the result. */
980 this->result = result_src;
981 /* Limit writes to the channels that will be used by result_src later.
982 * This does limit this temp's use as a temporary for multi-instruction
983 * sequences.
984 */
985 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
986
987 switch (ir->operation) {
988 case ir_unop_logic_not:
989 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
990 * ones complement of the whole register, not just bit 0.
991 */
992 emit(XOR(result_dst, op[0], src_reg(1)));
993 break;
994 case ir_unop_neg:
995 op[0].negate = !op[0].negate;
996 this->result = op[0];
997 break;
998 case ir_unop_abs:
999 op[0].abs = true;
1000 op[0].negate = false;
1001 this->result = op[0];
1002 break;
1003
1004 case ir_unop_sign:
1005 emit(MOV(result_dst, src_reg(0.0f)));
1006
1007 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1008 inst = emit(MOV(result_dst, src_reg(1.0f)));
1009 inst->predicate = BRW_PREDICATE_NORMAL;
1010
1011 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1012 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1013 inst->predicate = BRW_PREDICATE_NORMAL;
1014
1015 break;
1016
1017 case ir_unop_rcp:
1018 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1019 break;
1020
1021 case ir_unop_exp2:
1022 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1023 break;
1024 case ir_unop_log2:
1025 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1026 break;
1027 case ir_unop_exp:
1028 case ir_unop_log:
1029 assert(!"not reached: should be handled by ir_explog_to_explog2");
1030 break;
1031 case ir_unop_sin:
1032 case ir_unop_sin_reduced:
1033 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1034 break;
1035 case ir_unop_cos:
1036 case ir_unop_cos_reduced:
1037 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1038 break;
1039
1040 case ir_unop_dFdx:
1041 case ir_unop_dFdy:
1042 assert(!"derivatives not valid in vertex shader");
1043 break;
1044
1045 case ir_unop_noise:
1046 assert(!"not reached: should be handled by lower_noise");
1047 break;
1048
1049 case ir_binop_add:
1050 emit(ADD(result_dst, op[0], op[1]));
1051 break;
1052 case ir_binop_sub:
1053 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1054 break;
1055
1056 case ir_binop_mul:
1057 if (ir->type->is_integer()) {
1058 /* For integer multiplication, the MUL uses the low 16 bits
1059 * of one of the operands (src0 on gen6, src1 on gen7). The
1060 * MACH accumulates in the contribution of the upper 16 bits
1061 * of that operand.
1062 *
1063 * FINISHME: Emit just the MUL if we know an operand is small
1064 * enough.
1065 */
1066 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1067
1068 emit(MUL(acc, op[0], op[1]));
1069 emit(MACH(dst_null_d(), op[0], op[1]));
1070 emit(MOV(result_dst, src_reg(acc)));
1071 } else {
1072 emit(MUL(result_dst, op[0], op[1]));
1073 }
1074 break;
1075 case ir_binop_div:
1076 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1077 case ir_binop_mod:
1078 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1079 break;
1080
1081 case ir_binop_less:
1082 case ir_binop_greater:
1083 case ir_binop_lequal:
1084 case ir_binop_gequal:
1085 case ir_binop_equal:
1086 case ir_binop_nequal: {
1087 emit(CMP(result_dst, op[0], op[1],
1088 brw_conditional_for_comparison(ir->operation)));
1089 emit(AND(result_dst, result_src, src_reg(0x1)));
1090 break;
1091 }
1092
1093 case ir_binop_all_equal:
1094 /* "==" operator producing a scalar boolean. */
1095 if (ir->operands[0]->type->is_vector() ||
1096 ir->operands[1]->type->is_vector()) {
1097 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1098 emit(MOV(result_dst, src_reg(0)));
1099 inst = emit(MOV(result_dst, src_reg(1)));
1100 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1101 } else {
1102 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1103 emit(AND(result_dst, result_src, src_reg(0x1)));
1104 }
1105 break;
1106 case ir_binop_any_nequal:
1107 /* "!=" operator producing a scalar boolean. */
1108 if (ir->operands[0]->type->is_vector() ||
1109 ir->operands[1]->type->is_vector()) {
1110 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1111
1112 emit(MOV(result_dst, src_reg(0)));
1113 inst = emit(MOV(result_dst, src_reg(1)));
1114 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1115 } else {
1116 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1117 emit(AND(result_dst, result_src, src_reg(0x1)));
1118 }
1119 break;
1120
1121 case ir_unop_any:
1122 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1123 emit(MOV(result_dst, src_reg(0)));
1124
1125 inst = emit(MOV(result_dst, src_reg(1)));
1126 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1127 break;
1128
1129 case ir_binop_logic_xor:
1130 emit(XOR(result_dst, op[0], op[1]));
1131 break;
1132
1133 case ir_binop_logic_or:
1134 emit(OR(result_dst, op[0], op[1]));
1135 break;
1136
1137 case ir_binop_logic_and:
1138 emit(AND(result_dst, op[0], op[1]));
1139 break;
1140
1141 case ir_binop_dot:
1142 assert(ir->operands[0]->type->is_vector());
1143 assert(ir->operands[0]->type == ir->operands[1]->type);
1144 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1145 break;
1146
1147 case ir_unop_sqrt:
1148 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1149 break;
1150 case ir_unop_rsq:
1151 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1152 break;
1153 case ir_unop_i2f:
1154 case ir_unop_i2u:
1155 case ir_unop_u2i:
1156 case ir_unop_u2f:
1157 case ir_unop_b2f:
1158 case ir_unop_b2i:
1159 case ir_unop_f2i:
1160 emit(MOV(result_dst, op[0]));
1161 break;
1162 case ir_unop_f2b:
1163 case ir_unop_i2b: {
1164 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1165 emit(AND(result_dst, result_src, src_reg(1)));
1166 break;
1167 }
1168
1169 case ir_unop_trunc:
1170 emit(RNDZ(result_dst, op[0]));
1171 break;
1172 case ir_unop_ceil:
1173 op[0].negate = !op[0].negate;
1174 inst = emit(RNDD(result_dst, op[0]));
1175 this->result.negate = true;
1176 break;
1177 case ir_unop_floor:
1178 inst = emit(RNDD(result_dst, op[0]));
1179 break;
1180 case ir_unop_fract:
1181 inst = emit(FRC(result_dst, op[0]));
1182 break;
1183 case ir_unop_round_even:
1184 emit(RNDE(result_dst, op[0]));
1185 break;
1186
1187 case ir_binop_min:
1188 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_L));
1189
1190 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1191 inst->predicate = BRW_PREDICATE_NORMAL;
1192 break;
1193 case ir_binop_max:
1194 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_G));
1195
1196 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1197 inst->predicate = BRW_PREDICATE_NORMAL;
1198 break;
1199
1200 case ir_binop_pow:
1201 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1202 break;
1203
1204 case ir_unop_bit_not:
1205 inst = emit(NOT(result_dst, op[0]));
1206 break;
1207 case ir_binop_bit_and:
1208 inst = emit(AND(result_dst, op[0], op[1]));
1209 break;
1210 case ir_binop_bit_xor:
1211 inst = emit(XOR(result_dst, op[0], op[1]));
1212 break;
1213 case ir_binop_bit_or:
1214 inst = emit(OR(result_dst, op[0], op[1]));
1215 break;
1216
1217 case ir_binop_lshift:
1218 case ir_binop_rshift:
1219 assert(!"GLSL 1.30 features unsupported");
1220 break;
1221
1222 case ir_quadop_vector:
1223 assert(!"not reached: should be handled by lower_quadop_vector");
1224 break;
1225 }
1226 }
1227
1228
1229 void
1230 vec4_visitor::visit(ir_swizzle *ir)
1231 {
1232 src_reg src;
1233 int i = 0;
1234 int swizzle[4];
1235
1236 /* Note that this is only swizzles in expressions, not those on the left
1237 * hand side of an assignment, which do write masking. See ir_assignment
1238 * for that.
1239 */
1240
1241 ir->val->accept(this);
1242 src = this->result;
1243 assert(src.file != BAD_FILE);
1244
1245 for (i = 0; i < ir->type->vector_elements; i++) {
1246 switch (i) {
1247 case 0:
1248 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1249 break;
1250 case 1:
1251 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1252 break;
1253 case 2:
1254 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1255 break;
1256 case 3:
1257 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1258 break;
1259 }
1260 }
1261 for (; i < 4; i++) {
1262 /* Replicate the last channel out. */
1263 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1264 }
1265
1266 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1267
1268 this->result = src;
1269 }
1270
1271 void
1272 vec4_visitor::visit(ir_dereference_variable *ir)
1273 {
1274 const struct glsl_type *type = ir->type;
1275 dst_reg *reg = variable_storage(ir->var);
1276
1277 if (!reg) {
1278 fail("Failed to find variable storage for %s\n", ir->var->name);
1279 this->result = src_reg(brw_null_reg());
1280 return;
1281 }
1282
1283 this->result = src_reg(*reg);
1284
1285 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1286 this->result.swizzle = swizzle_for_size(type->vector_elements);
1287 }
1288
1289 void
1290 vec4_visitor::visit(ir_dereference_array *ir)
1291 {
1292 ir_constant *constant_index;
1293 src_reg src;
1294 int element_size = type_size(ir->type);
1295
1296 constant_index = ir->array_index->constant_expression_value();
1297
1298 ir->array->accept(this);
1299 src = this->result;
1300
1301 if (constant_index) {
1302 src.reg_offset += constant_index->value.i[0] * element_size;
1303 } else {
1304 /* Variable index array dereference. It eats the "vec4" of the
1305 * base of the array and an index that offsets the Mesa register
1306 * index.
1307 */
1308 ir->array_index->accept(this);
1309
1310 src_reg index_reg;
1311
1312 if (element_size == 1) {
1313 index_reg = this->result;
1314 } else {
1315 index_reg = src_reg(this, glsl_type::int_type);
1316
1317 emit(MUL(dst_reg(index_reg), this->result, src_reg(element_size)));
1318 }
1319
1320 if (src.reladdr) {
1321 src_reg temp = src_reg(this, glsl_type::int_type);
1322
1323 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1324
1325 index_reg = temp;
1326 }
1327
1328 src.reladdr = ralloc(mem_ctx, src_reg);
1329 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1330 }
1331
1332 /* If the type is smaller than a vec4, replicate the last channel out. */
1333 if (ir->type->is_scalar() || ir->type->is_vector())
1334 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1335 else
1336 src.swizzle = BRW_SWIZZLE_NOOP;
1337 src.type = brw_type_for_base_type(ir->type);
1338
1339 this->result = src;
1340 }
1341
1342 void
1343 vec4_visitor::visit(ir_dereference_record *ir)
1344 {
1345 unsigned int i;
1346 const glsl_type *struct_type = ir->record->type;
1347 int offset = 0;
1348
1349 ir->record->accept(this);
1350
1351 for (i = 0; i < struct_type->length; i++) {
1352 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1353 break;
1354 offset += type_size(struct_type->fields.structure[i].type);
1355 }
1356
1357 /* If the type is smaller than a vec4, replicate the last channel out. */
1358 if (ir->type->is_scalar() || ir->type->is_vector())
1359 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1360 else
1361 this->result.swizzle = BRW_SWIZZLE_NOOP;
1362 this->result.type = brw_type_for_base_type(ir->type);
1363
1364 this->result.reg_offset += offset;
1365 }
1366
1367 /**
1368 * We want to be careful in assignment setup to hit the actual storage
1369 * instead of potentially using a temporary like we might with the
1370 * ir_dereference handler.
1371 */
1372 static dst_reg
1373 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1374 {
1375 /* The LHS must be a dereference. If the LHS is a variable indexed array
1376 * access of a vector, it must be separated into a series conditional moves
1377 * before reaching this point (see ir_vec_index_to_cond_assign).
1378 */
1379 assert(ir->as_dereference());
1380 ir_dereference_array *deref_array = ir->as_dereference_array();
1381 if (deref_array) {
1382 assert(!deref_array->array->type->is_vector());
1383 }
1384
1385 /* Use the rvalue deref handler for the most part. We'll ignore
1386 * swizzles in it and write swizzles using writemask, though.
1387 */
1388 ir->accept(v);
1389 return dst_reg(v->result);
1390 }
1391
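/**
 * Copies a value of composite type by recursing down to its scalar and
 * vector components, emitting one (optionally predicated) MOV per vec4
 * slot and advancing the source and destination reg_offsets as it goes.
 */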
1392 void
1393 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1394 const struct glsl_type *type, bool predicated)
1395 {
1396 if (type->base_type == GLSL_TYPE_STRUCT) {
1397 for (unsigned int i = 0; i < type->length; i++) {
1398 emit_block_move(dst, src, type->fields.structure[i].type, predicated);
1399 }
1400 return;
1401 }
1402
1403 if (type->is_array()) {
1404 for (unsigned int i = 0; i < type->length; i++) {
1405 emit_block_move(dst, src, type->fields.array, predicated);
1406 }
1407 return;
1408 }
1409
1410 if (type->is_matrix()) {
1411 const struct glsl_type *vec_type;
1412
1413 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1414 type->vector_elements, 1);
1415
1416 for (int i = 0; i < type->matrix_columns; i++) {
1417 emit_block_move(dst, src, vec_type, predicated);
1418 }
1419 return;
1420 }
1421
1422 assert(type->is_scalar() || type->is_vector());
1423
1424 dst->type = brw_type_for_base_type(type);
1425 src->type = dst->type;
1426
1427 dst->writemask = (1 << type->vector_elements) - 1;
1428
1429 /* Do we need to worry about swizzling a swizzle? */
1430 assert(src->swizzle == BRW_SWIZZLE_NOOP);
1431 src->swizzle = swizzle_for_size(type->vector_elements);
1432
1433 vec4_instruction *inst = emit(MOV(*dst, *src));
1434 if (predicated)
1435 inst->predicate = BRW_PREDICATE_NORMAL;
1436
1437 dst->reg_offset++;
1438 src->reg_offset++;
1439 }
1440
1441
1442 /* If the RHS processing resulted in an instruction generating a
1443 * temporary value, and it would be easy to rewrite the instruction to
1444 * generate its result right into the LHS instead, do so. This ends
1445 * up reliably removing instructions where it can be tricky to do so
1446 * later without real UD chain information.
1447 */
1448 bool
1449 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1450 dst_reg dst,
1451 src_reg src,
1452 vec4_instruction *pre_rhs_inst,
1453 vec4_instruction *last_rhs_inst)
1454 {
1455 /* This could be supported, but it would take more smarts. */
1456 if (ir->condition)
1457 return false;
1458
1459 if (pre_rhs_inst == last_rhs_inst)
1460 return false; /* No instructions generated to work with. */
1461
1462 /* Make sure the last instruction generated our source reg. */
1463 if (src.file != GRF ||
1464 src.file != last_rhs_inst->dst.file ||
1465 src.reg != last_rhs_inst->dst.reg ||
1466 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1467 src.reladdr ||
1468 src.abs ||
1469 src.negate ||
1470 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1471 return false;
1472
1473 /* Check that that last instruction fully initialized the channels
1474 * we want to use, in the order we want to use them. We could
1475 * potentially reswizzle the operands of many instructions so that
1476 * we could handle out of order channels, but don't yet.
1477 */
1478 for (int i = 0; i < 4; i++) {
1479 if (dst.writemask & (1 << i)) {
1480 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1481 return false;
1482
1483 if (BRW_GET_SWZ(src.swizzle, i) != i)
1484 return false;
1485 }
1486 }
1487
1488 /* Success! Rewrite the instruction. */
1489 last_rhs_inst->dst.file = dst.file;
1490 last_rhs_inst->dst.reg = dst.reg;
1491 last_rhs_inst->dst.reg_offset = dst.reg_offset;
1492 last_rhs_inst->dst.reladdr = dst.reladdr;
1493 last_rhs_inst->dst.writemask &= dst.writemask;
1494
1495 return true;
1496 }
1497
1498 void
1499 vec4_visitor::visit(ir_assignment *ir)
1500 {
1501 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1502
1503 if (!ir->lhs->type->is_scalar() &&
1504 !ir->lhs->type->is_vector()) {
1505 ir->rhs->accept(this);
1506 src_reg src = this->result;
1507
1508 if (ir->condition) {
1509 emit_bool_to_cond_code(ir->condition);
1510 }
1511
1512 emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
1513 return;
1514 }
1515
1516 /* Now we're down to just a scalar/vector with writemasks. */
1517 int i;
1518
1519 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1520 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1521
1522 ir->rhs->accept(this);
1523
1524 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1525
1526 src_reg src = this->result;
1527
1528 int swizzles[4];
1529 int first_enabled_chan = 0;
1530 int src_chan = 0;
1531
1532 assert(ir->lhs->type->is_vector() ||
1533 ir->lhs->type->is_scalar());
1534 dst.writemask = ir->write_mask;
1535
1536 for (int i = 0; i < 4; i++) {
1537 if (dst.writemask & (1 << i)) {
1538 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1539 break;
1540 }
1541 }
1542
1543 /* Swizzle a small RHS vector into the channels being written.
1544 *
1545 * glsl ir treats write_mask as dictating how many channels are
1546 * present on the RHS while in our instructions we need to make
1547 * those channels appear in the slots of the vec4 they're written to.
1548 */
1549 for (int i = 0; i < 4; i++) {
1550 if (dst.writemask & (1 << i))
1551 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1552 else
1553 swizzles[i] = first_enabled_chan;
1554 }
1555 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1556 swizzles[2], swizzles[3]);
1557
1558 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1559 return;
1560 }
1561
1562 if (ir->condition) {
1563 emit_bool_to_cond_code(ir->condition);
1564 }
1565
1566 for (i = 0; i < type_size(ir->lhs->type); i++) {
1567 vec4_instruction *inst = emit(MOV(dst, src));
1568
1569 if (ir->condition)
1570 inst->predicate = BRW_PREDICATE_NORMAL;
1571
1572 dst.reg_offset++;
1573 src.reg_offset++;
1574 }
1575 }
1576
1577 void
1578 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1579 {
1580 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1581 foreach_list(node, &ir->components) {
1582 ir_constant *field_value = (ir_constant *)node;
1583
1584 emit_constant_values(dst, field_value);
1585 }
1586 return;
1587 }
1588
1589 if (ir->type->is_array()) {
1590 for (unsigned int i = 0; i < ir->type->length; i++) {
1591 emit_constant_values(dst, ir->array_elements[i]);
1592 }
1593 return;
1594 }
1595
1596 if (ir->type->is_matrix()) {
1597 for (int i = 0; i < ir->type->matrix_columns; i++) {
1598 for (int j = 0; j < ir->type->vector_elements; j++) {
1599 dst->writemask = 1 << j;
1600 dst->type = BRW_REGISTER_TYPE_F;
1601
1602 emit(MOV(*dst,
1603 src_reg(ir->value.f[i * ir->type->vector_elements + j])));
1604 }
1605 dst->reg_offset++;
1606 }
1607 return;
1608 }
1609
1610 for (int i = 0; i < ir->type->vector_elements; i++) {
1611 dst->writemask = 1 << i;
1612 dst->type = brw_type_for_base_type(ir->type);
1613
1614 switch (ir->type->base_type) {
1615 case GLSL_TYPE_FLOAT:
1616 emit(MOV(*dst, src_reg(ir->value.f[i])));
1617 break;
1618 case GLSL_TYPE_INT:
1619 emit(MOV(*dst, src_reg(ir->value.i[i])));
1620 break;
1621 case GLSL_TYPE_UINT:
1622 emit(MOV(*dst, src_reg(ir->value.u[i])));
1623 break;
1624 case GLSL_TYPE_BOOL:
1625 emit(MOV(*dst, src_reg(ir->value.b[i])));
1626 break;
1627 default:
1628 assert(!"Non-float/uint/int/bool constant");
1629 break;
1630 }
1631 }
1632 dst->reg_offset++;
1633 }
1634
1635 void
1636 vec4_visitor::visit(ir_constant *ir)
1637 {
1638 dst_reg dst = dst_reg(this, ir->type);
1639 this->result = src_reg(dst);
1640
1641 emit_constant_values(&dst, ir);
1642 }
1643
1644 void
1645 vec4_visitor::visit(ir_call *ir)
1646 {
1647 assert(!"not reached");
1648 }
1649
1650 void
1651 vec4_visitor::visit(ir_texture *ir)
1652 {
1653 /* FINISHME: Implement vertex texturing.
1654 *
1655 * With 0 vertex samplers available, the linker will reject
1656 * programs that do vertex texturing, but after our visitor has
1657 * run.
1658 */
1659 }
1660
1661 void
1662 vec4_visitor::visit(ir_return *ir)
1663 {
1664 assert(!"not reached");
1665 }
1666
1667 void
1668 vec4_visitor::visit(ir_discard *ir)
1669 {
1670 assert(!"not reached");
1671 }
1672
1673 void
1674 vec4_visitor::visit(ir_if *ir)
1675 {
1676 /* Don't point the annotation at the if statement, because then it plus
1677 * the then and else blocks get printed.
1678 */
1679 this->base_ir = ir->condition;
1680
1681 if (intel->gen == 6) {
1682 emit_if_gen6(ir);
1683 } else {
1684 emit_bool_to_cond_code(ir->condition);
1685 emit(IF(BRW_PREDICATE_NORMAL));
1686 }
1687
1688 visit_instructions(&ir->then_instructions);
1689
1690 if (!ir->else_instructions.is_empty()) {
1691 this->base_ir = ir->condition;
1692 emit(BRW_OPCODE_ELSE);
1693
1694 visit_instructions(&ir->else_instructions);
1695 }
1696
1697 this->base_ir = ir->condition;
1698 emit(BRW_OPCODE_ENDIF);
1699 }
1700
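/**
 * Computes the NDC output (x/w, y/w, z/w, 1/w) from gl_Position for the
 * pre-gen6 VUE header.
 */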
1701 void
1702 vec4_visitor::emit_ndc_computation()
1703 {
1704 /* Get the position */
1705 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
1706
1707 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1708 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1709 output_reg[BRW_VERT_RESULT_NDC] = ndc;
1710
1711 current_annotation = "NDC";
1712 dst_reg ndc_w = ndc;
1713 ndc_w.writemask = WRITEMASK_W;
1714 src_reg pos_w = pos;
1715 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1716 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1717
1718 dst_reg ndc_xyz = ndc;
1719 ndc_xyz.writemask = WRITEMASK_XYZ;
1720
1721 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
1722 }
1723
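/**
 * Fills the VUE header slot holding the point size and, on pre-gen6
 * hardware, the user clip flags (the negative-RHW workaround is still
 * FINISHME).
 */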
1724 void
1725 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
1726 {
1727 if (intel->gen < 6 &&
1728 ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
1729 c->key.nr_userclip || brw->has_negative_rhw_bug)) {
1730 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1731 GLuint i;
1732
1733 emit(MOV(header1, 0u));
1734
1735 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1736 assert(!"finishme: psiz");
1737 src_reg psiz;
1738
1739 header1.writemask = WRITEMASK_W;
1740 emit(MUL(header1, psiz, 1u << 11));
1741 emit(AND(header1, src_reg(header1), 0x7ff << 8));
1742 }
1743
1744 for (i = 0; i < c->key.nr_userclip; i++) {
1745 vec4_instruction *inst;
1746
1747 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
1748 src_reg(c->userplane[i])));
1749 inst->conditional_mod = BRW_CONDITIONAL_L;
1750
1751 inst = emit(OR(header1, src_reg(header1), 1u << i));
1752 inst->predicate = BRW_PREDICATE_NORMAL;
1753 }
1754
1755 /* i965 clipping workaround:
1756 * 1) Test for -ve rhw
1757 * 2) If set,
1758 * set ndc = (0,0,0,0)
1759 * set ucp[6] = 1
1760 *
1761 * Later, clipping will detect ucp[6] and ensure the primitive is
1762 * clipped against all fixed planes.
1763 */
1764 if (brw->has_negative_rhw_bug) {
1765 #if 0
1766 /* FINISHME */
1767 brw_CMP(p,
1768 vec8(brw_null_reg()),
1769 BRW_CONDITIONAL_L,
1770 brw_swizzle1(output_reg[BRW_VERT_RESULT_NDC], 3),
1771 brw_imm_f(0));
1772
1773 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
1774 brw_MOV(p, output_reg[BRW_VERT_RESULT_NDC], brw_imm_f(0));
1775 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1776 #endif
1777 }
1778
1779 header1.writemask = WRITEMASK_XYZW;
1780 emit(MOV(reg, src_reg(header1)));
1781 } else if (intel->gen < 6) {
1782 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
1783 } else {
1784 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
1785 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1786 emit(MOV(brw_writemask(reg, WRITEMASK_W),
1787 src_reg(output_reg[VERT_RESULT_PSIZ])));
1788 }
1789 }
1790 }
1791
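/**
 * Computes up to four user clip distances as dot products of gl_Position
 * with the user clip planes, starting at plane @offset.
 */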
1792 void
1793 vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
1794 {
1795 if (intel->gen < 6) {
1796 /* Clip distance slots are set aside in gen5, but they are not used. It
1797 * is not clear whether we actually need to set aside space for them,
1798 * but the performance cost is negligible.
1799 */
1800 return;
1801 }
1802
1803 for (int i = 0; i + offset < c->key.nr_userclip && i < 4; ++i) {
1804 emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
1805 src_reg(output_reg[VERT_RESULT_HPOS]),
1806 src_reg(c->userplane[i + offset])));
1807 }
1808 }
1809
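/**
 * Writes one VUE slot's worth of data into message register @mrf, based
 * on which vert_result the VUE map assigned to that slot.
 */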
1810 void
1811 vec4_visitor::emit_urb_slot(int mrf, int vert_result)
1812 {
1813 struct brw_reg reg = brw_message_reg(mrf);
1814
1815 switch (vert_result) {
1816 case VERT_RESULT_PSIZ:
1817 /* PSIZ is always in slot 0, and is coupled with other flags. */
1818 current_annotation = "indices, point width, clip flags";
1819 emit_psiz_and_flags(reg);
1820 break;
1821 case BRW_VERT_RESULT_NDC:
1822 current_annotation = "NDC";
1823 emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
1824 break;
1825 case BRW_VERT_RESULT_HPOS_DUPLICATE:
1826 case VERT_RESULT_HPOS:
1827 current_annotation = "gl_Position";
1828 emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
1829 break;
1830 case BRW_VERT_RESULT_CLIP0:
1831 current_annotation = "user clip distances";
1832 emit_clip_distances(reg, 0);
1833 break;
1834 case BRW_VERT_RESULT_CLIP1:
1835 current_annotation = "user clip distances";
1836 emit_clip_distances(reg, 4);
1837 break;
1838 case BRW_VERT_RESULT_PAD:
1839 /* No need to write to this slot */
1840 break;
1841 default: {
1842 assert (vert_result < VERT_RESULT_MAX);
1843 current_annotation = NULL;
1844 /* Copy the register, saturating if necessary */
1845 vec4_instruction *inst = emit(MOV(reg,
1846 src_reg(output_reg[vert_result])));
1847 if ((vert_result == VERT_RESULT_COL0 ||
1848 vert_result == VERT_RESULT_COL1 ||
1849 vert_result == VERT_RESULT_BFC0 ||
1850 vert_result == VERT_RESULT_BFC1) &&
1851 c->key.clamp_vertex_color) {
1852 inst->saturate = true;
1853 }
1854 }
1855 break;
1856 }
1857 }
1858
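/**
 * Rounds a URB write message length up so that the payload after the
 * header is a whole number of 256-bit interleaved rows on gen6+.
 */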
1859 static int
1860 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
1861 {
1862 struct intel_context *intel = &brw->intel;
1863
1864 if (intel->gen >= 6) {
1865 /* URB data written (does not include the message header reg) must
1866 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
1867 * section 5.4.3.2.2: URB_INTERLEAVED.
1868 *
1869 * URB entries are allocated on a multiple of 1024 bits, so an
1870 * extra 128 bits written here to make the end align to 256 is
1871 * no problem.
1872 */
1873 if ((mlen % 2) != 1)
1874 mlen++;
1875 }
1876
1877 return mlen;
1878 }
1879
1880 /**
1881 * Generates the VUE payload plus the 1 or 2 URB write instructions to
1882 * complete the VS thread.
1883 *
1884 * The VUE layout is documented in Volume 2a.
1885 */
1886 void
1887 vec4_visitor::emit_urb_writes()
1888 {
1889 /* MRF 0 is reserved for the debugger, so start with message header
1890 * in MRF 1.
1891 */
1892 int base_mrf = 1;
1893 int mrf = base_mrf;
1894 int urb_entry_size;
1895 /* In the process of generating our URB write message contents, we
1896 * may need to unspill a register or load from an array. Those
1897 * reads would use MRFs 14-15.
1898 */
1899 int max_usable_mrf = 13;
1900
1901 /* FINISHME: edgeflag */
1902
1903 brw_compute_vue_map(&c->vue_map, intel, c->key.nr_userclip,
1904 c->key.two_side_color, c->prog_data.outputs_written);
1905
1906 /* First mrf is the g0-based message header containing URB handles and such,
1907 * which is implied in VS_OPCODE_URB_WRITE.
1908 */
1909 mrf++;
1910
1911 if (intel->gen < 6) {
1912 emit_ndc_computation();
1913 }
1914
1915 /* Set up the VUE data for the first URB write */
1916 int slot;
1917 for (slot = 0; slot < c->vue_map.num_slots; ++slot) {
1918 emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
1919
1920 /* If this was MRF 15, we can't fit anything more into this URB
1921 * WRITE. Note that with base_mrf of 1, stopping after MRF 15 gives
1922 * an even-numbered amount of URB write data, which meets gen6's
1923 * requirements for length alignment.
1924 */
1925 if (mrf > max_usable_mrf) {
1926 slot++;
1927 break;
1928 }
1929 }
1930
1931 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
1932 inst->base_mrf = base_mrf;
1933 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1934 inst->eot = (slot >= c->vue_map.num_slots);
1935
1936 urb_entry_size = mrf - base_mrf;
1937
1938 /* Optional second URB write */
1939 if (!inst->eot) {
1940 mrf = base_mrf + 1;
1941
1942 for (; slot < c->vue_map.num_slots; ++slot) {
1943 assert(mrf < max_usable_mrf);
1944
1945 emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
1946 }
1947
1948 inst = emit(VS_OPCODE_URB_WRITE);
1949 inst->base_mrf = base_mrf;
1950 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1951 inst->eot = true;
1952 /* URB destination offset. In the previous write, we got MRFs
1953 * 2-13 minus the one header MRF, so 12 regs. URB offset is in
1954 * URB row increments, and each of our MRFs is half of one of
1955 * those, since we're doing interleaved writes.
1956 */
1957 inst->offset = (max_usable_mrf - base_mrf) / 2;
1958
1959 urb_entry_size += mrf - base_mrf;
1960 }
1961
1962 if (intel->gen == 6)
1963 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
1964 else
1965 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
1966 }
1967
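/**
 * Returns a src_reg holding the scratch-space offset for @reg_offset in
 * message units, emitting the address math before @inst when a variable
 * index (@reladdr) is involved.
 */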
1968 src_reg
1969 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
1970 src_reg *reladdr, int reg_offset)
1971 {
1972 /* Because we store the values to scratch interleaved like our
1973 * vertex data, we need to scale the vec4 index by 2.
1974 */
1975 int message_header_scale = 2;
1976
1977 /* Pre-gen6, the message header uses byte offsets instead of vec4
1978 * (16-byte) offset units.
1979 */
1980 if (intel->gen < 6)
1981 message_header_scale *= 16;
1982
1983 if (reladdr) {
1984 src_reg index = src_reg(this, glsl_type::int_type);
1985
1986 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
1987 emit_before(inst, MUL(dst_reg(index),
1988 index, src_reg(message_header_scale)));
1989
1990 return index;
1991 } else {
1992 return src_reg(reg_offset * message_header_scale);
1993 }
1994 }
1995
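/**
 * Like get_scratch_offset(), but for the pull constant buffer: offsets
 * are in bytes pre-gen6 and in vec4 units on gen6+.
 */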
1996 src_reg
1997 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
1998 src_reg *reladdr, int reg_offset)
1999 {
2000 if (reladdr) {
2001 src_reg index = src_reg(this, glsl_type::int_type);
2002
2003 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2004
2005 /* Pre-gen6, the message header uses byte offsets instead of vec4
2006 * (16-byte) offset units.
2007 */
2008 if (intel->gen < 6) {
2009 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2010 }
2011
2012 return index;
2013 } else {
2014 int message_header_scale = intel->gen < 6 ? 16 : 1;
2015 return src_reg(reg_offset * message_header_scale);
2016 }
2017 }
2018
2019 /**
2020 * Emits an instruction before @inst to load the value named by @orig_src
2021 * from scratch space at @base_offset to @temp.
2022 */
2023 void
2024 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2025 dst_reg temp, src_reg orig_src,
2026 int base_offset)
2027 {
2028 int reg_offset = base_offset + orig_src.reg_offset;
2029 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2030
2031 emit_before(inst, SCRATCH_READ(temp, index));
2032 }
2033
2034 /**
2035 * Emits an instruction after @inst to store @temp (the value @inst
2036 * computed for @orig_dst) to scratch space at @base_offset.
2037 */
2038 void
2039 vec4_visitor::emit_scratch_write(vec4_instruction *inst,
2040 src_reg temp, dst_reg orig_dst,
2041 int base_offset)
2042 {
2043 int reg_offset = base_offset + orig_dst.reg_offset;
2044 src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
2045
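/* Editorial note: the nominal destination below is hardware register g0
 * tagged with orig_dst's writemask; presumably only the writemask is
 * meaningful here, since the data being written travels in @temp.
 */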
2046 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2047 orig_dst.writemask));
2048 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
2049 write->predicate = inst->predicate;
2050 write->ir = inst->ir;
2051 write->annotation = inst->annotation;
2052 inst->insert_after(write);
2053 }
2054
2055 /**
2056 * We can't generally support array access in GRF space, because a
2057 * single instruction's destination can only span 2 contiguous
2058 * registers. So, we send all GRF arrays that get variable index
2059 * access to scratch space.
2060 */
2061 void
2062 vec4_visitor::move_grf_array_access_to_scratch()
2063 {
2064 int scratch_loc[this->virtual_grf_count];
2065
2066 for (int i = 0; i < this->virtual_grf_count; i++) {
2067 scratch_loc[i] = -1;
2068 }
2069
2070 /* First, calculate the set of virtual GRFs that need to be punted
2071 * to scratch due to having any variable-indexed access on them, and
2072 * where in scratch each one will live.
2073 */
2074 foreach_list(node, &this->instructions) {
2075 vec4_instruction *inst = (vec4_instruction *)node;
2076
2077 if (inst->dst.file == GRF && inst->dst.reladdr &&
2078 scratch_loc[inst->dst.reg] == -1) {
2079 scratch_loc[inst->dst.reg] = c->last_scratch;
2080 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
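/* Editorial note: each vec4 of the array takes 8 floats of scratch (the
 * two interleaved vertices) at 4 bytes each, hence the "* 8 * 4"; e.g.
 * a vec4[4] array reserves 128 bytes.
 */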
2081 }
2082
2083 for (int i = 0 ; i < 3; i++) {
2084 src_reg *src = &inst->src[i];
2085
2086 if (src->file == GRF && src->reladdr &&
2087 scratch_loc[src->reg] == -1) {
2088 scratch_loc[src->reg] = c->last_scratch;
2089 c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
2090 }
2091 }
2092 }
2093
2094 /* Now, for anything that will be accessed through scratch, rewrite
2095 * it to load/store. Note that this is a _safe list walk, because
2096 * we may generate a new scratch_write instruction after the one
2097 * we're processing.
2098 */
2099 foreach_list_safe(node, &this->instructions) {
2100 vec4_instruction *inst = (vec4_instruction *)node;
2101
2102 /* Set up the annotation tracking for newly generated instructions. */
2103 base_ir = inst->ir;
2104 current_annotation = inst->annotation;
2105
2106 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2107 src_reg temp = src_reg(this, glsl_type::vec4_type);
2108
2109 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
2110
2111 inst->dst.file = temp.file;
2112 inst->dst.reg = temp.reg;
2113 inst->dst.reg_offset = temp.reg_offset;
2114 inst->dst.reladdr = NULL;
2115 }
2116
2117 for (int i = 0 ; i < 3; i++) {
2118 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2119 continue;
2120
2121 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2122
2123 emit_scratch_read(inst, temp, inst->src[i],
2124 scratch_loc[inst->src[i].reg]);
2125
2126 inst->src[i].file = temp.file;
2127 inst->src[i].reg = temp.reg;
2128 inst->src[i].reg_offset = temp.reg_offset;
2129 inst->src[i].reladdr = NULL;
2130 }
2131 }
2132 }
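/* Illustrative sketch (editorial, not driver code): for a virtual GRF
 * array accessed with a variable index, the pass above rewrites roughly
 *
 *    read:    MOV dst, array[reladdr]
 *        =>   SCRATCH_READ temp, offset(array_base, reladdr)
 *             MOV dst, temp
 *
 *    write:   MOV array[reladdr], src
 *        =>   MOV temp, src
 *             SCRATCH_WRITE offset(array_base, reladdr), temp
 *
 * where "array", "temp" and "array_base" are hypothetical names; temp is
 * the fresh vec4 allocated in the loop above.
 */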
2133
2134 /**
2135 * Emits an instruction before @inst to load the value named by @orig_src
2136 * from the pull constant buffer (surface) at @base_offset to @temp.
2137 */
2138 void
2139 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
2140 dst_reg temp, src_reg orig_src,
2141 int base_offset)
2142 {
2143 int reg_offset = base_offset + orig_src.reg_offset;
2144 src_reg index = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
2145 vec4_instruction *load;
2146
2147 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
2148 temp, index);
2149 load->base_mrf = 14;
2150 load->mlen = 1;
2151 emit_before(inst, load);
2152 }
2153
2154 /**
2155 * Implements array access of uniforms by inserting a
2156 * PULL_CONSTANT_LOAD instruction.
2157 *
2158 * Unlike temporary GRF array access (where we don't support it due to
2159 * the difficulty of doing relative addressing on instruction
2160 * destinations), we could potentially do array access of uniforms
2161 * that were loaded in GRF space as push constants. In real-world
2162 * usage we've seen, though, the arrays being used are always larger
2163 * than we could load as push constants, so we just always move all
2164 * uniform array access out to a pull constant buffer.
2165 */
2166 void
2167 vec4_visitor::move_uniform_array_access_to_pull_constants()
2168 {
2169 int pull_constant_loc[this->uniforms];
2170
2171 for (int i = 0; i < this->uniforms; i++) {
2172 pull_constant_loc[i] = -1;
2173 }
2174
2175 /* Walk through and find array access of uniforms. Put a copy of that
2176 * uniform in the pull constant buffer.
2177 *
2178 * Note that we don't move constant-indexed accesses to arrays. No
2179 * testing has been done of the performance impact of this choice.
2180 */
2181 foreach_list_safe(node, &this->instructions) {
2182 vec4_instruction *inst = (vec4_instruction *)node;
2183
2184 for (int i = 0 ; i < 3; i++) {
2185 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
2186 continue;
2187
2188 int uniform = inst->src[i].reg;
2189
2190 /* If this array isn't already present in the pull constant buffer,
2191 * add it.
2192 */
2193 if (pull_constant_loc[uniform] == -1) {
2194 const float **values = &prog_data->param[uniform * 4];
2195
2196 pull_constant_loc[uniform] = prog_data->nr_pull_params;
2197
2198 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
2199 prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
2200 }
2201 }
2202
2203 /* Set up the annotation tracking for newly generated instructions. */
2204 base_ir = inst->ir;
2205 current_annotation = inst->annotation;
2206
2207 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2208
2209 emit_pull_constant_load(inst, temp, inst->src[i],
2210 pull_constant_loc[uniform]);
2211
2212 inst->src[i].file = temp.file;
2213 inst->src[i].reg = temp.reg;
2214 inst->src[i].reg_offset = temp.reg_offset;
2215 inst->src[i].reladdr = NULL;
2216 }
2217 }
2218
2219 /* Now there are no accesses of the UNIFORM file with a reladdr, so
2220 * no need to track them as larger-than-vec4 objects. This will be
2221 * relied on in cutting out unused uniform vectors from push
2222 * constants.
2223 */
2224 split_uniform_registers();
2225 }
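/* Illustrative sketch (editorial, not driver code): a variably indexed
 * uniform array access such as
 *
 *    MOV dst, uniform_array[reladdr]
 *
 * ends up roughly as
 *
 *    VS_OPCODE_PULL_CONSTANT_LOAD temp, offset(loc, reladdr)
 *    MOV dst, temp
 *
 * where loc is the array's location in pull_param and temp is a fresh
 * vec4 temporary (names are hypothetical).  Constant-indexed uniform
 * accesses stay in place as push constants.
 */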
2226
2227 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
2228 struct gl_shader_program *prog,
2229 struct brw_shader *shader)
2230 {
2231 this->c = c;
2232 this->p = &c->func;
2233 this->brw = p->brw;
2234 this->intel = &brw->intel;
2235 this->ctx = &intel->ctx;
2236 this->prog = prog;
2237 this->shader = shader;
2238
2239 this->mem_ctx = ralloc_context(NULL);
2240 this->failed = false;
2241
2242 this->base_ir = NULL;
2243 this->current_annotation = NULL;
2244
2246 this->vp = prog->VertexProgram;
2247 this->prog_data = &c->prog_data;
2248
2249 this->variable_ht = hash_table_ctor(0,
2250 hash_table_pointer_hash,
2251 hash_table_pointer_compare);
2252
2253 this->virtual_grf_def = NULL;
2254 this->virtual_grf_use = NULL;
2255 this->virtual_grf_sizes = NULL;
2256 this->virtual_grf_count = 0;
2257 this->virtual_grf_array_size = 0;
2258 this->live_intervals_valid = false;
2259
2260 this->uniforms = 0;
2265 }
2266
2267 vec4_visitor::~vec4_visitor()
2268 {
2269 ralloc_free(this->mem_ctx);
2270 hash_table_dtor(this->variable_ht);
2271 }
2272
2273
2274 void
2275 vec4_visitor::fail(const char *format, ...)
2276 {
2277 va_list va;
2278 char *msg;
2279
2280 if (failed)
2281 return;
2282
2283 failed = true;
2284
2285 va_start(va, format);
2286 msg = ralloc_vasprintf(mem_ctx, format, va);
2287 va_end(va);
2288 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2289
2290 this->fail_msg = msg;
2291
2292 if (INTEL_DEBUG & DEBUG_VS) {
2293 fprintf(stderr, "%s", msg);
2294 }
2295 }
2296
2297 } /* namespace brw */