i965/vs: Do VUE writes using the MRF file instead of hardware register.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 }
29
30 namespace brw {
31
32 src_reg::src_reg(dst_reg reg)
33 {
34 init();
35
36 this->file = reg.file;
37 this->reg = reg.reg;
38 this->reg_offset = reg.reg_offset;
39 this->type = reg.type;
40 this->reladdr = reg.reladdr;
41 this->fixed_hw_reg = reg.fixed_hw_reg;
42
43 int swizzles[4];
44 int next_chan = 0;
45 int last = 0;
46
47 for (int i = 0; i < 4; i++) {
48 if (!(reg.writemask & (1 << i)))
49 continue;
50
51 swizzles[next_chan++] = last = i;
52 }
53
54 for (; next_chan < 4; next_chan++) {
55 swizzles[next_chan] = last;
56 }
57
58 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
59 swizzles[2], swizzles[3]);
60 }
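/* For illustration: the loop above packs the channels enabled in the
 * writemask to the front of the swizzle and then replicates the last
 * enabled channel.  A dst written with writemask XZ, for example, becomes
 * a src with swizzles[] = {X, Z, Z, Z}, so reading the converted source
 * only ever touches channels the destination actually wrote.
 */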
61
62 dst_reg::dst_reg(src_reg reg)
63 {
64 init();
65
66 this->file = reg.file;
67 this->reg = reg.reg;
68 this->reg_offset = reg.reg_offset;
69 this->type = reg.type;
70 this->writemask = WRITEMASK_XYZW;
71 this->reladdr = reg.reladdr;
72 this->fixed_hw_reg = reg.fixed_hw_reg;
73 }
74
75 vec4_instruction::vec4_instruction(vec4_visitor *v,
76 enum opcode opcode, dst_reg dst,
77 src_reg src0, src_reg src1, src_reg src2)
78 {
79 this->opcode = opcode;
80 this->dst = dst;
81 this->src[0] = src0;
82 this->src[1] = src1;
83 this->src[2] = src2;
84 this->ir = v->base_ir;
85 this->annotation = v->current_annotation;
86 }
87
88 vec4_instruction *
89 vec4_visitor::emit(vec4_instruction *inst)
90 {
91 this->instructions.push_tail(inst);
92
93 return inst;
94 }
95
96 vec4_instruction *
97 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
98 {
99 new_inst->ir = inst->ir;
100 new_inst->annotation = inst->annotation;
101
102 inst->insert_before(new_inst);
103
104 return inst;
105 }
106
107 vec4_instruction *
108 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
109 src_reg src0, src_reg src1, src_reg src2)
110 {
111 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
112 src0, src1, src2));
113 }
114
115
116 vec4_instruction *
117 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
118 {
119 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
120 }
121
122 vec4_instruction *
123 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
124 {
125 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
126 }
127
128 vec4_instruction *
129 vec4_visitor::emit(enum opcode opcode)
130 {
131 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
132 }
133
134 #define ALU1(op) \
135 vec4_instruction * \
136 vec4_visitor::op(dst_reg dst, src_reg src0) \
137 { \
138 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
139 src0); \
140 }
141
142 #define ALU2(op) \
143 vec4_instruction * \
144 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
145 { \
146 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
147 src0, src1); \
148 }
149
150 ALU1(NOT)
151 ALU1(MOV)
152 ALU1(FRC)
153 ALU1(RNDD)
154 ALU1(RNDE)
155 ALU1(RNDZ)
156 ALU2(ADD)
157 ALU2(MUL)
158 ALU2(MACH)
159 ALU2(AND)
160 ALU2(OR)
161 ALU2(XOR)
162 ALU2(DP3)
163 ALU2(DP4)
164
165 /** Gen4 predicated IF. */
166 vec4_instruction *
167 vec4_visitor::IF(uint32_t predicate)
168 {
169 vec4_instruction *inst;
170
171 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
172 inst->predicate = predicate;
173
174 return inst;
175 }
176
177 /** Gen6+ IF with embedded comparison. */
178 vec4_instruction *
179 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
180 {
181 assert(intel->gen >= 6);
182
183 vec4_instruction *inst;
184
185 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
186 src0, src1);
187 inst->conditional_mod = condition;
188
189 return inst;
190 }
191
192 /**
193 * CMP: Sets the low bit of the destination channels with the result
194 * of the comparison, while the upper bits are undefined, and updates
195 * the flag register with the packed 16 bits of the result.
196 */
197 vec4_instruction *
198 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
199 {
200 vec4_instruction *inst;
201
202 /* original gen4 does type conversion to the destination type
203 * before comparison, producing garbage results for floating
204 * point comparisons.
205 */
206 if (intel->gen == 4) {
207 dst.type = src0.type;
208 if (dst.file == HW_REG)
209 dst.fixed_hw_reg.type = dst.type;
210 }
211
212 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
213 inst->conditional_mod = condition;
214
215 return inst;
216 }
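/* Typical usage, matching the comparison lowering later in this file: the
 * CMP leaves the boolean in the low bit of each channel, and an AND with 1
 * clears the undefined upper bits:
 *
 *    emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_L));
 *    emit(AND(result_dst, result_src, src_reg(0x1)));
 */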
217
218 vec4_instruction *
219 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
220 {
221 vec4_instruction *inst;
222
223 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
224 dst, index);
225 inst->base_mrf = 14;
226 inst->mlen = 1;
227
228 return inst;
229 }
230
231 vec4_instruction *
232 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
233 {
234 vec4_instruction *inst;
235
236 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
237 dst, src, index);
238 inst->base_mrf = 13;
239 inst->mlen = 2;
240
241 return inst;
242 }
243
244 void
245 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
246 {
247 static enum opcode dot_opcodes[] = {
248 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
249 };
250
251 emit(dot_opcodes[elements - 2], dst, src0, src1);
252 }
253
254 void
255 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
256 {
257 /* The gen6 math instruction ignores the source modifiers --
258 * swizzle, abs, negate, and at least some parts of the register
259 * region description.
260 *
261 * While it would seem that this MOV could be avoided at this point
262 * in the case that the swizzle is matched up with the destination
263 * writemask, note that uniform packing and register allocation
264 * could rearrange our swizzle, so let's leave this matter up to
265 * copy propagation later.
266 */
267 src_reg temp_src = src_reg(this, glsl_type::vec4_type);
268 emit(MOV(dst_reg(temp_src), src));
269
270 if (dst.writemask != WRITEMASK_XYZW) {
271 /* The gen6 math instruction must be align1, so we can't do
272 * writemasks.
273 */
274 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
275
276 emit(opcode, temp_dst, temp_src);
277
278 emit(MOV(dst, src_reg(temp_dst)));
279 } else {
280 emit(opcode, dst, temp_src);
281 }
282 }
283
284 void
285 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
286 {
287 vec4_instruction *inst = emit(opcode, dst, src);
288 inst->base_mrf = 1;
289 inst->mlen = 1;
290 }
291
292 void
293 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
294 {
295 switch (opcode) {
296 case SHADER_OPCODE_RCP:
297 case SHADER_OPCODE_RSQ:
298 case SHADER_OPCODE_SQRT:
299 case SHADER_OPCODE_EXP2:
300 case SHADER_OPCODE_LOG2:
301 case SHADER_OPCODE_SIN:
302 case SHADER_OPCODE_COS:
303 break;
304 default:
305 assert(!"not reached: bad math opcode");
306 return;
307 }
308
309 if (intel->gen >= 6) {
310 return emit_math1_gen6(opcode, dst, src);
311 } else {
312 return emit_math1_gen4(opcode, dst, src);
313 }
314 }
315
316 void
317 vec4_visitor::emit_math2_gen6(enum opcode opcode,
318 dst_reg dst, src_reg src0, src_reg src1)
319 {
320 src_reg expanded;
321
322 /* The gen6 math instruction ignores the source modifiers --
323 * swizzle, abs, negate, and at least some parts of the register
324 * region description. Move the sources to temporaries to make it
325 * generally work.
326 */
327
328 expanded = src_reg(this, glsl_type::vec4_type);
329 emit(MOV(dst_reg(expanded), src0));
330 src0 = expanded;
331
332 expanded = src_reg(this, glsl_type::vec4_type);
333 emit(MOV(dst_reg(expanded), src1));
334 src1 = expanded;
335
336 if (dst.writemask != WRITEMASK_XYZW) {
337 /* The gen6 math instruction must be align1, so we can't do
338 * writemasks.
339 */
340 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
341
342 emit(opcode, temp_dst, src0, src1);
343
344 emit(MOV(dst, src_reg(temp_dst)));
345 } else {
346 emit(opcode, dst, src0, src1);
347 }
348 }
349
350 void
351 vec4_visitor::emit_math2_gen4(enum opcode opcode,
352 dst_reg dst, src_reg src0, src_reg src1)
353 {
354 vec4_instruction *inst = emit(opcode, dst, src0, src1);
355 inst->base_mrf = 1;
356 inst->mlen = 2;
357 }
358
359 void
360 vec4_visitor::emit_math(enum opcode opcode,
361 dst_reg dst, src_reg src0, src_reg src1)
362 {
363 assert(opcode == SHADER_OPCODE_POW);
364
365 if (intel->gen >= 6) {
366 return emit_math2_gen6(opcode, dst, src0, src1);
367 } else {
368 return emit_math2_gen4(opcode, dst, src0, src1);
369 }
370 }
371
372 void
373 vec4_visitor::visit_instructions(const exec_list *list)
374 {
375 foreach_list(node, list) {
376 ir_instruction *ir = (ir_instruction *)node;
377
378 base_ir = ir;
379 ir->accept(this);
380 }
381 }
382
383
384 static int
385 type_size(const struct glsl_type *type)
386 {
387 unsigned int i;
388 int size;
389
390 switch (type->base_type) {
391 case GLSL_TYPE_UINT:
392 case GLSL_TYPE_INT:
393 case GLSL_TYPE_FLOAT:
394 case GLSL_TYPE_BOOL:
395 if (type->is_matrix()) {
396 return type->matrix_columns;
397 } else {
398 /* Regardless of size of vector, it gets a vec4. This is bad
399 * packing for things like floats, but otherwise arrays become a
400 * mess. Hopefully a later pass over the code can pack scalars
401 * down if appropriate.
402 */
403 return 1;
404 }
405 case GLSL_TYPE_ARRAY:
406 assert(type->length > 0);
407 return type_size(type->fields.array) * type->length;
408 case GLSL_TYPE_STRUCT:
409 size = 0;
410 for (i = 0; i < type->length; i++) {
411 size += type_size(type->fields.structure[i].type);
412 }
413 return size;
414 case GLSL_TYPE_SAMPLER:
415 /* Samplers take up one slot in UNIFORMS[], but they're baked in
416 * at link time.
417 */
418 return 1;
419 default:
420 assert(0);
421 return 0;
422 }
423 }
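/* A few worked examples of the slot counts returned above: float, vec3 and
 * vec4 each occupy one vec4 slot; mat3 occupies three (one per column);
 * float[4] occupies four; and struct { vec4 a; mat4 b; } occupies 1 + 4 = 5.
 */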
424
425 int
426 vec4_visitor::virtual_grf_alloc(int size)
427 {
428 if (virtual_grf_array_size <= virtual_grf_count) {
429 if (virtual_grf_array_size == 0)
430 virtual_grf_array_size = 16;
431 else
432 virtual_grf_array_size *= 2;
433 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
434 virtual_grf_array_size);
435 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
436 virtual_grf_array_size);
437 }
438 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
439 virtual_grf_reg_count += size;
440 virtual_grf_sizes[virtual_grf_count] = size;
441 return virtual_grf_count++;
442 }
443
444 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
445 {
446 init();
447
448 this->file = GRF;
449 this->reg = v->virtual_grf_alloc(type_size(type));
450
451 if (type->is_array() || type->is_record()) {
452 this->swizzle = BRW_SWIZZLE_NOOP;
453 } else {
454 this->swizzle = swizzle_for_size(type->vector_elements);
455 }
456
457 this->type = brw_type_for_base_type(type);
458 }
459
460 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
461 {
462 init();
463
464 this->file = GRF;
465 this->reg = v->virtual_grf_alloc(type_size(type));
466
467 if (type->is_array() || type->is_record()) {
468 this->writemask = WRITEMASK_XYZW;
469 } else {
470 this->writemask = (1 << type->vector_elements) - 1;
471 }
472
473 this->type = brw_type_for_base_type(type);
474 }
475
476 /* Our support for uniforms is piggy-backed on the struct
477 * gl_vertex_program, because that's where the values actually
478 * get stored, rather than in some global gl_shader_program uniform
479 * store.
480 */
481 int
482 vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
483 {
484 unsigned int offset = 0;
485 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
486
487 if (type->is_matrix()) {
488 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
489 type->vector_elements,
490 1);
491
492 for (unsigned int i = 0; i < type->matrix_columns; i++) {
493 offset += setup_uniform_values(loc + offset, column);
494 }
495
496 return offset;
497 }
498
499 switch (type->base_type) {
500 case GLSL_TYPE_FLOAT:
501 case GLSL_TYPE_UINT:
502 case GLSL_TYPE_INT:
503 case GLSL_TYPE_BOOL:
504 for (unsigned int i = 0; i < type->vector_elements; i++) {
505 c->prog_data.param[this->uniforms * 4 + i] = &values[i];
506 }
507
508 /* Set up pad elements to get things aligned to a vec4 boundary. */
509 for (unsigned int i = type->vector_elements; i < 4; i++) {
510 static float zero = 0;
511
512 c->prog_data.param[this->uniforms * 4 + i] = &zero;
513 }
514
515 /* Track the size of this uniform vector, for future packing of
516 * uniforms.
517 */
518 this->uniform_vector_size[this->uniforms] = type->vector_elements;
519 this->uniforms++;
520
521 return 1;
522
523 case GLSL_TYPE_STRUCT:
524 for (unsigned int i = 0; i < type->length; i++) {
525 offset += setup_uniform_values(loc + offset,
526 type->fields.structure[i].type);
527 }
528 return offset;
529
530 case GLSL_TYPE_ARRAY:
531 for (unsigned int i = 0; i < type->length; i++) {
532 offset += setup_uniform_values(loc + offset, type->fields.array);
533 }
534 return offset;
535
536 case GLSL_TYPE_SAMPLER:
537 /* The sampler takes up a slot, but we don't use any values from it. */
538 return 1;
539
540 default:
541 assert(!"not reached");
542 return 0;
543 }
544 }
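/* Worked example: a vec3 uniform fills param slots [4*u .. 4*u+2] and points
 * slot 4*u+3 at the shared zero pad, with uniform_vector_size[u] = 3; a mat4
 * recurses into four vec4 columns and advances this->uniforms by four.
 */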
545
546 /* Our support for builtin uniforms is even scarier than non-builtin.
547 * It sits on top of the PROG_STATE_VAR parameters that are
548 * automatically updated from GL context state.
549 */
550 void
551 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
552 {
553 const ir_state_slot *const slots = ir->state_slots;
554 assert(ir->state_slots != NULL);
555
556 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
557 /* This state reference has already been setup by ir_to_mesa,
558 * but we'll get the same index back here. We can reference
559 * ParameterValues directly, since unlike brw_fs.cpp, we never
560 * add new state references during compile.
561 */
562 int index = _mesa_add_state_reference(this->vp->Base.Parameters,
563 (gl_state_index *)slots[i].tokens);
564 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
565
566 this->uniform_vector_size[this->uniforms] = 0;
567 /* Add each of the unique swizzled channels of the element.
568 * This will end up matching the size of the glsl_type of this field.
569 */
570 int last_swiz = -1;
571 for (unsigned int j = 0; j < 4; j++) {
572 int swiz = GET_SWZ(slots[i].swizzle, j);
573 last_swiz = swiz;
574
575 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
576 if (swiz <= last_swiz)
577 this->uniform_vector_size[this->uniforms]++;
578 }
579 this->uniforms++;
580 }
581 }
582
583 dst_reg *
584 vec4_visitor::variable_storage(ir_variable *var)
585 {
586 return (dst_reg *)hash_table_find(this->variable_ht, var);
587 }
588
589 void
590 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
591 {
592 ir_expression *expr = ir->as_expression();
593
594 *predicate = BRW_PREDICATE_NORMAL;
595
596 if (expr) {
597 src_reg op[2];
598 vec4_instruction *inst;
599
600 assert(expr->get_num_operands() <= 2);
601 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
602 expr->operands[i]->accept(this);
603 op[i] = this->result;
604 }
605
606 switch (expr->operation) {
607 case ir_unop_logic_not:
608 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
609 inst->conditional_mod = BRW_CONDITIONAL_Z;
610 break;
611
612 case ir_binop_logic_xor:
613 inst = emit(XOR(dst_null_d(), op[0], op[1]));
614 inst->conditional_mod = BRW_CONDITIONAL_NZ;
615 break;
616
617 case ir_binop_logic_or:
618 inst = emit(OR(dst_null_d(), op[0], op[1]));
619 inst->conditional_mod = BRW_CONDITIONAL_NZ;
620 break;
621
622 case ir_binop_logic_and:
623 inst = emit(AND(dst_null_d(), op[0], op[1]));
624 inst->conditional_mod = BRW_CONDITIONAL_NZ;
625 break;
626
627 case ir_unop_f2b:
628 if (intel->gen >= 6) {
629 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
630 } else {
631 inst = emit(MOV(dst_null_f(), op[0]));
632 inst->conditional_mod = BRW_CONDITIONAL_NZ;
633 }
634 break;
635
636 case ir_unop_i2b:
637 if (intel->gen >= 6) {
638 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
639 } else {
640 inst = emit(MOV(dst_null_d(), op[0]));
641 inst->conditional_mod = BRW_CONDITIONAL_NZ;
642 }
643 break;
644
645 case ir_binop_all_equal:
646 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
647 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
648 break;
649
650 case ir_binop_any_nequal:
651 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
652 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
653 break;
654
655 case ir_unop_any:
656 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
657 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
658 break;
659
660 case ir_binop_greater:
661 case ir_binop_gequal:
662 case ir_binop_less:
663 case ir_binop_lequal:
664 case ir_binop_equal:
665 case ir_binop_nequal:
666 emit(CMP(dst_null_d(), op[0], op[1],
667 brw_conditional_for_comparison(expr->operation)));
668 break;
669
670 default:
671 assert(!"not reached");
672 break;
673 }
674 return;
675 }
676
677 ir->accept(this);
678
679 if (intel->gen >= 6) {
680 vec4_instruction *inst = emit(AND(dst_null_d(),
681 this->result, src_reg(1)));
682 inst->conditional_mod = BRW_CONDITIONAL_NZ;
683 } else {
684 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
685 inst->conditional_mod = BRW_CONDITIONAL_NZ;
686 }
687 }
688
689 /**
690 * Emit a gen6 IF statement with the comparison folded into the IF
691 * instruction.
692 */
693 void
694 vec4_visitor::emit_if_gen6(ir_if *ir)
695 {
696 ir_expression *expr = ir->condition->as_expression();
697
698 if (expr) {
699 src_reg op[2];
700 dst_reg temp;
701
702 assert(expr->get_num_operands() <= 2);
703 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
704 expr->operands[i]->accept(this);
705 op[i] = this->result;
706 }
707
708 switch (expr->operation) {
709 case ir_unop_logic_not:
710 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
711 return;
712
713 case ir_binop_logic_xor:
714 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
715 return;
716
717 case ir_binop_logic_or:
718 temp = dst_reg(this, glsl_type::bool_type);
719 emit(OR(temp, op[0], op[1]));
720 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
721 return;
722
723 case ir_binop_logic_and:
724 temp = dst_reg(this, glsl_type::bool_type);
725 emit(AND(temp, op[0], op[1]));
726 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
727 return;
728
729 case ir_unop_f2b:
730 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
731 return;
732
733 case ir_unop_i2b:
734 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
735 return;
736
737 case ir_binop_greater:
738 case ir_binop_gequal:
739 case ir_binop_less:
740 case ir_binop_lequal:
741 case ir_binop_equal:
742 case ir_binop_nequal:
743 emit(IF(op[0], op[1],
744 brw_conditional_for_comparison(expr->operation)));
745 return;
746
747 case ir_binop_all_equal:
748 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
749 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
750 return;
751
752 case ir_binop_any_nequal:
753 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
754 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
755 return;
756
757 case ir_unop_any:
758 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
759 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
760 return;
761
762 default:
763 assert(!"not reached");
764 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
765 return;
766 }
767 return;
768 }
769
770 ir->condition->accept(this);
771
772 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
773 }
774
775 void
776 vec4_visitor::visit(ir_variable *ir)
777 {
778 dst_reg *reg = NULL;
779
780 if (variable_storage(ir))
781 return;
782
783 switch (ir->mode) {
784 case ir_var_in:
785 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
786
787 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
788 * come in as floating point conversions of the integer values.
789 */
790 for (int i = ir->location; i < ir->location + type_size(ir->type); i++) {
791 if (!c->key.gl_fixed_input_size[i])
792 continue;
793
794 dst_reg dst = *reg;
795 dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1;
796 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
797 }
798 break;
799
800 case ir_var_out:
801 reg = new(mem_ctx) dst_reg(this, ir->type);
802
803 for (int i = 0; i < type_size(ir->type); i++) {
804 output_reg[ir->location + i] = *reg;
805 output_reg[ir->location + i].reg_offset = i;
806 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
807 output_reg_annotation[ir->location + i] = ir->name;
808 }
809 break;
810
811 case ir_var_auto:
812 case ir_var_temporary:
813 reg = new(mem_ctx) dst_reg(this, ir->type);
814 break;
815
816 case ir_var_uniform:
817 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
818
819 /* Track how big the whole uniform variable is, in case we need to put a
820 * copy of its data into pull constants for array access.
821 */
822 this->uniform_size[this->uniforms] = type_size(ir->type);
823
824 if (!strncmp(ir->name, "gl_", 3)) {
825 setup_builtin_uniform_values(ir);
826 } else {
827 setup_uniform_values(ir->location, ir->type);
828 }
829 break;
830
831 default:
832 assert(!"not reached");
833 }
834
835 reg->type = brw_type_for_base_type(ir->type);
836 hash_table_insert(this->variable_ht, reg, ir);
837 }
838
839 void
840 vec4_visitor::visit(ir_loop *ir)
841 {
842 dst_reg counter;
843
844 /* We don't want debugging output to print the whole body of the
845 * loop as the annotation.
846 */
847 this->base_ir = NULL;
848
849 if (ir->counter != NULL) {
850 this->base_ir = ir->counter;
851 ir->counter->accept(this);
852 counter = *(variable_storage(ir->counter));
853
854 if (ir->from != NULL) {
855 this->base_ir = ir->from;
856 ir->from->accept(this);
857
858 emit(MOV(counter, this->result));
859 }
860 }
861
862 emit(BRW_OPCODE_DO);
863
864 if (ir->to) {
865 this->base_ir = ir->to;
866 ir->to->accept(this);
867
868 emit(CMP(dst_null_d(), src_reg(counter), this->result,
869 brw_conditional_for_comparison(ir->cmp)));
870
871 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
872 inst->predicate = BRW_PREDICATE_NORMAL;
873 }
874
875 visit_instructions(&ir->body_instructions);
876
877
878 if (ir->increment) {
879 this->base_ir = ir->increment;
880 ir->increment->accept(this);
881 emit(ADD(counter, src_reg(counter), this->result));
882 }
883
884 emit(BRW_OPCODE_WHILE);
885 }
886
887 void
888 vec4_visitor::visit(ir_loop_jump *ir)
889 {
890 switch (ir->mode) {
891 case ir_loop_jump::jump_break:
892 emit(BRW_OPCODE_BREAK);
893 break;
894 case ir_loop_jump::jump_continue:
895 emit(BRW_OPCODE_CONTINUE);
896 break;
897 }
898 }
899
900
901 void
902 vec4_visitor::visit(ir_function_signature *ir)
903 {
904 assert(0);
905 (void)ir;
906 }
907
908 void
909 vec4_visitor::visit(ir_function *ir)
910 {
911 /* Ignore function bodies other than main() -- we shouldn't see calls to
912 * them since they should all be inlined.
913 */
914 if (strcmp(ir->name, "main") == 0) {
915 const ir_function_signature *sig;
916 exec_list empty;
917
918 sig = ir->matching_signature(&empty);
919
920 assert(sig);
921
922 visit_instructions(&sig->body);
923 }
924 }
925
926 GLboolean
927 vec4_visitor::try_emit_sat(ir_expression *ir)
928 {
929 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
930 if (!sat_src)
931 return false;
932
933 sat_src->accept(this);
934 src_reg src = this->result;
935
936 this->result = src_reg(this, ir->type);
937 vec4_instruction *inst;
938 inst = emit(MOV(dst_reg(this->result), src));
939 inst->saturate = true;
940
941 return true;
942 }
943
944 void
945 vec4_visitor::emit_bool_comparison(unsigned int op,
946 dst_reg dst, src_reg src0, src_reg src1)
947 {
948 /* original gen4 does destination conversion before comparison. */
949 if (intel->gen < 5)
950 dst.type = src0.type;
951
952 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
953
954 dst.type = BRW_REGISTER_TYPE_D;
955 emit(AND(dst, src_reg(dst), src_reg(0x1)));
956 }
957
958 void
959 vec4_visitor::visit(ir_expression *ir)
960 {
961 unsigned int operand;
962 src_reg op[Elements(ir->operands)];
963 src_reg result_src;
964 dst_reg result_dst;
965 vec4_instruction *inst;
966
967 if (try_emit_sat(ir))
968 return;
969
970 for (operand = 0; operand < ir->get_num_operands(); operand++) {
971 this->result.file = BAD_FILE;
972 ir->operands[operand]->accept(this);
973 if (this->result.file == BAD_FILE) {
974 printf("Failed to get tree for expression operand:\n");
975 ir->operands[operand]->print();
976 exit(1);
977 }
978 op[operand] = this->result;
979
980 /* Matrix expression operands should have been broken down to vector
981 * operations already.
982 */
983 assert(!ir->operands[operand]->type->is_matrix());
984 }
985
986 int vector_elements = ir->operands[0]->type->vector_elements;
987 if (ir->operands[1]) {
988 vector_elements = MAX2(vector_elements,
989 ir->operands[1]->type->vector_elements);
990 }
991
992 this->result.file = BAD_FILE;
993
994 /* Storage for our result. Ideally for an assignment we'd be using
995 * the actual storage for the result here, instead.
996 */
997 result_src = src_reg(this, ir->type);
998 /* convenience for the emit functions below. */
999 result_dst = dst_reg(result_src);
1000 /* If nothing special happens, this is the result. */
1001 this->result = result_src;
1002 /* Limit writes to the channels that will be used by result_src later.
1003 * This does limit this temp's use as a temporary for multi-instruction
1004 * sequences.
1005 */
1006 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1007
1008 switch (ir->operation) {
1009 case ir_unop_logic_not:
1010 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
1011 * the one's complement of the whole register, not just bit 0.
1012 */
1013 emit(XOR(result_dst, op[0], src_reg(1)));
1014 break;
1015 case ir_unop_neg:
1016 op[0].negate = !op[0].negate;
1017 this->result = op[0];
1018 break;
1019 case ir_unop_abs:
1020 op[0].abs = true;
1021 op[0].negate = false;
1022 this->result = op[0];
1023 break;
1024
1025 case ir_unop_sign:
1026 emit(MOV(result_dst, src_reg(0.0f)));
1027
1028 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1029 inst = emit(MOV(result_dst, src_reg(1.0f)));
1030 inst->predicate = BRW_PREDICATE_NORMAL;
1031
1032 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1033 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1034 inst->predicate = BRW_PREDICATE_NORMAL;
1035
1036 break;
1037
1038 case ir_unop_rcp:
1039 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1040 break;
1041
1042 case ir_unop_exp2:
1043 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1044 break;
1045 case ir_unop_log2:
1046 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1047 break;
1048 case ir_unop_exp:
1049 case ir_unop_log:
1050 assert(!"not reached: should be handled by ir_explog_to_explog2");
1051 break;
1052 case ir_unop_sin:
1053 case ir_unop_sin_reduced:
1054 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1055 break;
1056 case ir_unop_cos:
1057 case ir_unop_cos_reduced:
1058 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1059 break;
1060
1061 case ir_unop_dFdx:
1062 case ir_unop_dFdy:
1063 assert(!"derivatives not valid in vertex shader");
1064 break;
1065
1066 case ir_unop_noise:
1067 assert(!"not reached: should be handled by lower_noise");
1068 break;
1069
1070 case ir_binop_add:
1071 emit(ADD(result_dst, op[0], op[1]));
1072 break;
1073 case ir_binop_sub:
1074 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1075 break;
1076
1077 case ir_binop_mul:
1078 if (ir->type->is_integer()) {
1079 /* For integer multiplication, the MUL uses the low 16 bits
1080 * of one of the operands (src0 on gen6, src1 on gen7). The
1081 * MACH accumulates in the contribution of the upper 16 bits
1082 * of that operand.
1083 *
1084 * FINISHME: Emit just the MUL if we know an operand is small
1085 * enough.
1086 */
1087 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1088
1089 emit(MUL(acc, op[0], op[1]));
1090 emit(MACH(dst_null_d(), op[0], op[1]));
1091 emit(MOV(result_dst, src_reg(acc)));
1092 } else {
1093 emit(MUL(result_dst, op[0], op[1]));
1094 }
1095 break;
1096 case ir_binop_div:
1097 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1098 case ir_binop_mod:
1099 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1100 break;
1101
1102 case ir_binop_less:
1103 case ir_binop_greater:
1104 case ir_binop_lequal:
1105 case ir_binop_gequal:
1106 case ir_binop_equal:
1107 case ir_binop_nequal: {
1108 emit(CMP(result_dst, op[0], op[1],
1109 brw_conditional_for_comparison(ir->operation)));
1110 emit(AND(result_dst, result_src, src_reg(0x1)));
1111 break;
1112 }
1113
1114 case ir_binop_all_equal:
1115 /* "==" operator producing a scalar boolean. */
1116 if (ir->operands[0]->type->is_vector() ||
1117 ir->operands[1]->type->is_vector()) {
1118 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1119 emit(MOV(result_dst, src_reg(0)));
1120 inst = emit(MOV(result_dst, src_reg(1)));
1121 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1122 } else {
1123 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1124 emit(AND(result_dst, result_src, src_reg(0x1)));
1125 }
1126 break;
1127 case ir_binop_any_nequal:
1128 /* "!=" operator producing a scalar boolean. */
1129 if (ir->operands[0]->type->is_vector() ||
1130 ir->operands[1]->type->is_vector()) {
1131 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1132
1133 emit(MOV(result_dst, src_reg(0)));
1134 inst = emit(MOV(result_dst, src_reg(1)));
1135 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1136 } else {
1137 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1138 emit(AND(result_dst, result_src, src_reg(0x1)));
1139 }
1140 break;
1141
1142 case ir_unop_any:
1143 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1144 emit(MOV(result_dst, src_reg(0)));
1145
1146 inst = emit(MOV(result_dst, src_reg(1)));
1147 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1148 break;
1149
1150 case ir_binop_logic_xor:
1151 emit(XOR(result_dst, op[0], op[1]));
1152 break;
1153
1154 case ir_binop_logic_or:
1155 emit(OR(result_dst, op[0], op[1]));
1156 break;
1157
1158 case ir_binop_logic_and:
1159 emit(AND(result_dst, op[0], op[1]));
1160 break;
1161
1162 case ir_binop_dot:
1163 assert(ir->operands[0]->type->is_vector());
1164 assert(ir->operands[0]->type == ir->operands[1]->type);
1165 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1166 break;
1167
1168 case ir_unop_sqrt:
1169 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1170 break;
1171 case ir_unop_rsq:
1172 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1173 break;
1174 case ir_unop_i2f:
1175 case ir_unop_i2u:
1176 case ir_unop_u2i:
1177 case ir_unop_u2f:
1178 case ir_unop_b2f:
1179 case ir_unop_b2i:
1180 case ir_unop_f2i:
1181 emit(MOV(result_dst, op[0]));
1182 break;
1183 case ir_unop_f2b:
1184 case ir_unop_i2b: {
1185 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1186 emit(AND(result_dst, result_src, src_reg(1)));
1187 break;
1188 }
1189
1190 case ir_unop_trunc:
1191 emit(RNDZ(result_dst, op[0]));
1192 break;
1193 case ir_unop_ceil:
1194 op[0].negate = !op[0].negate;
1195 inst = emit(RNDD(result_dst, op[0]));
1196 this->result.negate = true;
1197 break;
1198 case ir_unop_floor:
1199 inst = emit(RNDD(result_dst, op[0]));
1200 break;
1201 case ir_unop_fract:
1202 inst = emit(FRC(result_dst, op[0]));
1203 break;
1204 case ir_unop_round_even:
1205 emit(RNDE(result_dst, op[0]));
1206 break;
1207
1208 case ir_binop_min:
1209 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_L));
1210
1211 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1212 inst->predicate = BRW_PREDICATE_NORMAL;
1213 break;
1214 case ir_binop_max:
1215 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_G));
1216
1217 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1218 inst->predicate = BRW_PREDICATE_NORMAL;
1219 break;
1220
1221 case ir_binop_pow:
1222 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1223 break;
1224
1225 case ir_unop_bit_not:
1226 inst = emit(NOT(result_dst, op[0]));
1227 break;
1228 case ir_binop_bit_and:
1229 inst = emit(AND(result_dst, op[0], op[1]));
1230 break;
1231 case ir_binop_bit_xor:
1232 inst = emit(XOR(result_dst, op[0], op[1]));
1233 break;
1234 case ir_binop_bit_or:
1235 inst = emit(OR(result_dst, op[0], op[1]));
1236 break;
1237
1238 case ir_binop_lshift:
1239 case ir_binop_rshift:
1240 assert(!"GLSL 1.30 features unsupported");
1241 break;
1242
1243 case ir_quadop_vector:
1244 assert(!"not reached: should be handled by lower_quadop_vector");
1245 break;
1246 }
1247 }
1248
1249
1250 void
1251 vec4_visitor::visit(ir_swizzle *ir)
1252 {
1253 src_reg src;
1254 int i = 0;
1255 int swizzle[4];
1256
1257 /* Note that this is only swizzles in expressions, not those on the left
1258 * hand side of an assignment, which do write masking. See ir_assignment
1259 * for that.
1260 */
1261
1262 ir->val->accept(this);
1263 src = this->result;
1264 assert(src.file != BAD_FILE);
1265
1266 for (i = 0; i < ir->type->vector_elements; i++) {
1267 switch (i) {
1268 case 0:
1269 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1270 break;
1271 case 1:
1272 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1273 break;
1274 case 2:
1275 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1276 break;
1277 case 3:
1278 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1279 break;
1280 }
1281 }
1282 for (; i < 4; i++) {
1283 /* Replicate the last channel out. */
1284 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1285 }
1286
1287 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1288
1289 this->result = src;
1290 }
1291
1292 void
1293 vec4_visitor::visit(ir_dereference_variable *ir)
1294 {
1295 const struct glsl_type *type = ir->type;
1296 dst_reg *reg = variable_storage(ir->var);
1297
1298 if (!reg) {
1299 fail("Failed to find variable storage for %s\n", ir->var->name);
1300 this->result = src_reg(brw_null_reg());
1301 return;
1302 }
1303
1304 this->result = src_reg(*reg);
1305
1306 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1307 this->result.swizzle = swizzle_for_size(type->vector_elements);
1308 }
1309
1310 void
1311 vec4_visitor::visit(ir_dereference_array *ir)
1312 {
1313 ir_constant *constant_index;
1314 src_reg src;
1315 int element_size = type_size(ir->type);
1316
1317 constant_index = ir->array_index->constant_expression_value();
1318
1319 ir->array->accept(this);
1320 src = this->result;
1321
1322 if (constant_index) {
1323 src.reg_offset += constant_index->value.i[0] * element_size;
1324 } else {
1325 /* Variable index array dereference. It eats the "vec4" of the
1326 * base of the array and an index that offsets the Mesa register
1327 * index.
1328 */
1329 ir->array_index->accept(this);
1330
1331 src_reg index_reg;
1332
1333 if (element_size == 1) {
1334 index_reg = this->result;
1335 } else {
1336 index_reg = src_reg(this, glsl_type::int_type);
1337
1338 emit(MUL(dst_reg(index_reg), this->result, src_reg(element_size)));
1339 }
1340
1341 if (src.reladdr) {
1342 src_reg temp = src_reg(this, glsl_type::int_type);
1343
1344 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1345
1346 index_reg = temp;
1347 }
1348
1349 src.reladdr = ralloc(mem_ctx, src_reg);
1350 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1351 }
1352
1353 /* If the type is smaller than a vec4, replicate the last channel out. */
1354 if (ir->type->is_scalar() || ir->type->is_vector())
1355 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1356 else
1357 src.swizzle = BRW_SWIZZLE_NOOP;
1358 src.type = brw_type_for_base_type(ir->type);
1359
1360 this->result = src;
1361 }
1362
1363 void
1364 vec4_visitor::visit(ir_dereference_record *ir)
1365 {
1366 unsigned int i;
1367 const glsl_type *struct_type = ir->record->type;
1368 int offset = 0;
1369
1370 ir->record->accept(this);
1371
1372 for (i = 0; i < struct_type->length; i++) {
1373 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1374 break;
1375 offset += type_size(struct_type->fields.structure[i].type);
1376 }
1377
1378 /* If the type is smaller than a vec4, replicate the last channel out. */
1379 if (ir->type->is_scalar() || ir->type->is_vector())
1380 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1381 else
1382 this->result.swizzle = BRW_SWIZZLE_NOOP;
1383 this->result.type = brw_type_for_base_type(ir->type);
1384
1385 this->result.reg_offset += offset;
1386 }
1387
1388 /**
1389 * We want to be careful in assignment setup to hit the actual storage
1390 * instead of potentially using a temporary like we might with the
1391 * ir_dereference handler.
1392 */
1393 static dst_reg
1394 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1395 {
1396 /* The LHS must be a dereference. If the LHS is a variable indexed array
1397 * access of a vector, it must be separated into a series of conditional moves
1398 * before reaching this point (see ir_vec_index_to_cond_assign).
1399 */
1400 assert(ir->as_dereference());
1401 ir_dereference_array *deref_array = ir->as_dereference_array();
1402 if (deref_array) {
1403 assert(!deref_array->array->type->is_vector());
1404 }
1405
1406 /* Use the rvalue deref handler for the most part. We'll ignore
1407 * swizzles in it and write swizzles using writemask, though.
1408 */
1409 ir->accept(v);
1410 return dst_reg(v->result);
1411 }
1412
1413 void
1414 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1415 const struct glsl_type *type, uint32_t predicate)
1416 {
1417 if (type->base_type == GLSL_TYPE_STRUCT) {
1418 for (unsigned int i = 0; i < type->length; i++) {
1419 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1420 }
1421 return;
1422 }
1423
1424 if (type->is_array()) {
1425 for (unsigned int i = 0; i < type->length; i++) {
1426 emit_block_move(dst, src, type->fields.array, predicate);
1427 }
1428 return;
1429 }
1430
1431 if (type->is_matrix()) {
1432 const struct glsl_type *vec_type;
1433
1434 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1435 type->vector_elements, 1);
1436
1437 for (int i = 0; i < type->matrix_columns; i++) {
1438 emit_block_move(dst, src, vec_type, predicate);
1439 }
1440 return;
1441 }
1442
1443 assert(type->is_scalar() || type->is_vector());
1444
1445 dst->type = brw_type_for_base_type(type);
1446 src->type = dst->type;
1447
1448 dst->writemask = (1 << type->vector_elements) - 1;
1449
1450 /* Do we need to worry about swizzling a swizzle? */
1451 assert(src->swizzle == BRW_SWIZZLE_NOOP);
1452 src->swizzle = swizzle_for_size(type->vector_elements);
1453
1454 vec4_instruction *inst = emit(MOV(*dst, *src));
1455 inst->predicate = predicate;
1456
1457 dst->reg_offset++;
1458 src->reg_offset++;
1459 }
1460
1461
1462 /* If the RHS processing resulted in an instruction generating a
1463 * temporary value, and it would be easy to rewrite the instruction to
1464 * generate its result right into the LHS instead, do so. This ends
1465 * up reliably removing instructions where it can be tricky to do so
1466 * later without real UD chain information.
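 *
 * For example, for an assignment like "v.xy = a + b", the ADD that produced
 * the temporary GRF simply has its destination rewritten to point at v's
 * storage (with the writemask narrowed to .xy), and the MOV that
 * visit(ir_assignment) would otherwise emit is skipped.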
1467 */
1468 bool
1469 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1470 dst_reg dst,
1471 src_reg src,
1472 vec4_instruction *pre_rhs_inst,
1473 vec4_instruction *last_rhs_inst)
1474 {
1475 /* This could be supported, but it would take more smarts. */
1476 if (ir->condition)
1477 return false;
1478
1479 if (pre_rhs_inst == last_rhs_inst)
1480 return false; /* No instructions generated to work with. */
1481
1482 /* Make sure the last instruction generated our source reg. */
1483 if (src.file != GRF ||
1484 src.file != last_rhs_inst->dst.file ||
1485 src.reg != last_rhs_inst->dst.reg ||
1486 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1487 src.reladdr ||
1488 src.abs ||
1489 src.negate ||
1490 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1491 return false;
1492
1493 /* Check that that last instruction fully initialized the channels
1494 * we want to use, in the order we want to use them. We could
1495 * potentially reswizzle the operands of many instructions so that
1496 * we could handle out of order channels, but don't yet.
1497 */
1498 for (int i = 0; i < 4; i++) {
1499 if (dst.writemask & (1 << i)) {
1500 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1501 return false;
1502
1503 if (BRW_GET_SWZ(src.swizzle, i) != i)
1504 return false;
1505 }
1506 }
1507
1508 /* Success! Rewrite the instruction. */
1509 last_rhs_inst->dst.file = dst.file;
1510 last_rhs_inst->dst.reg = dst.reg;
1511 last_rhs_inst->dst.reg_offset = dst.reg_offset;
1512 last_rhs_inst->dst.reladdr = dst.reladdr;
1513 last_rhs_inst->dst.writemask &= dst.writemask;
1514
1515 return true;
1516 }
1517
1518 void
1519 vec4_visitor::visit(ir_assignment *ir)
1520 {
1521 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1522 uint32_t predicate = BRW_PREDICATE_NONE;
1523
1524 if (!ir->lhs->type->is_scalar() &&
1525 !ir->lhs->type->is_vector()) {
1526 ir->rhs->accept(this);
1527 src_reg src = this->result;
1528
1529 if (ir->condition) {
1530 emit_bool_to_cond_code(ir->condition, &predicate);
1531 }
1532
1533 emit_block_move(&dst, &src, ir->rhs->type, predicate);
1534 return;
1535 }
1536
1537 /* Now we're down to just a scalar/vector with writemasks. */
1538 int i;
1539
1540 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1541 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1542
1543 ir->rhs->accept(this);
1544
1545 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1546
1547 src_reg src = this->result;
1548
1549 int swizzles[4];
1550 int first_enabled_chan = 0;
1551 int src_chan = 0;
1552
1553 assert(ir->lhs->type->is_vector() ||
1554 ir->lhs->type->is_scalar());
1555 dst.writemask = ir->write_mask;
1556
1557 for (int i = 0; i < 4; i++) {
1558 if (dst.writemask & (1 << i)) {
1559 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1560 break;
1561 }
1562 }
1563
1564 /* Swizzle a small RHS vector into the channels being written.
1565 *
1566 * glsl ir treats write_mask as dictating how many channels are
1567 * present on the RHS while in our instructions we need to make
1568 * those channels appear in the slots of the vec4 they're written to.
1569 */
1570 for (int i = 0; i < 4; i++) {
1571 if (dst.writemask & (1 << i))
1572 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1573 else
1574 swizzles[i] = first_enabled_chan;
1575 }
1576 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1577 swizzles[2], swizzles[3]);
1578
1579 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1580 return;
1581 }
1582
1583 if (ir->condition) {
1584 emit_bool_to_cond_code(ir->condition, &predicate);
1585 }
1586
1587 for (i = 0; i < type_size(ir->lhs->type); i++) {
1588 vec4_instruction *inst = emit(MOV(dst, src));
1589 inst->predicate = predicate;
1590
1591 dst.reg_offset++;
1592 src.reg_offset++;
1593 }
1594 }
1595
1596 void
1597 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1598 {
1599 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1600 foreach_list(node, &ir->components) {
1601 ir_constant *field_value = (ir_constant *)node;
1602
1603 emit_constant_values(dst, field_value);
1604 }
1605 return;
1606 }
1607
1608 if (ir->type->is_array()) {
1609 for (unsigned int i = 0; i < ir->type->length; i++) {
1610 emit_constant_values(dst, ir->array_elements[i]);
1611 }
1612 return;
1613 }
1614
1615 if (ir->type->is_matrix()) {
1616 for (int i = 0; i < ir->type->matrix_columns; i++) {
1617 for (int j = 0; j < ir->type->vector_elements; j++) {
1618 dst->writemask = 1 << j;
1619 dst->type = BRW_REGISTER_TYPE_F;
1620
1621 emit(MOV(*dst,
1622 src_reg(ir->value.f[i * ir->type->vector_elements + j])));
1623 }
1624 dst->reg_offset++;
1625 }
1626 return;
1627 }
1628
1629 for (int i = 0; i < ir->type->vector_elements; i++) {
1630 dst->writemask = 1 << i;
1631 dst->type = brw_type_for_base_type(ir->type);
1632
1633 switch (ir->type->base_type) {
1634 case GLSL_TYPE_FLOAT:
1635 emit(MOV(*dst, src_reg(ir->value.f[i])));
1636 break;
1637 case GLSL_TYPE_INT:
1638 emit(MOV(*dst, src_reg(ir->value.i[i])));
1639 break;
1640 case GLSL_TYPE_UINT:
1641 emit(MOV(*dst, src_reg(ir->value.u[i])));
1642 break;
1643 case GLSL_TYPE_BOOL:
1644 emit(MOV(*dst, src_reg(ir->value.b[i])));
1645 break;
1646 default:
1647 assert(!"Non-float/uint/int/bool constant");
1648 break;
1649 }
1650 }
1651 dst->reg_offset++;
1652 }
1653
1654 void
1655 vec4_visitor::visit(ir_constant *ir)
1656 {
1657 dst_reg dst = dst_reg(this, ir->type);
1658 this->result = src_reg(dst);
1659
1660 emit_constant_values(&dst, ir);
1661 }
1662
1663 void
1664 vec4_visitor::visit(ir_call *ir)
1665 {
1666 assert(!"not reached");
1667 }
1668
1669 void
1670 vec4_visitor::visit(ir_texture *ir)
1671 {
1672 /* FINISHME: Implement vertex texturing.
1673 *
1674 * With 0 vertex samplers available, the linker will reject
1675 * programs that do vertex texturing, but after our visitor has
1676 * run.
1677 */
1678 }
1679
1680 void
1681 vec4_visitor::visit(ir_return *ir)
1682 {
1683 assert(!"not reached");
1684 }
1685
1686 void
1687 vec4_visitor::visit(ir_discard *ir)
1688 {
1689 assert(!"not reached");
1690 }
1691
1692 void
1693 vec4_visitor::visit(ir_if *ir)
1694 {
1695 /* Don't point the annotation at the if statement, because then it plus
1696 * the then and else blocks get printed.
1697 */
1698 this->base_ir = ir->condition;
1699
1700 if (intel->gen == 6) {
1701 emit_if_gen6(ir);
1702 } else {
1703 uint32_t predicate;
1704 emit_bool_to_cond_code(ir->condition, &predicate);
1705 emit(IF(predicate));
1706 }
1707
1708 visit_instructions(&ir->then_instructions);
1709
1710 if (!ir->else_instructions.is_empty()) {
1711 this->base_ir = ir->condition;
1712 emit(BRW_OPCODE_ELSE);
1713
1714 visit_instructions(&ir->else_instructions);
1715 }
1716
1717 this->base_ir = ir->condition;
1718 emit(BRW_OPCODE_ENDIF);
1719 }
1720
1721 void
1722 vec4_visitor::emit_ndc_computation()
1723 {
1724 /* Get the position */
1725 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
1726
1727 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1728 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1729 output_reg[BRW_VERT_RESULT_NDC] = ndc;
1730
1731 current_annotation = "NDC";
1732 dst_reg ndc_w = ndc;
1733 ndc_w.writemask = WRITEMASK_W;
1734 src_reg pos_w = pos;
1735 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1736 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1737
1738 dst_reg ndc_xyz = ndc;
1739 ndc_xyz.writemask = WRITEMASK_XYZ;
1740
1741 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
1742 }
1743
1744 void
1745 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
1746 {
1747 if (intel->gen < 6 &&
1748 ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
1749 c->key.nr_userclip || brw->has_negative_rhw_bug)) {
1750 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1751 GLuint i;
1752
1753 emit(MOV(header1, 0u));
1754
1755 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1756 src_reg psiz = src_reg(output_reg[VERT_RESULT_PSIZ]);
1757
1758 current_annotation = "Point size";
1759 header1.writemask = WRITEMASK_W;
1760 emit(MUL(header1, psiz, src_reg((float)(1 << 11))));
1761 emit(AND(header1, src_reg(header1), 0x7ff << 8));
1762 }
1763
1764 current_annotation = "Clipping flags";
1765 for (i = 0; i < c->key.nr_userclip; i++) {
1766 vec4_instruction *inst;
1767
1768 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
1769 src_reg(c->userplane[i])));
1770 inst->conditional_mod = BRW_CONDITIONAL_L;
1771
1772 emit(OR(header1, src_reg(header1), 1u << i));
1773 inst->predicate = BRW_PREDICATE_NORMAL;
1774 }
1775
1776 /* i965 clipping workaround:
1777 * 1) Test for -ve rhw
1778 * 2) If set,
1779 * set ndc = (0,0,0,0)
1780 * set ucp[6] = 1
1781 *
1782 * Later, clipping will detect ucp[6] and ensure the primitive is
1783 * clipped against all fixed planes.
1784 */
1785 if (brw->has_negative_rhw_bug) {
1786 #if 0
1787 /* FINISHME */
1788 brw_CMP(p,
1789 vec8(brw_null_reg()),
1790 BRW_CONDITIONAL_L,
1791 brw_swizzle1(output_reg[BRW_VERT_RESULT_NDC], 3),
1792 brw_imm_f(0));
1793
1794 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
1795 brw_MOV(p, output_reg[BRW_VERT_RESULT_NDC], brw_imm_f(0));
1796 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1797 #endif
1798 }
1799
1800 header1.writemask = WRITEMASK_XYZW;
1801 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
1802 } else if (intel->gen < 6) {
1803 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
1804 } else {
1805 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
1806 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1807 emit(MOV(brw_writemask(reg, WRITEMASK_W),
1808 src_reg(output_reg[VERT_RESULT_PSIZ])));
1809 }
1810 }
1811 }
1812
1813 void
1814 vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
1815 {
1816 if (intel->gen < 6) {
1817 /* Clip distance slots are set aside in gen5, but they are not used. It
1818 * is not clear whether we actually need to set aside space for them,
1819 * but the performance cost is negligible.
1820 */
1821 return;
1822 }
1823
1824 for (int i = 0; i + offset < c->key.nr_userclip && i < 4; ++i) {
1825 emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
1826 src_reg(output_reg[VERT_RESULT_HPOS]),
1827 src_reg(c->userplane[i + offset])));
1828 }
1829 }
1830
1831 void
1832 vec4_visitor::emit_urb_slot(int mrf, int vert_result)
1833 {
1834 struct brw_reg hw_reg = brw_message_reg(mrf);
1835 dst_reg reg = dst_reg(MRF, mrf);
1836 reg.type = BRW_REGISTER_TYPE_F;
1837
1838 switch (vert_result) {
1839 case VERT_RESULT_PSIZ:
1840 /* PSIZ is always in slot 0, and is coupled with other flags. */
1841 current_annotation = "indices, point width, clip flags";
1842 emit_psiz_and_flags(hw_reg);
1843 break;
1844 case BRW_VERT_RESULT_NDC:
1845 current_annotation = "NDC";
1846 emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
1847 break;
1848 case BRW_VERT_RESULT_HPOS_DUPLICATE:
1849 case VERT_RESULT_HPOS:
1850 current_annotation = "gl_Position";
1851 emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
1852 break;
1853 case BRW_VERT_RESULT_CLIP0:
1854 current_annotation = "user clip distances";
1855 emit_clip_distances(hw_reg, 0);
1856 break;
1857 case BRW_VERT_RESULT_CLIP1:
1858 current_annotation = "user clip distances";
1859 emit_clip_distances(hw_reg, 4);
1860 break;
1861 case BRW_VERT_RESULT_PAD:
1862 /* No need to write to this slot */
1863 break;
1864 default: {
1865 assert (vert_result < VERT_RESULT_MAX);
1866 current_annotation = output_reg_annotation[vert_result];
1867 /* Copy the register, saturating if necessary */
1868 vec4_instruction *inst = emit(MOV(reg,
1869 src_reg(output_reg[vert_result])));
1870 if ((vert_result == VERT_RESULT_COL0 ||
1871 vert_result == VERT_RESULT_COL1 ||
1872 vert_result == VERT_RESULT_BFC0 ||
1873 vert_result == VERT_RESULT_BFC1) &&
1874 c->key.clamp_vertex_color) {
1875 inst->saturate = true;
1876 }
1877 }
1878 break;
1879 }
1880 }
1881
1882 static int
1883 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
1884 {
1885 struct intel_context *intel = &brw->intel;
1886
1887 if (intel->gen >= 6) {
1888 /* URB data written (does not include the message header reg) must
1889 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
1890 * section 5.4.3.2.2: URB_INTERLEAVED.
1891 *
1892 * URB entries are allocated on a multiple of 1024 bits, so an
1893 * extra 128 bits written here to make the end align to 256 is
1894 * no problem.
1895 */
1896 if ((mlen % 2) != 1)
1897 mlen++;
1898 }
1899
1900 return mlen;
1901 }
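/* Example: a write with one header reg and eight data regs has mlen = 9,
 * which is already odd (the eight data regs are a multiple of two), so it
 * is left alone; mlen = 10 (nine data regs) gets bumped to 11 so the URB
 * data stays a multiple of 256 bits.
 */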
1902
1903 /**
1904 * Generates the VUE payload plus the 1 or 2 URB write instructions to
1905 * complete the VS thread.
1906 *
1907 * The VUE layout is documented in Volume 2a.
1908 */
1909 void
1910 vec4_visitor::emit_urb_writes()
1911 {
1912 /* MRF 0 is reserved for the debugger, so start with message header
1913 * in MRF 1.
1914 */
1915 int base_mrf = 1;
1916 int mrf = base_mrf;
1917 /* In the process of generating our URB write message contents, we
1918 * may need to unspill a register or load from an array. Those
1919 * reads would use MRFs 14-15.
1920 */
1921 int max_usable_mrf = 13;
1922
1923 /* The following assertion verifies that max_usable_mrf causes an
1924 * even-numbered amount of URB write data, which will meet gen6's
1925 * requirements for length alignment.
1926 */
1927 assert ((max_usable_mrf - base_mrf) % 2 == 0);
1928
1929 /* FINISHME: edgeflag */
1930
1931 brw_compute_vue_map(&c->vue_map, intel, c->key.nr_userclip,
1932 c->prog_data.outputs_written);
1933
1934 /* First mrf is the g0-based message header containing URB handles and such,
1935 * which is implied in VS_OPCODE_URB_WRITE.
1936 */
1937 mrf++;
1938
1939 if (intel->gen < 6) {
1940 emit_ndc_computation();
1941 }
1942
1943 /* Set up the VUE data for the first URB write */
1944 int slot;
1945 for (slot = 0; slot < c->vue_map.num_slots; ++slot) {
1946 emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
1947
1948 /* If this was max_usable_mrf, we can't fit anything more into this URB
1949 * WRITE.
1950 */
1951 if (mrf > max_usable_mrf) {
1952 slot++;
1953 break;
1954 }
1955 }
1956
1957 current_annotation = "URB write";
1958 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
1959 inst->base_mrf = base_mrf;
1960 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1961 inst->eot = (slot >= c->vue_map.num_slots);
1962
1963 /* Optional second URB write */
1964 if (!inst->eot) {
1965 mrf = base_mrf + 1;
1966
1967 for (; slot < c->vue_map.num_slots; ++slot) {
1968 assert(mrf < max_usable_mrf);
1969
1970 emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
1971 }
1972
1973 current_annotation = "URB write";
1974 inst = emit(VS_OPCODE_URB_WRITE);
1975 inst->base_mrf = base_mrf;
1976 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1977 inst->eot = true;
1978 /* URB destination offset. In the previous write, we got MRFs
1979 * 2-13 minus the one header MRF, so 12 regs. URB offset is in
1980 * URB row increments, and each of our MRFs is half of one of
1981 * those, since we're doing interleaved writes.
1982 */
1983 inst->offset = (max_usable_mrf - base_mrf) / 2;
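/* With base_mrf = 1 and max_usable_mrf = 13 as set up above, this works
 * out to (13 - 1) / 2 = 6 URB rows skipped by the second write.
 */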
1984 }
1985
1986 if (intel->gen == 6)
1987 c->prog_data.urb_entry_size = ALIGN(c->vue_map.num_slots, 8) / 8;
1988 else
1989 c->prog_data.urb_entry_size = ALIGN(c->vue_map.num_slots, 4) / 4;
1990 }
1991
1992 src_reg
1993 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
1994 src_reg *reladdr, int reg_offset)
1995 {
1996 /* Because we store the values to scratch interleaved like our
1997 * vertex data, we need to scale the vec4 index by 2.
1998 */
1999 int message_header_scale = 2;
2000
2001 /* Pre-gen6, the message header uses byte offsets instead of vec4
2002 * (16-byte) offset units.
2003 */
2004 if (intel->gen < 6)
2005 message_header_scale *= 16;
2006
2007 if (reladdr) {
2008 src_reg index = src_reg(this, glsl_type::int_type);
2009
2010 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2011 emit_before(inst, MUL(dst_reg(index),
2012 index, src_reg(message_header_scale)));
2013
2014 return index;
2015 } else {
2016 return src_reg(reg_offset * message_header_scale);
2017 }
2018 }
2019
2020 src_reg
2021 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2022 src_reg *reladdr, int reg_offset)
2023 {
2024 if (reladdr) {
2025 src_reg index = src_reg(this, glsl_type::int_type);
2026
2027 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2028
2029 /* Pre-gen6, the message header uses byte offsets instead of vec4
2030 * (16-byte) offset units.
2031 */
2032 if (intel->gen < 6) {
2033 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2034 }
2035
2036 return index;
2037 } else {
2038 int message_header_scale = intel->gen < 6 ? 16 : 1;
2039 return src_reg(reg_offset * message_header_scale);
2040 }
2041 }
2042
2043 /**
2044 * Emits an instruction before @inst to load the value named by @orig_src
2045 * from scratch space at @base_offset to @temp.
2046 */
2047 void
2048 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2049 dst_reg temp, src_reg orig_src,
2050 int base_offset)
2051 {
2052 int reg_offset = base_offset + orig_src.reg_offset;
2053 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2054
2055 emit_before(inst, SCRATCH_READ(temp, index));
2056 }
2057
2058 /**
2059  * Emits an instruction after @inst to store the value in @temp, which
2060  * was destined for @orig_dst, into scratch space at @base_offset.
2061 */
2062 void
2063 vec4_visitor::emit_scratch_write(vec4_instruction *inst,
2064 src_reg temp, dst_reg orig_dst,
2065 int base_offset)
2066 {
2067 int reg_offset = base_offset + orig_dst.reg_offset;
2068 src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
2069
2070 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2071 orig_dst.writemask));
2072 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
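    /* The scratch write must inherit the original instruction's predicate,
     * so that conditionally-written channels stay conditional when they are
     * moved out to scratch.
     */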
2073 write->predicate = inst->predicate;
2074 write->ir = inst->ir;
2075 write->annotation = inst->annotation;
2076 inst->insert_after(write);
2077 }
2078
2079 /**
2080 * We can't generally support array access in GRF space, because a
2081 * single instruction's destination can only span 2 contiguous
2082 * registers. So, we send all GRF arrays that get variable index
2083 * access to scratch space.
2084 */
2085 void
2086 vec4_visitor::move_grf_array_access_to_scratch()
2087 {
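    /* scratch_loc[i] is the scratch location assigned to virtual GRF i, or
     * -1 if that GRF doesn't need to be moved to scratch.
     */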
2088 int scratch_loc[this->virtual_grf_count];
2089
2090 for (int i = 0; i < this->virtual_grf_count; i++) {
2091 scratch_loc[i] = -1;
2092 }
2093
2094 /* First, calculate the set of virtual GRFs that need to be punted
2095 * to scratch due to having any array access on them, and where in
2096 * scratch.
2097 */
2098 foreach_list(node, &this->instructions) {
2099 vec4_instruction *inst = (vec4_instruction *)node;
2100
2101 if (inst->dst.file == GRF && inst->dst.reladdr &&
2102 scratch_loc[inst->dst.reg] == -1) {
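          /* Reserve scratch space for the whole array; values are stored to
           * scratch interleaved for two vertices, like our other vertex data.
           */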
2103 scratch_loc[inst->dst.reg] = c->last_scratch;
2104 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
2105 }
2106
2107       for (int i = 0; i < 3; i++) {
2108 src_reg *src = &inst->src[i];
2109
2110 if (src->file == GRF && src->reladdr &&
2111 scratch_loc[src->reg] == -1) {
2112 scratch_loc[src->reg] = c->last_scratch;
2113 c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
2114 }
2115 }
2116 }
2117
2118 /* Now, for anything that will be accessed through scratch, rewrite
2119 * it to load/store. Note that this is a _safe list walk, because
2120 * we may generate a new scratch_write instruction after the one
2121 * we're processing.
2122 */
2123 foreach_list_safe(node, &this->instructions) {
2124 vec4_instruction *inst = (vec4_instruction *)node;
2125
2126       /* Set up the annotation tracking for newly generated instructions. */
2127 base_ir = inst->ir;
2128 current_annotation = inst->annotation;
2129
2130 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2131 src_reg temp = src_reg(this, glsl_type::vec4_type);
2132
2133 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
2134
2135 inst->dst.file = temp.file;
2136 inst->dst.reg = temp.reg;
2137 inst->dst.reg_offset = temp.reg_offset;
2138 inst->dst.reladdr = NULL;
2139 }
2140
2141       for (int i = 0; i < 3; i++) {
2142 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2143 continue;
2144
2145 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2146
2147 emit_scratch_read(inst, temp, inst->src[i],
2148 scratch_loc[inst->src[i].reg]);
2149
2150 inst->src[i].file = temp.file;
2151 inst->src[i].reg = temp.reg;
2152 inst->src[i].reg_offset = temp.reg_offset;
2153 inst->src[i].reladdr = NULL;
2154 }
2155 }
2156 }
2157
2158 /**
2159 * Emits an instruction before @inst to load the value named by @orig_src
2160 * from the pull constant buffer (surface) at @base_offset to @temp.
2161 */
2162 void
2163 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
2164 dst_reg temp, src_reg orig_src,
2165 int base_offset)
2166 {
2167 int reg_offset = base_offset + orig_src.reg_offset;
2168 src_reg index = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
2169 vec4_instruction *load;
2170
2171 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
2172 temp, index);
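    /* Use MRF 14 for the message header: it's above the max_usable_mrf used
     * by emit_urb_writes(), so these loads can't clobber URB write payload.
     */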
2173 load->base_mrf = 14;
2174 load->mlen = 1;
2175 emit_before(inst, load);
2176 }
2177
2178 /**
2179 * Implements array access of uniforms by inserting a
2180 * PULL_CONSTANT_LOAD instruction.
2181 *
2182 * Unlike temporary GRF array access (where we don't support it due to
2183 * the difficulty of doing relative addressing on instruction
2184 * destinations), we could potentially do array access of uniforms
2185 * that were loaded in GRF space as push constants. In real-world
2186 * usage we've seen, though, the arrays being used are always larger
2187 * than we could load as push constants, so just always move all
2188 * uniform array access out to a pull constant buffer.
2189 */
2190 void
2191 vec4_visitor::move_uniform_array_access_to_pull_constants()
2192 {
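    /* pull_constant_loc[i] is the vec4 slot of uniform i in the pull
     * constant buffer, or -1 if it hasn't been copied there yet.
     */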
2193 int pull_constant_loc[this->uniforms];
2194
2195 for (int i = 0; i < this->uniforms; i++) {
2196 pull_constant_loc[i] = -1;
2197 }
2198
2199 /* Walk through and find array access of uniforms. Put a copy of that
2200 * uniform in the pull constant buffer.
2201 *
2202 * Note that we don't move constant-indexed accesses to arrays. No
2203 * testing has been done of the performance impact of this choice.
2204 */
2205 foreach_list_safe(node, &this->instructions) {
2206 vec4_instruction *inst = (vec4_instruction *)node;
2207
2208       for (int i = 0; i < 3; i++) {
2209 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
2210 continue;
2211
2212 int uniform = inst->src[i].reg;
2213
2214 /* If this array isn't already present in the pull constant buffer,
2215 * add it.
2216 */
2217 if (pull_constant_loc[uniform] == -1) {
2218 const float **values = &prog_data->param[uniform * 4];
2219
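            /* pull_param holds one float pointer per component, so dividing
             * nr_pull_params by 4 gives this array's starting vec4 slot.
             */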
2220 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
2221
2222 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
2223 prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
2224 }
2225 }
2226
2227          /* Set up the annotation tracking for newly generated instructions. */
2228 base_ir = inst->ir;
2229 current_annotation = inst->annotation;
2230
2231 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2232
2233 emit_pull_constant_load(inst, temp, inst->src[i],
2234 pull_constant_loc[uniform]);
2235
2236 inst->src[i].file = temp.file;
2237 inst->src[i].reg = temp.reg;
2238 inst->src[i].reg_offset = temp.reg_offset;
2239 inst->src[i].reladdr = NULL;
2240 }
2241 }
2242
2243 /* Now there are no accesses of the UNIFORM file with a reladdr, so
2244 * no need to track them as larger-than-vec4 objects. This will be
2245 * relied on in cutting out unused uniform vectors from push
2246 * constants.
2247 */
2248 split_uniform_registers();
2249 }
2250
2251 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
2252 struct gl_shader_program *prog,
2253 struct brw_shader *shader)
2254 {
2255 this->c = c;
2256 this->p = &c->func;
2257 this->brw = p->brw;
2258 this->intel = &brw->intel;
2259 this->ctx = &intel->ctx;
2260 this->prog = prog;
2261 this->shader = shader;
2262
2263 this->mem_ctx = ralloc_context(NULL);
2264 this->failed = false;
2265
2266 this->base_ir = NULL;
2267 this->current_annotation = NULL;
2268
2269    this->vp = prog->VertexProgram;
2270    this->prog_data = &c->prog_data;
2272
2273 this->variable_ht = hash_table_ctor(0,
2274 hash_table_pointer_hash,
2275 hash_table_pointer_compare);
2276
2277 this->virtual_grf_def = NULL;
2278 this->virtual_grf_use = NULL;
2279 this->virtual_grf_sizes = NULL;
2280 this->virtual_grf_count = 0;
2281 this->virtual_grf_reg_map = NULL;
2282 this->virtual_grf_reg_count = 0;
2283 this->virtual_grf_array_size = 0;
2284 this->live_intervals_valid = false;
2285
2286 this->uniforms = 0;
2287 }
2292
2293 vec4_visitor::~vec4_visitor()
2294 {
2295 ralloc_free(this->mem_ctx);
2296 hash_table_dtor(this->variable_ht);
2297 }
2298
2299
2300 void
2301 vec4_visitor::fail(const char *format, ...)
2302 {
2303 va_list va;
2304 char *msg;
2305
2306 if (failed)
2307 return;
2308
2309 failed = true;
2310
2311 va_start(va, format);
2312 msg = ralloc_vasprintf(mem_ctx, format, va);
2313 va_end(va);
2314 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2315
2316 this->fail_msg = msg;
2317
2318 if (INTEL_DEBUG & DEBUG_VS) {
2319 fprintf(stderr, "%s", msg);
2320 }
2321 }
2322
2323 } /* namespace brw */