i965/vs: Add support for pull constant loads for uniform arrays.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 }
29
30 namespace brw {
31
32 src_reg::src_reg(dst_reg reg)
33 {
34 init();
35
36 this->file = reg.file;
37 this->reg = reg.reg;
38 this->reg_offset = reg.reg_offset;
39 this->type = reg.type;
40 this->reladdr = reg.reladdr;
41 this->fixed_hw_reg = reg.fixed_hw_reg;
42
43 int swizzles[4];
44 int next_chan = 0;
45 int last = 0;
46
47 for (int i = 0; i < 4; i++) {
48 if (!(reg.writemask & (1 << i)))
49 continue;
50
51 swizzles[next_chan++] = last = i;
52 }
53
54 for (; next_chan < 4; next_chan++) {
55 swizzles[next_chan] = last;
56 }
57
58 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
59 swizzles[2], swizzles[3]);
60 }
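/* Worked example (illustrative only): for a dst_reg with writemask XZ the
 * loop above produces swizzles[] = { 0, 2, 2, 2 }, i.e.
 *
 *    this->swizzle = BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z);
 *
 * so reads through the resulting src_reg see only channels that were
 * actually written, with the last written channel replicated into the
 * unused slots.
 */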
61
62 dst_reg::dst_reg(src_reg reg)
63 {
64 init();
65
66 this->file = reg.file;
67 this->reg = reg.reg;
68 this->reg_offset = reg.reg_offset;
69 this->type = reg.type;
70 this->writemask = WRITEMASK_XYZW;
71 this->reladdr = reg.reladdr;
72 this->fixed_hw_reg = reg.fixed_hw_reg;
73 }
74
75 vec4_instruction::vec4_instruction(vec4_visitor *v,
76 enum opcode opcode, dst_reg dst,
77 src_reg src0, src_reg src1, src_reg src2)
78 {
79 this->opcode = opcode;
80 this->dst = dst;
81 this->src[0] = src0;
82 this->src[1] = src1;
83 this->src[2] = src2;
84 this->ir = v->base_ir;
85 this->annotation = v->current_annotation;
86 }
87
88 vec4_instruction *
89 vec4_visitor::emit(vec4_instruction *inst)
90 {
91 this->instructions.push_tail(inst);
92
93 return inst;
94 }
95
96 vec4_instruction *
97 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
98 src_reg src0, src_reg src1, src_reg src2)
99 {
100 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
101 src0, src1, src2));
102 }
103
104
105 vec4_instruction *
106 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
107 {
108 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
109 }
110
111 vec4_instruction *
112 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
113 {
114 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
115 }
116
117 vec4_instruction *
118 vec4_visitor::emit(enum opcode opcode)
119 {
120 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
121 }
122
123 void
124 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
125 {
126 static enum opcode dot_opcodes[] = {
127 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
128 };
129
130 emit(dot_opcodes[elements - 2], dst, src0, src1);
131 }
132
133 void
134 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
135 {
136 /* The gen6 math instruction ignores the source modifiers --
137 * swizzle, abs, negate, and at least some parts of the register
138 * region description.
139 */
140 src_reg temp_src = src_reg(this, glsl_type::vec4_type);
141 emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);
142
143 if (dst.writemask != WRITEMASK_XYZW) {
144 /* The gen6 math instruction must be align1, so we can't do
145 * writemasks.
146 */
147 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
148
149 emit(opcode, temp_dst, temp_src);
150
151 emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
152 } else {
153 emit(opcode, dst, temp_src);
154 }
155 }
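/* Sketch of the emitted sequence (illustrative only): for
 * emit_math1_gen6(SHADER_OPCODE_SQRT, dst.xy, src) this expands to roughly
 *
 *    MOV  temp_src, src          // strip source modifiers
 *    SQRT temp_dst, temp_src     // align1 math, full vec4 write
 *    MOV  dst.xy, temp_dst       // apply the real writemask
 *
 * whereas a full-writemask destination skips the trailing MOV.
 */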
156
157 void
158 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
159 {
160 vec4_instruction *inst = emit(opcode, dst, src);
161 inst->base_mrf = 1;
162 inst->mlen = 1;
163 }
164
165 void
166 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
167 {
168 switch (opcode) {
169 case SHADER_OPCODE_RCP:
170 case SHADER_OPCODE_RSQ:
171 case SHADER_OPCODE_SQRT:
172 case SHADER_OPCODE_EXP2:
173 case SHADER_OPCODE_LOG2:
174 case SHADER_OPCODE_SIN:
175 case SHADER_OPCODE_COS:
176 break;
177 default:
178 assert(!"not reached: bad math opcode");
179 return;
180 }
181
182 if (intel->gen >= 6) {
183 return emit_math1_gen6(opcode, dst, src);
184 } else {
185 return emit_math1_gen4(opcode, dst, src);
186 }
187 }
188
189 void
190 vec4_visitor::emit_math2_gen6(enum opcode opcode,
191 dst_reg dst, src_reg src0, src_reg src1)
192 {
193 src_reg expanded;
194
195 /* The gen6 math instruction ignores the source modifiers --
196 * swizzle, abs, negate, and at least some parts of the register
197 * region description. Move the sources to temporaries to make it
198 * generally work.
199 */
200
201 expanded = src_reg(this, glsl_type::vec4_type);
202 emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
203 src0 = expanded;
204
205 expanded = src_reg(this, glsl_type::vec4_type);
206 emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
207 src1 = expanded;
208
209 if (dst.writemask != WRITEMASK_XYZW) {
210 /* The gen6 math instruction must be align1, so we can't do
211 * writemasks.
212 */
213 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
214
215 emit(opcode, temp_dst, src0, src1);
216
217 emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
218 } else {
219 emit(opcode, dst, src0, src1);
220 }
221 }
222
223 void
224 vec4_visitor::emit_math2_gen4(enum opcode opcode,
225 dst_reg dst, src_reg src0, src_reg src1)
226 {
227 vec4_instruction *inst = emit(opcode, dst, src0, src1);
228 inst->base_mrf = 1;
229 inst->mlen = 2;
230 }
231
232 void
233 vec4_visitor::emit_math(enum opcode opcode,
234 dst_reg dst, src_reg src0, src_reg src1)
235 {
236 assert(opcode == SHADER_OPCODE_POW);
237
238 if (intel->gen >= 6) {
239 return emit_math2_gen6(opcode, dst, src0, src1);
240 } else {
241 return emit_math2_gen4(opcode, dst, src0, src1);
242 }
243 }
244
245 void
246 vec4_visitor::visit_instructions(const exec_list *list)
247 {
248 foreach_list(node, list) {
249 ir_instruction *ir = (ir_instruction *)node;
250
251 base_ir = ir;
252 ir->accept(this);
253 }
254 }
255
256
257 static int
258 type_size(const struct glsl_type *type)
259 {
260 unsigned int i;
261 int size;
262
263 switch (type->base_type) {
264 case GLSL_TYPE_UINT:
265 case GLSL_TYPE_INT:
266 case GLSL_TYPE_FLOAT:
267 case GLSL_TYPE_BOOL:
268 if (type->is_matrix()) {
269 return type->matrix_columns;
270 } else {
271 /* Regardless of the size of the vector, it gets a vec4. This is bad
272 * packing for things like floats, but otherwise arrays become a
273 * mess. Hopefully a later pass over the code can pack scalars
274 * down if appropriate.
275 */
276 return 1;
277 }
278 case GLSL_TYPE_ARRAY:
279 assert(type->length > 0);
280 return type_size(type->fields.array) * type->length;
281 case GLSL_TYPE_STRUCT:
282 size = 0;
283 for (i = 0; i < type->length; i++) {
284 size += type_size(type->fields.structure[i].type);
285 }
286 return size;
287 case GLSL_TYPE_SAMPLER:
288 /* Samplers take up one slot in UNIFORMS[], but they're baked in
289 * at link time.
290 */
291 return 1;
292 default:
293 assert(0);
294 return 0;
295 }
296 }
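/* Illustrative sizes under the rules above: a float, vec3 or vec4 each
 * occupy one vec4 slot; a mat3 occupies matrix_columns == 3 slots; a
 * vec4[10] occupies 10 slots; and a struct { vec3 a; mat4 b; } occupies
 * 1 + 4 == 5 slots.
 */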
297
298 int
299 vec4_visitor::virtual_grf_alloc(int size)
300 {
301 if (virtual_grf_array_size <= virtual_grf_count) {
302 if (virtual_grf_array_size == 0)
303 virtual_grf_array_size = 16;
304 else
305 virtual_grf_array_size *= 2;
306 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
307 virtual_grf_array_size);
308 }
309 virtual_grf_sizes[virtual_grf_count] = size;
310 return virtual_grf_count++;
311 }
312
313 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
314 {
315 init();
316
317 this->file = GRF;
318 this->reg = v->virtual_grf_alloc(type_size(type));
319
320 if (type->is_array() || type->is_record()) {
321 this->swizzle = BRW_SWIZZLE_NOOP;
322 } else {
323 this->swizzle = swizzle_for_size(type->vector_elements);
324 }
325
326 this->type = brw_type_for_base_type(type);
327 }
328
329 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
330 {
331 init();
332
333 this->file = GRF;
334 this->reg = v->virtual_grf_alloc(type_size(type));
335
336 if (type->is_array() || type->is_record()) {
337 this->writemask = WRITEMASK_XYZW;
338 } else {
339 this->writemask = (1 << type->vector_elements) - 1;
340 }
341
342 this->type = brw_type_for_base_type(type);
343 }
344
345 /* Our support for uniforms is piggy-backed on the struct
346 * gl_program, because that's where the values actually
347 * get stored, rather than in some global gl_shader_program uniform
348 * store.
349 */
350 int
351 vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
352 {
353 unsigned int offset = 0;
354 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
355
356 if (type->is_matrix()) {
357 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
358 type->vector_elements,
359 1);
360
361 for (unsigned int i = 0; i < type->matrix_columns; i++) {
362 offset += setup_uniform_values(loc + offset, column);
363 }
364
365 return offset;
366 }
367
368 switch (type->base_type) {
369 case GLSL_TYPE_FLOAT:
370 case GLSL_TYPE_UINT:
371 case GLSL_TYPE_INT:
372 case GLSL_TYPE_BOOL:
373 for (unsigned int i = 0; i < type->vector_elements; i++) {
374 c->prog_data.param[this->uniforms * 4 + i] = &values[i];
375 }
376
377 /* Set up pad elements to get things aligned to a vec4 boundary. */
378 for (unsigned int i = type->vector_elements; i < 4; i++) {
379 static float zero = 0;
380
381 c->prog_data.param[this->uniforms * 4 + i] = &zero;
382 }
383
384 /* Track the size of this uniform vector, for future packing of
385 * uniforms.
386 */
387 this->uniform_vector_size[this->uniforms] = type->vector_elements;
388 this->uniforms++;
389
390 return 1;
391
392 case GLSL_TYPE_STRUCT:
393 for (unsigned int i = 0; i < type->length; i++) {
394 offset += setup_uniform_values(loc + offset,
395 type->fields.structure[i].type);
396 }
397 return offset;
398
399 case GLSL_TYPE_ARRAY:
400 for (unsigned int i = 0; i < type->length; i++) {
401 offset += setup_uniform_values(loc + offset, type->fields.array);
402 }
403 return offset;
404
405 case GLSL_TYPE_SAMPLER:
406 /* The sampler takes up a slot, but we don't use any values from it. */
407 return 1;
408
409 default:
410 assert(!"not reached");
411 return 0;
412 }
413 }
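/* Example layout (illustrative only): a vec3 uniform consumes one vec4
 * slot, with param[u * 4 + 0..2] pointing at its components and
 * param[u * 4 + 3] pointing at the static zero pad.  A mat4 goes through
 * the matrix case above and consumes four consecutive vec4 slots, one per
 * column.
 */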
414
415 /* Our support for builtin uniforms is even scarier than non-builtin.
416 * It sits on top of the PROG_STATE_VAR parameters that are
417 * automatically updated from GL context state.
418 */
419 void
420 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
421 {
422 const ir_state_slot *const slots = ir->state_slots;
423 assert(ir->state_slots != NULL);
424
425 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
426 /* This state reference has already been set up by ir_to_mesa,
427 * but we'll get the same index back here. We can reference
428 * ParameterValues directly, since unlike brw_fs.cpp, we never
429 * add new state references during compile.
430 */
431 int index = _mesa_add_state_reference(this->vp->Base.Parameters,
432 (gl_state_index *)slots[i].tokens);
433 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
434
435 this->uniform_vector_size[this->uniforms] = 0;
436 /* Add each of the unique swizzled channels of the element.
437 * This will end up matching the size of the glsl_type of this field.
438 */
439 int last_swiz = -1;
440 for (unsigned int j = 0; j < 4; j++) {
441 int swiz = GET_SWZ(slots[i].swizzle, j);
442 last_swiz = swiz;
443
444 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
445 if (swiz <= last_swiz)
446 this->uniform_vector_size[this->uniforms]++;
447 }
448 this->uniforms++;
449 }
450 }
451
452 dst_reg *
453 vec4_visitor::variable_storage(ir_variable *var)
454 {
455 return (dst_reg *)hash_table_find(this->variable_ht, var);
456 }
457
458 void
459 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
460 {
461 ir_expression *expr = ir->as_expression();
462
463 if (expr) {
464 src_reg op[2];
465 vec4_instruction *inst;
466
467 assert(expr->get_num_operands() <= 2);
468 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
469 assert(expr->operands[i]->type->is_scalar());
470
471 expr->operands[i]->accept(this);
472 op[i] = this->result;
473 }
474
475 switch (expr->operation) {
476 case ir_unop_logic_not:
477 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
478 inst->conditional_mod = BRW_CONDITIONAL_Z;
479 break;
480
481 case ir_binop_logic_xor:
482 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
483 inst->conditional_mod = BRW_CONDITIONAL_NZ;
484 break;
485
486 case ir_binop_logic_or:
487 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
488 inst->conditional_mod = BRW_CONDITIONAL_NZ;
489 break;
490
491 case ir_binop_logic_and:
492 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
493 inst->conditional_mod = BRW_CONDITIONAL_NZ;
494 break;
495
496 case ir_unop_f2b:
497 if (intel->gen >= 6) {
498 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
499 } else {
500 inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
501 }
502 inst->conditional_mod = BRW_CONDITIONAL_NZ;
503 break;
504
505 case ir_unop_i2b:
506 if (intel->gen >= 6) {
507 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
508 } else {
509 inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
510 }
511 inst->conditional_mod = BRW_CONDITIONAL_NZ;
512 break;
513
514 case ir_binop_greater:
515 case ir_binop_gequal:
516 case ir_binop_less:
517 case ir_binop_lequal:
518 case ir_binop_equal:
519 case ir_binop_all_equal:
520 case ir_binop_nequal:
521 case ir_binop_any_nequal:
522 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
523 inst->conditional_mod =
524 brw_conditional_for_comparison(expr->operation);
525 break;
526
527 default:
528 assert(!"not reached");
529 break;
530 }
531 return;
532 }
533
534 ir->accept(this);
535
536 if (intel->gen >= 6) {
537 vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
538 this->result, src_reg(1));
539 inst->conditional_mod = BRW_CONDITIONAL_NZ;
540 } else {
541 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
542 inst->conditional_mod = BRW_CONDITIONAL_NZ;
543 }
544 }
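/* Sketch (illustrative only): a condition like "a && b" lowers above to
 *
 *    AND.nz null, a, b
 *
 * leaving the flag register set for the predicated instruction (or IF)
 * that the caller emits next.
 */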
545
546 /**
547 * Emit a gen6 IF statement with the comparison folded into the IF
548 * instruction.
549 */
550 void
551 vec4_visitor::emit_if_gen6(ir_if *ir)
552 {
553 ir_expression *expr = ir->condition->as_expression();
554
555 if (expr) {
556 src_reg op[2];
557 vec4_instruction *inst;
558 dst_reg temp;
559
560 assert(expr->get_num_operands() <= 2);
561 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
562 expr->operands[i]->accept(this);
563 op[i] = this->result;
564 }
565
566 switch (expr->operation) {
567 case ir_unop_logic_not:
568 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
569 inst->conditional_mod = BRW_CONDITIONAL_Z;
570 return;
571
572 case ir_binop_logic_xor:
573 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
574 inst->conditional_mod = BRW_CONDITIONAL_NZ;
575 return;
576
577 case ir_binop_logic_or:
578 temp = dst_reg(this, glsl_type::bool_type);
579 emit(BRW_OPCODE_OR, temp, op[0], op[1]);
580 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
581 inst->conditional_mod = BRW_CONDITIONAL_NZ;
582 return;
583
584 case ir_binop_logic_and:
585 temp = dst_reg(this, glsl_type::bool_type);
586 emit(BRW_OPCODE_AND, temp, op[0], op[1]);
587 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
588 inst->conditional_mod = BRW_CONDITIONAL_NZ;
589 return;
590
591 case ir_unop_f2b:
592 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
593 inst->conditional_mod = BRW_CONDITIONAL_NZ;
594 return;
595
596 case ir_unop_i2b:
597 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
598 inst->conditional_mod = BRW_CONDITIONAL_NZ;
599 return;
600
601 case ir_binop_greater:
602 case ir_binop_gequal:
603 case ir_binop_less:
604 case ir_binop_lequal:
605 case ir_binop_equal:
606 case ir_binop_nequal:
607 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
608 inst->conditional_mod =
609 brw_conditional_for_comparison(expr->operation);
610 return;
611
612 case ir_binop_all_equal:
613 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
614 inst->conditional_mod = BRW_CONDITIONAL_Z;
615
616 inst = emit(BRW_OPCODE_IF);
617 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
618 return;
619
620 case ir_binop_any_nequal:
621 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
622 inst->conditional_mod = BRW_CONDITIONAL_NZ;
623
624 inst = emit(BRW_OPCODE_IF);
625 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
626 return;
627
628 case ir_unop_any:
629 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
630 inst->conditional_mod = BRW_CONDITIONAL_NZ;
631
632 inst = emit(BRW_OPCODE_IF);
633 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
634 return;
635
636 default:
637 assert(!"not reached");
638 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
639 inst->conditional_mod = BRW_CONDITIONAL_NZ;
640 return;
641 }
642 return;
643 }
644
645 ir->condition->accept(this);
646
647 vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
648 this->result, src_reg(0));
649 inst->conditional_mod = BRW_CONDITIONAL_NZ;
650 }
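/* Sketch (illustrative only): on gen6 a condition like "a < b" folds into
 * a single
 *
 *    IF.l null, a, b
 *
 * while the pre-gen6 path in visit(ir_if) goes through
 * emit_bool_to_cond_code() and then emits a predicated IF.
 */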
651
652 void
653 vec4_visitor::visit(ir_variable *ir)
654 {
655 dst_reg *reg = NULL;
656
657 if (variable_storage(ir))
658 return;
659
660 switch (ir->mode) {
661 case ir_var_in:
662 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
663 break;
664
665 case ir_var_out:
666 reg = new(mem_ctx) dst_reg(this, ir->type);
667
668 for (int i = 0; i < type_size(ir->type); i++) {
669 output_reg[ir->location + i] = *reg;
670 output_reg[ir->location + i].reg_offset = i;
671 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
672 }
673 break;
674
675 case ir_var_auto:
676 case ir_var_temporary:
677 reg = new(mem_ctx) dst_reg(this, ir->type);
678 break;
679
680 case ir_var_uniform:
681 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
682
683 /* Track how big the whole uniform variable is, in case we need to put a
684 * copy of its data into pull constants for array access.
685 */
686 this->uniform_size[this->uniforms] = type_size(ir->type);
687
688 if (!strncmp(ir->name, "gl_", 3)) {
689 setup_builtin_uniform_values(ir);
690 } else {
691 setup_uniform_values(ir->location, ir->type);
692 }
693 break;
694
695 default:
696 assert(!"not reached");
697 }
698
699 reg->type = brw_type_for_base_type(ir->type);
700 hash_table_insert(this->variable_ht, reg, ir);
701 }
702
703 void
704 vec4_visitor::visit(ir_loop *ir)
705 {
706 dst_reg counter;
707
708 /* We don't want debugging output to print the whole body of the
709 * loop as the annotation.
710 */
711 this->base_ir = NULL;
712
713 if (ir->counter != NULL) {
714 this->base_ir = ir->counter;
715 ir->counter->accept(this);
716 counter = *(variable_storage(ir->counter));
717
718 if (ir->from != NULL) {
719 this->base_ir = ir->from;
720 ir->from->accept(this);
721
722 emit(BRW_OPCODE_MOV, counter, this->result);
723 }
724 }
725
726 emit(BRW_OPCODE_DO);
727
728 if (ir->to) {
729 this->base_ir = ir->to;
730 ir->to->accept(this);
731
732 vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(),
733 src_reg(counter), this->result);
734 inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);
735
736 inst = emit(BRW_OPCODE_BREAK);
737 inst->predicate = BRW_PREDICATE_NORMAL;
738 }
739
740 visit_instructions(&ir->body_instructions);
741
742
743 if (ir->increment) {
744 this->base_ir = ir->increment;
745 ir->increment->accept(this);
746 emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result);
747 }
748
749 emit(BRW_OPCODE_WHILE);
750 }
751
752 void
753 vec4_visitor::visit(ir_loop_jump *ir)
754 {
755 switch (ir->mode) {
756 case ir_loop_jump::jump_break:
757 emit(BRW_OPCODE_BREAK);
758 break;
759 case ir_loop_jump::jump_continue:
760 emit(BRW_OPCODE_CONTINUE);
761 break;
762 }
763 }
764
765
766 void
767 vec4_visitor::visit(ir_function_signature *ir)
768 {
769 assert(0);
770 (void)ir;
771 }
772
773 void
774 vec4_visitor::visit(ir_function *ir)
775 {
776 /* Ignore function bodies other than main() -- we shouldn't see calls to
777 * them since they should all be inlined.
778 */
779 if (strcmp(ir->name, "main") == 0) {
780 const ir_function_signature *sig;
781 exec_list empty;
782
783 sig = ir->matching_signature(&empty);
784
785 assert(sig);
786
787 visit_instructions(&sig->body);
788 }
789 }
790
791 GLboolean
792 vec4_visitor::try_emit_sat(ir_expression *ir)
793 {
794 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
795 if (!sat_src)
796 return false;
797
798 sat_src->accept(this);
799 src_reg src = this->result;
800
801 this->result = src_reg(this, ir->type);
802 vec4_instruction *inst;
803 inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
804 inst->saturate = true;
805
806 return true;
807 }
808
809 void
810 vec4_visitor::emit_bool_comparison(unsigned int op,
811 dst_reg dst, src_reg src0, src_reg src1)
812 {
813 /* original gen4 does destination conversion before comparison. */
814 if (intel->gen < 5)
815 dst.type = src0.type;
816
817 vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
818 inst->conditional_mod = brw_conditional_for_comparison(op);
819
820 dst.type = BRW_REGISTER_TYPE_D;
821 emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
822 }
823
824 void
825 vec4_visitor::visit(ir_expression *ir)
826 {
827 unsigned int operand;
828 src_reg op[Elements(ir->operands)];
829 src_reg result_src;
830 dst_reg result_dst;
831 vec4_instruction *inst;
832
833 if (try_emit_sat(ir))
834 return;
835
836 for (operand = 0; operand < ir->get_num_operands(); operand++) {
837 this->result.file = BAD_FILE;
838 ir->operands[operand]->accept(this);
839 if (this->result.file == BAD_FILE) {
840 printf("Failed to get tree for expression operand:\n");
841 ir->operands[operand]->print();
842 exit(1);
843 }
844 op[operand] = this->result;
845
846 /* Matrix expression operands should have been broken down to vector
847 * operations already.
848 */
849 assert(!ir->operands[operand]->type->is_matrix());
850 }
851
852 int vector_elements = ir->operands[0]->type->vector_elements;
853 if (ir->operands[1]) {
854 vector_elements = MAX2(vector_elements,
855 ir->operands[1]->type->vector_elements);
856 }
857
858 this->result.file = BAD_FILE;
859
860 /* Storage for our result. Ideally for an assignment we'd be using
861 * the actual storage for the result here, instead.
862 */
863 result_src = src_reg(this, ir->type);
864 /* convenience for the emit functions below. */
865 result_dst = dst_reg(result_src);
866 /* If nothing special happens, this is the result. */
867 this->result = result_src;
868 /* Limit writes to the channels that will be used by result_src later.
869 * This does limit this temp's use as a temporary for multi-instruction
870 * sequences.
871 */
872 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
873
874 switch (ir->operation) {
875 case ir_unop_logic_not:
876 /* Note that BRW_OPCODE_NOT is not appropriate here, since it takes
877 * the one's complement of the whole register, not just bit 0.
878 */
879 emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
880 break;
881 case ir_unop_neg:
882 op[0].negate = !op[0].negate;
883 this->result = op[0];
884 break;
885 case ir_unop_abs:
886 op[0].abs = true;
887 op[0].negate = false;
888 this->result = op[0];
889 break;
890
891 case ir_unop_sign:
892 emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));
893
894 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
895 inst->conditional_mod = BRW_CONDITIONAL_G;
896 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
897 inst->predicate = BRW_PREDICATE_NORMAL;
898
899 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
900 inst->conditional_mod = BRW_CONDITIONAL_L;
901 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
902 inst->predicate = BRW_PREDICATE_NORMAL;
903
904 break;
905
906 case ir_unop_rcp:
907 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
908 break;
909
910 case ir_unop_exp2:
911 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
912 break;
913 case ir_unop_log2:
914 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
915 break;
916 case ir_unop_exp:
917 case ir_unop_log:
918 assert(!"not reached: should be handled by ir_explog_to_explog2");
919 break;
920 case ir_unop_sin:
921 case ir_unop_sin_reduced:
922 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
923 break;
924 case ir_unop_cos:
925 case ir_unop_cos_reduced:
926 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
927 break;
928
929 case ir_unop_dFdx:
930 case ir_unop_dFdy:
931 assert(!"derivatives not valid in vertex shader");
932 break;
933
934 case ir_unop_noise:
935 assert(!"not reached: should be handled by lower_noise");
936 break;
937
938 case ir_binop_add:
939 emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
940 break;
941 case ir_binop_sub:
942 assert(!"not reached: should be handled by ir_sub_to_add_neg");
943 break;
944
945 case ir_binop_mul:
946 if (ir->type->is_integer()) {
947 /* For integer multiplication, the MUL uses the low 16 bits
948 * of one of the operands (src0 on gen6, src1 on gen7). The
949 * MACH accumulates the contribution of the upper 16 bits
950 * of that operand.
951 *
952 * FINISHME: Emit just the MUL if we know an operand is small
953 * enough.
954 */
955 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
956
957 emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
958 emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]);
959 emit(BRW_OPCODE_MOV, result_dst, src_reg(acc));
960 } else {
961 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
962 }
963 break;
964 case ir_binop_div:
965 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
966 case ir_binop_mod:
967 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
968 break;
969
970 case ir_binop_less:
971 case ir_binop_greater:
972 case ir_binop_lequal:
973 case ir_binop_gequal:
974 case ir_binop_equal:
975 case ir_binop_nequal: {
976 dst_reg temp = result_dst;
977 /* original gen4 does implicit conversion before comparison. */
978 if (intel->gen < 5)
979 temp.type = op[0].type;
980
981 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
982 inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
983 emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
984 break;
985 }
986
987 case ir_binop_all_equal:
988 /* "==" operator producing a scalar boolean. */
989 if (ir->operands[0]->type->is_vector() ||
990 ir->operands[1]->type->is_vector()) {
991 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
992 inst->conditional_mod = BRW_CONDITIONAL_Z;
993
994 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
995 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
996 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
997 } else {
998 dst_reg temp = result_dst;
999 /* original gen4 does implicit conversion before comparison. */
1000 if (intel->gen < 5)
1001 temp.type = op[0].type;
1002
1003 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
1004 inst->conditional_mod = BRW_CONDITIONAL_Z;
1005 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
1006 }
1007 break;
1008 case ir_binop_any_nequal:
1009 /* "!=" operator producing a scalar boolean. */
1010 if (ir->operands[0]->type->is_vector() ||
1011 ir->operands[1]->type->is_vector()) {
1012 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
1013 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1014
1015 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
1016 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
1017 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1018 } else {
1019 dst_reg temp = result_dst;
1020 /* original gen4 does implicit conversion before comparison. */
1021 if (intel->gen < 5)
1022 temp.type = op[0].type;
1023
1024 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
1025 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1026 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
1027 }
1028 break;
1029
1030 case ir_unop_any:
1031 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
1032 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1033
1034 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
1035
1036 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
1037 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1038 break;
1039
1040 case ir_binop_logic_xor:
1041 emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
1042 break;
1043
1044 case ir_binop_logic_or:
1045 emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
1046 break;
1047
1048 case ir_binop_logic_and:
1049 emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
1050 break;
1051
1052 case ir_binop_dot:
1053 assert(ir->operands[0]->type->is_vector());
1054 assert(ir->operands[0]->type == ir->operands[1]->type);
1055 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1056 break;
1057
1058 case ir_unop_sqrt:
1059 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1060 break;
1061 case ir_unop_rsq:
1062 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1063 break;
1064 case ir_unop_i2f:
1065 case ir_unop_i2u:
1066 case ir_unop_u2i:
1067 case ir_unop_u2f:
1068 case ir_unop_b2f:
1069 case ir_unop_b2i:
1070 case ir_unop_f2i:
1071 emit(BRW_OPCODE_MOV, result_dst, op[0]);
1072 break;
1073 case ir_unop_f2b:
1074 case ir_unop_i2b: {
1075 dst_reg temp = result_dst;
1076 /* original gen4 does implicit conversion before comparison. */
1077 if (intel->gen < 5)
1078 temp.type = op[0].type;
1079
1080 inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
1081 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1082 inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
1083 break;
1084 }
1085
1086 case ir_unop_trunc:
1087 emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
1088 break;
1089 case ir_unop_ceil:
1090 op[0].negate = !op[0].negate;
1091 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
1092 this->result.negate = true;
1093 break;
1094 case ir_unop_floor:
1095 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
1096 break;
1097 case ir_unop_fract:
1098 inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
1099 break;
1100 case ir_unop_round_even:
1101 emit(BRW_OPCODE_RNDE, result_dst, op[0]);
1102 break;
1103
1104 case ir_binop_min:
1105 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
1106 inst->conditional_mod = BRW_CONDITIONAL_L;
1107
1108 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1109 inst->predicate = BRW_PREDICATE_NORMAL;
1110 break;
1111 case ir_binop_max:
1112 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
1113 inst->conditional_mod = BRW_CONDITIONAL_G;
1114
1115 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1116 inst->predicate = BRW_PREDICATE_NORMAL;
1117 break;
1118
1119 case ir_binop_pow:
1120 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1121 break;
1122
1123 case ir_unop_bit_not:
1124 inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
1125 break;
1126 case ir_binop_bit_and:
1127 inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
1128 break;
1129 case ir_binop_bit_xor:
1130 inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
1131 break;
1132 case ir_binop_bit_or:
1133 inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
1134 break;
1135
1136 case ir_binop_lshift:
1137 case ir_binop_rshift:
1138 assert(!"GLSL 1.30 features unsupported");
1139 break;
1140
1141 case ir_quadop_vector:
1142 assert(!"not reached: should be handled by lower_quadop_vector");
1143 break;
1144 }
1145 }
1146
1147
1148 void
1149 vec4_visitor::visit(ir_swizzle *ir)
1150 {
1151 src_reg src;
1152 int i = 0;
1153 int swizzle[4];
1154
1155 /* Note that this is only swizzles in expressions, not those on the left
1156 * hand side of an assignment, which do write masking. See ir_assignment
1157 * for that.
1158 */
1159
1160 ir->val->accept(this);
1161 src = this->result;
1162 assert(src.file != BAD_FILE);
1163
1164 for (i = 0; i < ir->type->vector_elements; i++) {
1165 switch (i) {
1166 case 0:
1167 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1168 break;
1169 case 1:
1170 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1171 break;
1172 case 2:
1173 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1174 break;
1175 case 3:
1176 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1177 break;
1178 }
1179 }
1180 for (; i < 4; i++) {
1181 /* Replicate the last channel out. */
1182 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1183 }
1184
1185 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1186
1187 this->result = src;
1188 }
1189
1190 void
1191 vec4_visitor::visit(ir_dereference_variable *ir)
1192 {
1193 const struct glsl_type *type = ir->type;
1194 dst_reg *reg = variable_storage(ir->var);
1195
1196 if (!reg) {
1197 fail("Failed to find variable storage for %s\n", ir->var->name);
1198 this->result = src_reg(brw_null_reg());
1199 return;
1200 }
1201
1202 this->result = src_reg(*reg);
1203
1204 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1205 this->result.swizzle = swizzle_for_size(type->vector_elements);
1206 }
1207
1208 void
1209 vec4_visitor::visit(ir_dereference_array *ir)
1210 {
1211 ir_constant *constant_index;
1212 src_reg src;
1213 int element_size = type_size(ir->type);
1214
1215 constant_index = ir->array_index->constant_expression_value();
1216
1217 ir->array->accept(this);
1218 src = this->result;
1219
1220 if (constant_index) {
1221 src.reg_offset += constant_index->value.i[0] * element_size;
1222 } else {
1223 /* Variable index array dereference. It eats the "vec4" of the
1224 * base of the array and an index that offsets the Mesa register
1225 * index.
1226 */
1227 ir->array_index->accept(this);
1228
1229 src_reg index_reg;
1230
1231 if (element_size == 1) {
1232 index_reg = this->result;
1233 } else {
1234 index_reg = src_reg(this, glsl_type::int_type);
1235
1236 emit(BRW_OPCODE_MUL, dst_reg(index_reg),
1237 this->result, src_reg(element_size));
1238 }
1239
1240 if (src.reladdr) {
1241 src_reg temp = src_reg(this, glsl_type::int_type);
1242
1243 emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);
1244
1245 index_reg = temp;
1246 }
1247
1248 src.reladdr = ralloc(mem_ctx, src_reg);
1249 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1250 }
1251
1252 /* If the type is smaller than a vec4, replicate the last channel out. */
1253 if (ir->type->is_scalar() || ir->type->is_vector())
1254 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1255 else
1256 src.swizzle = BRW_SWIZZLE_NOOP;
1257 src.type = brw_type_for_base_type(ir->type);
1258
1259 this->result = src;
1260 }
1261
1262 void
1263 vec4_visitor::visit(ir_dereference_record *ir)
1264 {
1265 unsigned int i;
1266 const glsl_type *struct_type = ir->record->type;
1267 int offset = 0;
1268
1269 ir->record->accept(this);
1270
1271 for (i = 0; i < struct_type->length; i++) {
1272 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1273 break;
1274 offset += type_size(struct_type->fields.structure[i].type);
1275 }
1276
1277 /* If the type is smaller than a vec4, replicate the last channel out. */
1278 if (ir->type->is_scalar() || ir->type->is_vector())
1279 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1280 else
1281 this->result.swizzle = BRW_SWIZZLE_NOOP;
1282 this->result.type = brw_type_for_base_type(ir->type);
1283
1284 this->result.reg_offset += offset;
1285 }
1286
1287 /**
1288 * We want to be careful in assignment setup to hit the actual storage
1289 * instead of potentially using a temporary like we might with the
1290 * ir_dereference handler.
1291 */
1292 static dst_reg
1293 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1294 {
1295 /* The LHS must be a dereference. If the LHS is a variable indexed array
1296 * access of a vector, it must be separated into a series of conditional
1297 * before reaching this point (see ir_vec_index_to_cond_assign).
1298 */
1299 assert(ir->as_dereference());
1300 ir_dereference_array *deref_array = ir->as_dereference_array();
1301 if (deref_array) {
1302 assert(!deref_array->array->type->is_vector());
1303 }
1304
1305 /* Use the rvalue deref handler for the most part. We'll ignore
1306 * swizzles in it and write swizzles using writemask, though.
1307 */
1308 ir->accept(v);
1309 return dst_reg(v->result);
1310 }
1311
1312 void
1313 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1314 const struct glsl_type *type, bool predicated)
1315 {
1316 if (type->base_type == GLSL_TYPE_STRUCT) {
1317 for (unsigned int i = 0; i < type->length; i++) {
1318 emit_block_move(dst, src, type->fields.structure[i].type, predicated);
1319 }
1320 return;
1321 }
1322
1323 if (type->is_array()) {
1324 for (unsigned int i = 0; i < type->length; i++) {
1325 emit_block_move(dst, src, type->fields.array, predicated);
1326 }
1327 return;
1328 }
1329
1330 if (type->is_matrix()) {
1331 const struct glsl_type *vec_type;
1332
1333 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1334 type->vector_elements, 1);
1335
1336 for (int i = 0; i < type->matrix_columns; i++) {
1337 emit_block_move(dst, src, vec_type, predicated);
1338 }
1339 return;
1340 }
1341
1342 assert(type->is_scalar() || type->is_vector());
1343
1344 dst->type = brw_type_for_base_type(type);
1345 src->type = dst->type;
1346
1347 dst->writemask = (1 << type->vector_elements) - 1;
1348
1349 /* Do we need to worry about swizzling a swizzle? */
1350 assert(src->swizzle == BRW_SWIZZLE_NOOP);
1351 src->swizzle = swizzle_for_size(type->vector_elements);
1352
1353 vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
1354 if (predicated)
1355 inst->predicate = BRW_PREDICATE_NORMAL;
1356
1357 dst->reg_offset++;
1358 src->reg_offset++;
1359 }
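/* Worked example (illustrative only): copying a mat3 recurses into three
 * vec3-sized moves, each advancing reg_offset by one:
 *
 *    MOV dst+0.xyz, src+0
 *    MOV dst+1.xyz, src+1
 *    MOV dst+2.xyz, src+2
 *
 * each MOV predicated when the enclosing assignment had a condition.
 */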
1360
1361
1362 /* If the RHS processing resulted in an instruction generating a
1363 * temporary value, and it would be easy to rewrite the instruction to
1364 * generate its result right into the LHS instead, do so. This ends
1365 * up reliably removing instructions where it can be tricky to do so
1366 * later without real UD chain information.
1367 */
1368 bool
1369 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1370 dst_reg dst,
1371 src_reg src,
1372 vec4_instruction *pre_rhs_inst,
1373 vec4_instruction *last_rhs_inst)
1374 {
1375 /* This could be supported, but it would take more smarts. */
1376 if (ir->condition)
1377 return false;
1378
1379 if (pre_rhs_inst == last_rhs_inst)
1380 return false; /* No instructions generated to work with. */
1381
1382 /* Make sure the last instruction generated our source reg. */
1383 if (src.file != GRF ||
1384 src.file != last_rhs_inst->dst.file ||
1385 src.reg != last_rhs_inst->dst.reg ||
1386 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1387 src.reladdr ||
1388 src.abs ||
1389 src.negate ||
1390 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1391 return false;
1392
1393 /* Check that the last instruction fully initialized the channels
1394 * we want to use, in the order we want to use them. We could
1395 * potentially reswizzle the operands of many instructions so that
1396 * we could handle out of order channels, but don't yet.
1397 */
1398 for (int i = 0; i < 4; i++) {
1399 if (dst.writemask & (1 << i)) {
1400 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1401 return false;
1402
1403 if (BRW_GET_SWZ(src.swizzle, i) != i)
1404 return false;
1405 }
1406 }
1407
1408 /* Success! Rewrite the instruction. */
1409 last_rhs_inst->dst.file = dst.file;
1410 last_rhs_inst->dst.reg = dst.reg;
1411 last_rhs_inst->dst.reg_offset = dst.reg_offset;
1412 last_rhs_inst->dst.reladdr = dst.reladdr;
1413 last_rhs_inst->dst.writemask &= dst.writemask;
1414
1415 return true;
1416 }
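/* Worked example (illustrative only): for "v.xyz = a + b;" the RHS emits
 *
 *    ADD temp.xyz, a, b
 *
 * and this rewrite retargets that ADD's destination to v.xyz, so
 * visit(ir_assignment) never has to emit the trailing "MOV v.xyz, temp".
 */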
1417
1418 void
1419 vec4_visitor::visit(ir_assignment *ir)
1420 {
1421 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1422
1423 if (!ir->lhs->type->is_scalar() &&
1424 !ir->lhs->type->is_vector()) {
1425 ir->rhs->accept(this);
1426 src_reg src = this->result;
1427
1428 if (ir->condition) {
1429 emit_bool_to_cond_code(ir->condition);
1430 }
1431
1432 emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
1433 return;
1434 }
1435
1436 /* Now we're down to just a scalar/vector with writemasks. */
1437 int i;
1438
1439 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1440 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1441
1442 ir->rhs->accept(this);
1443
1444 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1445
1446 src_reg src = this->result;
1447
1448 int swizzles[4];
1449 int first_enabled_chan = 0;
1450 int src_chan = 0;
1451
1452 assert(ir->lhs->type->is_vector() ||
1453 ir->lhs->type->is_scalar());
1454 dst.writemask = ir->write_mask;
1455
1456 for (int i = 0; i < 4; i++) {
1457 if (dst.writemask & (1 << i)) {
1458 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1459 break;
1460 }
1461 }
1462
1463 /* Swizzle a small RHS vector into the channels being written.
1464 *
1465 * glsl ir treats write_mask as dictating how many channels are
1466 * present on the RHS while in our instructions we need to make
1467 * those channels appear in the slots of the vec4 they're written to.
1468 */
1469 for (int i = 0; i < 4; i++) {
1470 if (dst.writemask & (1 << i))
1471 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1472 else
1473 swizzles[i] = first_enabled_chan;
1474 }
1475 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1476 swizzles[2], swizzles[3]);
1477
1478 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1479 return;
1480 }
1481
1482 if (ir->condition) {
1483 emit_bool_to_cond_code(ir->condition);
1484 }
1485
1486 for (i = 0; i < type_size(ir->lhs->type); i++) {
1487 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
1488
1489 if (ir->condition)
1490 inst->predicate = BRW_PREDICATE_NORMAL;
1491
1492 dst.reg_offset++;
1493 src.reg_offset++;
1494 }
1495 }
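/* Swizzle-remap sketch (illustrative only): for a vec2 RHS assigned as
 * "v.xz = ...;", the RHS is typically read as .xyyy, and the loop above
 * remaps it to
 *
 *    src.swizzle = BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X);
 *
 * so the MOV with writemask XZ lands the first RHS channel in x and the
 * second in z; unwritten slots just repeat the first enabled channel.
 */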
1496
1497 void
1498 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1499 {
1500 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1501 foreach_list(node, &ir->components) {
1502 ir_constant *field_value = (ir_constant *)node;
1503
1504 emit_constant_values(dst, field_value);
1505 }
1506 return;
1507 }
1508
1509 if (ir->type->is_array()) {
1510 for (unsigned int i = 0; i < ir->type->length; i++) {
1511 emit_constant_values(dst, ir->array_elements[i]);
1512 }
1513 return;
1514 }
1515
1516 if (ir->type->is_matrix()) {
1517 for (int i = 0; i < ir->type->matrix_columns; i++) {
1518 for (int j = 0; j < ir->type->vector_elements; j++) {
1519 dst->writemask = 1 << j;
1520 dst->type = BRW_REGISTER_TYPE_F;
1521
1522 emit(BRW_OPCODE_MOV, *dst,
1523 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
1524 }
1525 dst->reg_offset++;
1526 }
1527 return;
1528 }
1529
1530 for (int i = 0; i < ir->type->vector_elements; i++) {
1531 dst->writemask = 1 << i;
1532 dst->type = brw_type_for_base_type(ir->type);
1533
1534 switch (ir->type->base_type) {
1535 case GLSL_TYPE_FLOAT:
1536 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
1537 break;
1538 case GLSL_TYPE_INT:
1539 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
1540 break;
1541 case GLSL_TYPE_UINT:
1542 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
1543 break;
1544 case GLSL_TYPE_BOOL:
1545 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
1546 break;
1547 default:
1548 assert(!"Non-float/uint/int/bool constant");
1549 break;
1550 }
1551 }
1552 dst->reg_offset++;
1553 }
1554
1555 void
1556 vec4_visitor::visit(ir_constant *ir)
1557 {
1558 dst_reg dst = dst_reg(this, ir->type);
1559 this->result = src_reg(dst);
1560
1561 emit_constant_values(&dst, ir);
1562 }
1563
1564 void
1565 vec4_visitor::visit(ir_call *ir)
1566 {
1567 assert(!"not reached");
1568 }
1569
1570 void
1571 vec4_visitor::visit(ir_texture *ir)
1572 {
1573 /* FINISHME: Implement vertex texturing.
1574 *
1575 * Since we advertise 0 vertex samplers, the linker will reject
1576 * programs that do vertex texturing, but only after our visitor
1577 * has run.
1578 */
1579 }
1580
1581 void
1582 vec4_visitor::visit(ir_return *ir)
1583 {
1584 assert(!"not reached");
1585 }
1586
1587 void
1588 vec4_visitor::visit(ir_discard *ir)
1589 {
1590 assert(!"not reached");
1591 }
1592
1593 void
1594 vec4_visitor::visit(ir_if *ir)
1595 {
1596 /* Don't point the annotation at the if statement, because then it plus
1597 * the then and else blocks get printed.
1598 */
1599 this->base_ir = ir->condition;
1600
1601 if (intel->gen == 6) {
1602 emit_if_gen6(ir);
1603 } else {
1604 emit_bool_to_cond_code(ir->condition);
1605 vec4_instruction *inst = emit(BRW_OPCODE_IF);
1606 inst->predicate = BRW_PREDICATE_NORMAL;
1607 }
1608
1609 visit_instructions(&ir->then_instructions);
1610
1611 if (!ir->else_instructions.is_empty()) {
1612 this->base_ir = ir->condition;
1613 emit(BRW_OPCODE_ELSE);
1614
1615 visit_instructions(&ir->else_instructions);
1616 }
1617
1618 this->base_ir = ir->condition;
1619 emit(BRW_OPCODE_ENDIF);
1620 }
1621
1622 int
1623 vec4_visitor::emit_vue_header_gen4(int header_mrf)
1624 {
1625 /* Get the position */
1626 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
1627
1628 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1629 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1630
1631 current_annotation = "NDC";
1632 dst_reg ndc_w = ndc;
1633 ndc_w.writemask = WRITEMASK_W;
1634 src_reg pos_w = pos;
1635 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1636 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1637
1638 dst_reg ndc_xyz = ndc;
1639 ndc_xyz.writemask = WRITEMASK_XYZ;
1640
1641 emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
1642
1643 if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
1644 c->key.nr_userclip || brw->has_negative_rhw_bug) {
1645 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1646 GLuint i;
1647
1648 emit(BRW_OPCODE_MOV, header1, 0u);
1649
1650 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1651 assert(!"finishme: psiz");
1652 src_reg psiz;
1653
1654 header1.writemask = WRITEMASK_W;
1655 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
1656 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
1657 }
1658
1659 for (i = 0; i < c->key.nr_userclip; i++) {
1660 vec4_instruction *inst;
1661
1662 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
1663 pos, src_reg(c->userplane[i]));
1664 inst->conditional_mod = BRW_CONDITIONAL_L;
1665
1666 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
1667 inst->predicate = BRW_PREDICATE_NORMAL;
1668 }
1669
1670 /* i965 clipping workaround:
1671 * 1) Test for -ve rhw
1672 * 2) If set,
1673 * set ndc = (0,0,0,0)
1674 * set ucp[6] = 1
1675 *
1676 * Later, clipping will detect ucp[6] and ensure the primitive is
1677 * clipped against all fixed planes.
1678 */
1679 if (brw->has_negative_rhw_bug) {
1680 #if 0
1681 /* FINISHME */
1682 brw_CMP(p,
1683 vec8(brw_null_reg()),
1684 BRW_CONDITIONAL_L,
1685 brw_swizzle1(ndc, 3),
1686 brw_imm_f(0));
1687
1688 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
1689 brw_MOV(p, ndc, brw_imm_f(0));
1690 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1691 #endif
1692 }
1693
1694 header1.writemask = WRITEMASK_XYZW;
1695 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
1696 } else {
1697 emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
1698 BRW_REGISTER_TYPE_UD), 0u);
1699 }
1700
1701 if (intel->gen == 5) {
1702 /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
1703 * dword 0-3 (m1) of the header is indices, point width, clip flags.
1704 * dword 4-7 (m2) is the ndc position (set above)
1705 * dword 8-11 (m3) of the vertex header is the 4D space position
1706 * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
1707 * m6 is a pad so that the vertex element data is aligned
1708 * m7 is the first vertex data we fill.
1709 */
1710 current_annotation = "NDC";
1711 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1712
1713 current_annotation = "gl_Position";
1714 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1715
1716 /* user clip distance. */
1717 header_mrf += 2;
1718
1719 /* Pad so that vertex element data is aligned. */
1720 header_mrf++;
1721 } else {
1722 /* There are 8 dwords in VUE header pre-Ironlake:
1723 * dword 0-3 (m1) is indices, point width, clip flags.
1724 * dword 4-7 (m2) is ndc position (set above)
1725 *
1726 * dword 8-11 (m3) is the first vertex data.
1727 */
1728 current_annotation = "NDC";
1729 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1730
1731 current_annotation = "gl_Position";
1732 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1733 }
1734
1735 return header_mrf;
1736 }
1737
1738 int
1739 vec4_visitor::emit_vue_header_gen6(int header_mrf)
1740 {
1741 struct brw_reg reg;
1742
1743 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
1744 * dword 0-3 (m2) of the header is indices, point width, clip flags.
1745 * dword 4-7 (m3) is the 4D space position
1746 * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
1747 * enabled.
1748 *
1749 * m4 or 6 is the first vertex element data we fill.
1750 */
1751
1752 current_annotation = "indices, point width, clip flags";
1753 reg = brw_message_reg(header_mrf++);
1754 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
1755 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1756 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
1757 src_reg(output_reg[VERT_RESULT_PSIZ]));
1758 }
1759
1760 current_annotation = "gl_Position";
1761 emit(BRW_OPCODE_MOV,
1762 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
1763
1764 current_annotation = "user clip distances";
1765 if (c->key.nr_userclip) {
1766 for (int i = 0; i < c->key.nr_userclip; i++) {
1767 struct brw_reg m;
1768 if (i < 4)
1769 m = brw_message_reg(header_mrf);
1770 else
1771 m = brw_message_reg(header_mrf + 1);
1772
1773 emit(BRW_OPCODE_DP4,
1774 dst_reg(brw_writemask(m, 1 << (i & 3))),
1775 src_reg(output_reg[VERT_RESULT_HPOS]), src_reg(c->userplane[i]));
1776 }
1777 header_mrf += 2;
1778 }
1779
1780 current_annotation = NULL;
1781
1782 return header_mrf;
1783 }
1784
1785 static int
1786 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
1787 {
1788 struct intel_context *intel = &brw->intel;
1789
1790 if (intel->gen >= 6) {
1791 /* URB data written (does not include the message header reg) must
1792 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
1793 * section 5.4.3.2.2: URB_INTERLEAVED.
1794 *
1795 * URB entries are allocated on a multiple of 1024 bits, so an
1796 * extra 128 bits written here to make the end align to 256 is
1797 * no problem.
1798 */
1799 if ((mlen % 2) != 1)
1800 mlen++;
1801 }
1802
1803 return mlen;
1804 }
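/* Example (illustrative only): a message of one header reg plus five data
 * regs has mlen == 6; (6 % 2) != 1, so mlen is bumped to 7, leaving an
 * even number of data registers after the header.
 */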
1805
1806 /**
1807 * Generates the VUE payload plus the 1 or 2 URB write instructions to
1808 * complete the VS thread.
1809 *
1810 * The VUE layout is documented in Volume 2a.
1811 */
1812 void
1813 vec4_visitor::emit_urb_writes()
1814 {
1815 /* MRF 0 is reserved for the debugger, so start with message header
1816 * in MRF 1.
1817 */
1818 int base_mrf = 1;
1819 int mrf = base_mrf;
1820 int urb_entry_size;
1821 uint64_t outputs_remaining = c->prog_data.outputs_written;
1822 /* In the process of generating our URB write message contents, we
1823 * may need to unspill a register or load from an array. Those
1824 * reads would use MRFs 14-15.
1825 */
1826 int max_usable_mrf = 13;
1827
1828 /* FINISHME: edgeflag */
1829
1830 /* First mrf is the g0-based message header containing URB handles and such,
1831 * which is implied in VS_OPCODE_URB_WRITE.
1832 */
1833 mrf++;
1834
1835 if (intel->gen >= 6) {
1836 mrf = emit_vue_header_gen6(mrf);
1837 } else {
1838 mrf = emit_vue_header_gen4(mrf);
1839 }
1840
1841 /* Set up the VUE data for the first URB write */
1842 int attr;
1843 for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
1844 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1845 continue;
1846
1847 outputs_remaining &= ~BITFIELD64_BIT(attr);
1848
1849 /* This is set up in the VUE header. */
1850 if (attr == VERT_RESULT_HPOS)
1851 continue;
1852
1853 /* This is loaded into the VUE header, and thus doesn't occupy
1854 * an attribute slot.
1855 */
1856 if (attr == VERT_RESULT_PSIZ)
1857 continue;
1858
1859 vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++),
1860 src_reg(output_reg[attr]));
1861
1862 if ((attr == VERT_RESULT_COL0 ||
1863 attr == VERT_RESULT_COL1 ||
1864 attr == VERT_RESULT_BFC0 ||
1865 attr == VERT_RESULT_BFC1) &&
1866 c->key.clamp_vertex_color) {
1867 inst->saturate = true;
1868 }
1869
1870 /* If this was the last usable MRF, we can't fit anything more into
1871 * this URB WRITE. Note that with base_mrf of 1, stopping here leaves
1872 * an even-numbered amount of URB write data, which meets gen6's
1873 * requirements for length alignment.
1874 */
1875 if (mrf > max_usable_mrf) {
1876 attr++;
1877 break;
1878 }
1879 }
1880
1881 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
1882 inst->base_mrf = base_mrf;
1883 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1884 inst->eot = !outputs_remaining;
1885
1886 urb_entry_size = mrf - base_mrf;
1887
1888 /* Optional second URB write */
1889 if (outputs_remaining) {
1890 mrf = base_mrf + 1;
1891
1892 for (; attr < VERT_RESULT_MAX; attr++) {
1893 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1894 continue;
1895
1896 assert(mrf < max_usable_mrf);
1897
1898 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
1899 }
1900
1901 inst = emit(VS_OPCODE_URB_WRITE);
1902 inst->base_mrf = base_mrf;
1903 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1904 inst->eot = true;
1905 /* URB destination offset. In the previous write, MRF 1 was the
1906 * header and MRFs 2-13 carried the data, so 12 regs. URB offset is in
1907 * URB row increments, and each of our MRFs is half of one of
1908 * those, since we're doing interleaved writes.
1909 */
1910 inst->offset = (max_usable_mrf - base_mrf) / 2;
1911
1912 urb_entry_size += mrf - base_mrf;
1913 }
1914
1915 if (intel->gen == 6)
1916 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
1917 else
1918 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
1919 }
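/* Size sketch (illustrative only): with base_mrf == 1 and max_usable_mrf
 * == 13, the first URB write carries at most 12 data MRFs (MRFs 2-13).
 * If outputs remain, the second write starts its data at MRF 2 again and
 * uses inst->offset == (13 - 1) / 2 == 6 interleaved URB rows as its
 * destination offset.
 */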
1920
1921 src_reg
1922 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
1923 src_reg *reladdr, int reg_offset)
1924 {
1925 /* Because we store the values to scratch interleaved like our
1926 * vertex data, we need to scale the vec4 index by 2.
1927 */
1928 int message_header_scale = 2;
1929
1930 /* Pre-gen6, the message header uses byte offsets instead of vec4
1931 * (16-byte) offset units.
1932 */
1933 if (intel->gen < 6)
1934 message_header_scale *= 16;
1935
1936 if (reladdr) {
1937 src_reg index = src_reg(this, glsl_type::int_type);
1938
1939 vec4_instruction *add = emit(BRW_OPCODE_ADD,
1940 dst_reg(index),
1941 *reladdr,
1942 src_reg(reg_offset));
1943 /* Move our new instruction from the tail to its correct place. */
1944 add->remove();
1945 inst->insert_before(add);
1946
1947 vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
1948 index, src_reg(message_header_scale));
1949 mul->remove();
1950 inst->insert_before(mul);
1951
1952 return index;
1953 } else {
1954 return src_reg(reg_offset * message_header_scale);
1955 }
1956 }
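/* Example (illustrative only): a constant access at reg_offset 3 yields
 * src_reg(6) on gen6+ (interleaved vec4 pairs) and src_reg(96) pre-gen6
 * (byte offsets); a variable access instead emits ADD(index, reladdr, 3)
 * and MUL(index, index, scale), inserted just before the scratch
 * instruction.
 */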
1957
1958 src_reg
1959 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
1960 src_reg *reladdr, int reg_offset)
1961 {
1962 if (reladdr) {
1963 src_reg index = src_reg(this, glsl_type::int_type);
1964
1965 vec4_instruction *add = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_ADD,
1966 dst_reg(index),
1967 *reladdr,
1968 src_reg(reg_offset));
1969 add->ir = inst->ir;
1970 add->annotation = inst->annotation;
1971 inst->insert_before(add);
1972
1973 /* Pre-gen6, the message header uses byte offsets instead of vec4
1974 * (16-byte) offset units.
1975 */
1976 if (intel->gen < 6) {
1977 vec4_instruction *mul = new(mem_ctx) vec4_instruction(this,
1978 BRW_OPCODE_MUL,
1979 dst_reg(index),
1980 index,
1981 src_reg(16));
1982 mul->ir = inst->ir;
1983 mul->annotation = inst->annotation;
1984 inst->insert_before(mul);
1985 }
1986
1987 return index;
1988 } else {
1989 int message_header_scale = intel->gen < 6 ? 16 : 1;
1990 return src_reg(reg_offset * message_header_scale);
1991 }
1992 }
1993
1994 /**
1995 * Emits an instruction before @inst to load the value named by @orig_src
1996 * from scratch space at @base_offset to @temp.
1997 */
1998 void
1999 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2000 dst_reg temp, src_reg orig_src,
2001 int base_offset)
2002 {
2003 int reg_offset = base_offset + orig_src.reg_offset;
2004 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2005
2006 vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
2007 temp, index);
2008
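   /* mlen of 1: the message is just the header MRF; the read data lands
    * in @temp.
    */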
2009 scratch_read_inst->base_mrf = 14;
2010 scratch_read_inst->mlen = 1;
2011 /* Move our instruction from the tail to its correct place. */
2012 scratch_read_inst->remove();
2013 inst->insert_before(scratch_read_inst);
2014 }
2015
2016 /**
2017  * Emits an instruction after @inst to store @temp (the value @inst
2018  * computed for @orig_dst) to scratch space at @base_offset.
2019 */
2020 void
2021 vec4_visitor::emit_scratch_write(vec4_instruction *inst,
2022 src_reg temp, dst_reg orig_dst,
2023 int base_offset)
2024 {
2025 int reg_offset = base_offset + orig_dst.reg_offset;
2026 src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
2027
2028 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2029 orig_dst.writemask));
2030 vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
2031 dst, temp, index);
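   /* mlen of 2: the header MRF plus one MRF of data to be written. */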
2032 scratch_write_inst->base_mrf = 13;
2033 scratch_write_inst->mlen = 2;
2034 scratch_write_inst->predicate = inst->predicate;
2035 /* Move our instruction from the tail to its correct place. */
2036 scratch_write_inst->remove();
2037 inst->insert_after(scratch_write_inst);
2038 }
2039
2040 /**
2041 * We can't generally support array access in GRF space, because a
2042 * single instruction's destination can only span 2 contiguous
2043 * registers. So, we send all GRF arrays that get variable index
2044 * access to scratch space.
2045 */
2046 void
2047 vec4_visitor::move_grf_array_access_to_scratch()
2048 {
2049 int scratch_loc[this->virtual_grf_count];
2050
2051 for (int i = 0; i < this->virtual_grf_count; i++) {
2052 scratch_loc[i] = -1;
2053 }
2054
2055 /* First, calculate the set of virtual GRFs that need to be punted
2056 * to scratch due to having any array access on them, and where in
2057 * scratch.
2058 */
2059 foreach_list(node, &this->instructions) {
2060 vec4_instruction *inst = (vec4_instruction *)node;
2061
2062 if (inst->dst.file == GRF && inst->dst.reladdr &&
2063 scratch_loc[inst->dst.reg] == -1) {
2064 scratch_loc[inst->dst.reg] = c->last_scratch;
2065 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
2066 }
2067
2068       for (int i = 0; i < 3; i++) {
2069 src_reg *src = &inst->src[i];
2070
2071 if (src->file == GRF && src->reladdr &&
2072 scratch_loc[src->reg] == -1) {
2073 scratch_loc[src->reg] = c->last_scratch;
2074 c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
2075 }
2076 }
2077 }
2078
2079 /* Now, for anything that will be accessed through scratch, rewrite
2080 * it to load/store. Note that this is a _safe list walk, because
2081 * we may generate a new scratch_write instruction after the one
2082 * we're processing.
2083 */
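   /* A sketch of the rewrite (virtual GRF numbers illustrative only):
    *
    *    mov vgrf4[a0.x], x      becomes   mov vgrf9, x
    *                                      scratch_write vgrf9, loc(vgrf4) + a0.x
    *
    *    mov y, vgrf4[a0.x]      becomes   scratch_read vgrf9, loc(vgrf4) + a0.x
    *                                      mov y, vgrf9
    */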
2084 foreach_list_safe(node, &this->instructions) {
2085 vec4_instruction *inst = (vec4_instruction *)node;
2086
2087       /* Set up the annotation tracking for newly generated instructions. */
2088 base_ir = inst->ir;
2089 current_annotation = inst->annotation;
2090
2091 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2092 src_reg temp = src_reg(this, glsl_type::vec4_type);
2093
2094 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
2095
2096 inst->dst.file = temp.file;
2097 inst->dst.reg = temp.reg;
2098 inst->dst.reg_offset = temp.reg_offset;
2099 inst->dst.reladdr = NULL;
2100 }
2101
2102       for (int i = 0; i < 3; i++) {
2103 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2104 continue;
2105
2106 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2107
2108 emit_scratch_read(inst, temp, inst->src[i],
2109 scratch_loc[inst->src[i].reg]);
2110
2111 inst->src[i].file = temp.file;
2112 inst->src[i].reg = temp.reg;
2113 inst->src[i].reg_offset = temp.reg_offset;
2114 inst->src[i].reladdr = NULL;
2115 }
2116 }
2117 }
2118
2119 /**
2120 * Emits an instruction before @inst to load the value named by @orig_src
2121 * from the pull constant buffer (surface) at @base_offset to @temp.
2122 */
2123 void
2124 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
2125 dst_reg temp, src_reg orig_src,
2126 int base_offset)
2127 {
2128 int reg_offset = base_offset + orig_src.reg_offset;
2129 src_reg index = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
2130 vec4_instruction *load;
2131
2132 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
2133 temp, index);
2134 load->annotation = inst->annotation;
2135 load->ir = inst->ir;
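   /* mlen of 1: like the scratch read, the message is just the header MRF. */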
2136 load->base_mrf = 14;
2137 load->mlen = 1;
2138 inst->insert_before(load);
2139 }
2140
2141 /**
2142 * Implements array access of uniforms by inserting a
2143 * PULL_CONSTANT_LOAD instruction.
2144 *
2145 * Unlike temporary GRF array access (where we don't support it due to
2146 * the difficulty of doing relative addressing on instruction
2147 * destinations), we could potentially do array access of uniforms
2148 * that were loaded in GRF space as push constants. In real-world
2149 * usage we've seen, though, the arrays being used are always larger
2150 * than we could load as push constants, so just always move all
2151 * uniform array access out to a pull constant buffer.
2152 */
2153 void
2154 vec4_visitor::move_uniform_array_access_to_pull_constants()
2155 {
2156 int pull_constant_loc[this->uniforms];
2157
2158 for (int i = 0; i < this->uniforms; i++) {
2159 pull_constant_loc[i] = -1;
2160 }
2161
2162 /* Walk through and find array access of uniforms. Put a copy of that
2163 * uniform in the pull constant buffer.
2164 *
2165 * Note that we don't move constant-indexed accesses to arrays. No
2166 * testing has been done of the performance impact of this choice.
2167 */
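   /* A sketch of the rewrite (register numbers illustrative only):
    *
    *    add vgrf7, u4[a0.x], x   becomes   pull_constant_load vgrf9, loc(u4) + a0.x
    *                                       add vgrf7, vgrf9, x
    */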
2168 foreach_list_safe(node, &this->instructions) {
2169 vec4_instruction *inst = (vec4_instruction *)node;
2170
2171       for (int i = 0; i < 3; i++) {
2172 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
2173 continue;
2174
2175 int uniform = inst->src[i].reg;
2176
2177 /* If this array isn't already present in the pull constant buffer,
2178 * add it.
2179 */
2180 if (pull_constant_loc[uniform] == -1) {
2181 const float **values = &prog_data->param[uniform * 4];
2182
2183 pull_constant_loc[uniform] = prog_data->nr_pull_params;
2184
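	    /* Append the array to the pull constant buffer: param[] holds
	     * four components per vec4 uniform slot.
	     */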
2185 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
2186 prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
2187 }
2188 }
2189
2190 	 /* Set up the annotation tracking for newly generated instructions. */
2191 base_ir = inst->ir;
2192 current_annotation = inst->annotation;
2193
2194 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2195
2196 emit_pull_constant_load(inst, temp, inst->src[i],
2197 pull_constant_loc[uniform]);
2198
2199 inst->src[i].file = temp.file;
2200 inst->src[i].reg = temp.reg;
2201 inst->src[i].reg_offset = temp.reg_offset;
2202 inst->src[i].reladdr = NULL;
2203 }
2204 }
2205 }
2206
2207 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
2208 struct gl_shader_program *prog,
2209 struct brw_shader *shader)
2210 {
2211 this->c = c;
2212 this->p = &c->func;
2213 this->brw = p->brw;
2214 this->intel = &brw->intel;
2215 this->ctx = &intel->ctx;
2216 this->prog = prog;
2217 this->shader = shader;
2218
2219 this->mem_ctx = ralloc_context(NULL);
2220 this->failed = false;
2221
2222 this->base_ir = NULL;
2223 this->current_annotation = NULL;
2224
2226 this->vp = prog->VertexProgram;
2227 this->prog_data = &c->prog_data;
2228
2229 this->variable_ht = hash_table_ctor(0,
2230 hash_table_pointer_hash,
2231 hash_table_pointer_compare);
2232
2233 this->virtual_grf_def = NULL;
2234 this->virtual_grf_use = NULL;
2235 this->virtual_grf_sizes = NULL;
2236 this->virtual_grf_count = 0;
2237 this->virtual_grf_array_size = 0;
2238 this->live_intervals_valid = false;
2239
2240 this->uniforms = 0;
2245 }
2246
2247 vec4_visitor::~vec4_visitor()
2248 {
2249 ralloc_free(this->mem_ctx);
2250 hash_table_dtor(this->variable_ht);
2251 }
2252
2253
2254 void
2255 vec4_visitor::fail(const char *format, ...)
2256 {
2257 va_list va;
2258 char *msg;
2259
2260 if (failed)
2261 return;
2262
2263 failed = true;
2264
2265 va_start(va, format);
2266 msg = ralloc_vasprintf(mem_ctx, format, va);
2267 va_end(va);
2268 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2269
2270 this->fail_msg = msg;
2271
2272 if (INTEL_DEBUG & DEBUG_VS) {
2273 fprintf(stderr, "%s", msg);
2274 }
2275 }
2276
2277 } /* namespace brw */