5e2b3e5a5feac250192666241ac54335371836c6
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 }
29
30 namespace brw {
31
32 src_reg::src_reg(dst_reg reg)
33 {
34 init();
35
36 this->file = reg.file;
37 this->reg = reg.reg;
38 this->reg_offset = reg.reg_offset;
39 this->type = reg.type;
40
41 int swizzles[4];
42 int next_chan = 0;
43 int last = 0;
44
45 for (int i = 0; i < 4; i++) {
46 if (!(reg.writemask & (1 << i)))
47 continue;
48
49 swizzles[next_chan++] = last = i;
50 }
51
52 for (; next_chan < 4; next_chan++) {
53 swizzles[next_chan] = last;
54 }
55
56 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
57 swizzles[2], swizzles[3]);
58 }
59
60 dst_reg::dst_reg(src_reg reg)
61 {
62 init();
63
64 this->file = reg.file;
65 this->reg = reg.reg;
66 this->reg_offset = reg.reg_offset;
67 this->type = reg.type;
68 this->writemask = WRITEMASK_XYZW;
69 }
70
71 vec4_instruction *
72 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
73 src_reg src0, src_reg src1, src_reg src2)
74 {
75 vec4_instruction *inst = new(mem_ctx) vec4_instruction();
76
77 inst->opcode = opcode;
78 inst->dst = dst;
79 inst->src[0] = src0;
80 inst->src[1] = src1;
81 inst->src[2] = src2;
82 inst->ir = this->base_ir;
83 inst->annotation = this->current_annotation;
84
85 this->instructions.push_tail(inst);
86
87 return inst;
88 }
89
90
91 vec4_instruction *
92 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
93 {
94 return emit(opcode, dst, src0, src1, src_reg());
95 }
96
97 vec4_instruction *
98 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
99 {
100 assert(dst.writemask != 0);
101 return emit(opcode, dst, src0, src_reg(), src_reg());
102 }
103
104 vec4_instruction *
105 vec4_visitor::emit(enum opcode opcode)
106 {
107 return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
108 }
109
110 void
111 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
112 {
113 static enum opcode dot_opcodes[] = {
114 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
115 };
116
117 emit(dot_opcodes[elements - 2], dst, src0, src1);
118 }
119
120 void
121 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
122 {
123 /* The gen6 math instruction ignores the source modifiers --
124 * swizzle, abs, negate, and at least some parts of the register
125 * region description.
126 */
127 src_reg temp_src = src_reg(this, glsl_type::vec4_type);
128 emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);
129
130 emit(opcode, dst, temp_src);
131 }
132
133 void
134 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
135 {
136 vec4_instruction *inst = emit(opcode, dst, src);
137 inst->base_mrf = 1;
138 inst->mlen = 1;
139 }
140
141 void
142 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
143 {
144 switch (opcode) {
145 case SHADER_OPCODE_RCP:
146 case SHADER_OPCODE_RSQ:
147 case SHADER_OPCODE_SQRT:
148 case SHADER_OPCODE_EXP2:
149 case SHADER_OPCODE_LOG2:
150 case SHADER_OPCODE_SIN:
151 case SHADER_OPCODE_COS:
152 break;
153 default:
154 assert(!"not reached: bad math opcode");
155 return;
156 }
157
158 if (intel->gen >= 6) {
159 return emit_math1_gen6(opcode, dst, src);
160 } else {
161 return emit_math1_gen4(opcode, dst, src);
162 }
163 }
164
165 void
166 vec4_visitor::emit_math2_gen6(enum opcode opcode,
167 dst_reg dst, src_reg src0, src_reg src1)
168 {
169 src_reg expanded;
170
171 /* The gen6 math instruction ignores the source modifiers --
172 * swizzle, abs, negate, and at least some parts of the register
173 * region description. Move the sources to temporaries to make it
174 * generally work.
175 */
176
177 expanded = src_reg(this, glsl_type::vec4_type);
178 emit(BRW_OPCODE_MOV, dst, src0);
179 src0 = expanded;
180
181 expanded = src_reg(this, glsl_type::vec4_type);
182 emit(BRW_OPCODE_MOV, dst, src1);
183 src1 = expanded;
184
185 emit(opcode, dst, src0, src1);
186 }
187
188 void
189 vec4_visitor::emit_math2_gen4(enum opcode opcode,
190 dst_reg dst, src_reg src0, src_reg src1)
191 {
192 vec4_instruction *inst = emit(opcode, dst, src0, src1);
193 inst->base_mrf = 1;
194 inst->mlen = 2;
195 }
196
197 void
198 vec4_visitor::emit_math(enum opcode opcode,
199 dst_reg dst, src_reg src0, src_reg src1)
200 {
201 assert(opcode == SHADER_OPCODE_POW);
202
203 if (intel->gen >= 6) {
204 return emit_math2_gen6(opcode, dst, src0, src1);
205 } else {
206 return emit_math2_gen4(opcode, dst, src0, src1);
207 }
208 }
209
void
vec4_visitor::visit_instructions(const exec_list *list)
{
   /* Walk every IR instruction in the list, recording each one in
    * base_ir so the instructions emitted for it carry it as a debug
    * annotation.
    */
   foreach_iter(exec_list_iterator, iter, *list) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      base_ir = ir;
      ir->accept(this);
   }
}
220
221
222 static int
223 type_size(const struct glsl_type *type)
224 {
225 unsigned int i;
226 int size;
227
228 switch (type->base_type) {
229 case GLSL_TYPE_UINT:
230 case GLSL_TYPE_INT:
231 case GLSL_TYPE_FLOAT:
232 case GLSL_TYPE_BOOL:
233 if (type->is_matrix()) {
234 return type->matrix_columns;
235 } else {
236 /* Regardless of size of vector, it gets a vec4. This is bad
237 * packing for things like floats, but otherwise arrays become a
238 * mess. Hopefully a later pass over the code can pack scalars
239 * down if appropriate.
240 */
241 return 1;
242 }
243 case GLSL_TYPE_ARRAY:
244 assert(type->length > 0);
245 return type_size(type->fields.array) * type->length;
246 case GLSL_TYPE_STRUCT:
247 size = 0;
248 for (i = 0; i < type->length; i++) {
249 size += type_size(type->fields.structure[i].type);
250 }
251 return size;
252 case GLSL_TYPE_SAMPLER:
253 /* Samplers take up one slot in UNIFORMS[], but they're baked in
254 * at link time.
255 */
256 return 1;
257 default:
258 assert(0);
259 return 0;
260 }
261 }
262
263 int
264 vec4_visitor::virtual_grf_alloc(int size)
265 {
266 if (virtual_grf_array_size <= virtual_grf_count) {
267 if (virtual_grf_array_size == 0)
268 virtual_grf_array_size = 16;
269 else
270 virtual_grf_array_size *= 2;
271 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
272 virtual_grf_array_size);
273 }
274 virtual_grf_sizes[virtual_grf_count] = size;
275 return virtual_grf_count++;
276 }
277
278 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
279 {
280 init();
281
282 this->file = GRF;
283 this->reg = v->virtual_grf_alloc(type_size(type));
284
285 if (type->is_array() || type->is_record()) {
286 this->swizzle = BRW_SWIZZLE_NOOP;
287 } else {
288 this->swizzle = swizzle_for_size(type->vector_elements);
289 }
290
291 this->type = brw_type_for_base_type(type);
292 }
293
294 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
295 {
296 init();
297
298 this->file = GRF;
299 this->reg = v->virtual_grf_alloc(type_size(type));
300
301 if (type->is_array() || type->is_record()) {
302 this->writemask = WRITEMASK_XYZW;
303 } else {
304 this->writemask = (1 << type->vector_elements) - 1;
305 }
306
307 this->type = brw_type_for_base_type(type);
308 }
309
310 /* Our support for uniforms is piggy-backed on the struct
311 * gl_fragment_program, because that's where the values actually
312 * get stored, rather than in some global gl_shader_program uniform
313 * store.
314 */
/* Recursively registers the uniform starting at parameter slot "loc"
 * with the compile's prog_data param[]/param_convert[] tables, and
 * returns the number of parameter slots consumed.
 */
int
vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;

   /* Matrices are handled column by column, recursing with a column
    * vector type for each one.
    */
   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
							type->vector_elements,
							1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
	 offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      /* Each used channel points at the Mesa parameter value and gets
       * a conversion matching its base type (parameters are stored as
       * floats).
       */
      for (unsigned int i = 0; i < type->vector_elements; i++) {
	 int slot = this->uniforms * 4 + i;
	 switch (type->base_type) {
	 case GLSL_TYPE_FLOAT:
	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
	    break;
	 case GLSL_TYPE_UINT:
	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
	    break;
	 case GLSL_TYPE_INT:
	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
	    break;
	 case GLSL_TYPE_BOOL:
	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
	    break;
	 default:
	    assert(!"not reached");
	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
	    break;
	 }
	 c->prog_data.param[slot] = &values[i];
      }

      /* Zero-fill the unused channels of the vec4 slot. */
      for (unsigned int i = type->vector_elements; i < 4; i++) {
	 c->prog_data.param_convert[this->uniforms * 4 + i] =
	    PARAM_CONVERT_ZERO;
	 c->prog_data.param[this->uniforms * 4 + i] = NULL;
      }

      this->uniform_size[this->uniforms] = type->vector_elements;
      this->uniforms++;

      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset,
					type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}
394
395 /* Our support for builtin uniforms is even scarier than non-builtin.
396 * It sits on top of the PROG_STATE_VAR parameters that are
397 * automatically updated from GL context state.
398 */
void
vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const ir_state_slot *const slots = ir->state_slots;
   assert(ir->state_slots != NULL);

   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
      /* This state reference has already been setup by ir_to_mesa,
       * but we'll get the same index back here.  We can reference
       * ParameterValues directly, since unlike brw_fs.cpp, we never
       * add new state references during compile.
       */
      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
					    (gl_state_index *)slots[i].tokens);
      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;

      this->uniform_size[this->uniforms] = 0;
      /* Add each of the unique swizzled channels of the element.
       * This will end up matching the size of the glsl_type of this field.
       */
      int last_swiz = -1;
      for (unsigned int j = 0; j < 4; j++) {
	 int swiz = GET_SWZ(slots[i].swizzle, j);
	 /* A repeated swizzle channel marks the end of the unique
	  * components for this slot.
	  */
	 if (swiz == last_swiz)
	    break;
	 last_swiz = swiz;

	 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
	 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
	 this->uniform_size[this->uniforms]++;
      }
      this->uniforms++;
   }
}
433
434 dst_reg *
435 vec4_visitor::variable_storage(ir_variable *var)
436 {
437 return (dst_reg *)hash_table_find(this->variable_ht, var);
438 }
439
/* Emits an instruction whose conditional_mod evaluates the given
 * boolean rvalue, so that a following predicated instruction can test
 * the condition flags.
 */
void
vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;

      /* At most two operands, each scalar, since this computes a
       * single condition.
       */
      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
	 assert(expr->operands[i]->type->is_scalar());

	 expr->operands[i]->accept(this);
	 op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
	 /* Booleans here are 0/1 values, so NOT is "bit 0 is zero". */
	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 break;

      case ir_binop_logic_xor:
	 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_logic_or:
	 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_logic_and:
	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_unop_f2b:
	 /* Gen6+ can CMP against 0.0 directly; older parts set the
	  * flags from a MOV through a float-typed null register.
	  */
	 if (intel->gen >= 6) {
	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
	 } else {
	    inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
	 }
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_unop_i2b:
	 if (intel->gen >= 6) {
	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
	 } else {
	    inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
	 }
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
	 /* Comparisons map directly to a CMP with the matching
	  * conditional modifier.
	  */
	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
	 inst->conditional_mod =
	    brw_conditional_for_comparison(expr->operation);
	 break;

      default:
	 assert(!"not reached");
	 break;
      }
      return;
   }

   /* Not a recognized expression: evaluate the rvalue as a 0/1
    * boolean and test bit 0 (gen6+) or the whole value (gen4/5).
    */
   ir->accept(this);

   if (intel->gen >= 6) {
      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
			       this->result, src_reg(1));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}
527
528 /**
529 * Emit a gen6 IF statement with the comparison folded into the IF
530 * instruction.
531 */
void
vec4_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;
      dst_reg temp;

      /* Operands must be scalar, except for the vector comparisons
       * (any_nequal/all_equal), which reduce to one condition below.
       */
      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
	 assert(expr->operands[i]->type->is_scalar() ||
		expr->operation == ir_binop_any_nequal ||
		expr->operation == ir_binop_all_equal);

	 expr->operands[i]->accept(this);
	 op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 return;

      case ir_binop_logic_xor:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_logic_or:
	 /* OR/AND need a real temporary holding the combined value
	  * before the IF can compare it against zero.
	  */
	 temp = dst_reg(this, glsl_type::bool_type);
	 emit(BRW_OPCODE_OR, temp, op[0], op[1]);
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_logic_and:
	 temp = dst_reg(this, glsl_type::bool_type);
	 emit(BRW_OPCODE_AND, temp, op[0], op[1]);
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_unop_f2b:
	 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_unop_i2b:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_nequal:
	 /* Scalar comparison folded straight into the IF. */
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod =
	    brw_conditional_for_comparison(expr->operation);
	 return;

      case ir_binop_all_equal:
	 /* Compare per channel, then take the IF only when all four
	  * channels matched.
	  */
	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_Z;

	 inst = emit(BRW_OPCODE_IF);
	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
	 return;

      case ir_binop_any_nequal:
	 /* Compare per channel, then take the IF when any channel
	  * differed.
	  */
	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;

	 inst = emit(BRW_OPCODE_IF);
	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
	 return;

      default:
	 assert(!"not reached");
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;
      }
      return;
   }

   /* Fallback: evaluate the condition as a value and emit an IF
    * comparing it against zero.
    */
   ir->condition->accept(this);

   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
			    this->result, src_reg(0));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
629
/* Allocates storage for a variable the first time it is seen, keyed
 * by mode: attributes, outputs, temporaries, or uniforms.
 */
void
vec4_visitor::visit(ir_variable *ir)
{
   dst_reg *reg = NULL;

   /* Only allocate storage once per variable. */
   if (variable_storage(ir))
      return;

   switch (ir->mode) {
   case ir_var_in:
      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
      break;

   case ir_var_out:
      reg = new(mem_ctx) dst_reg(this, ir->type);

      /* Record every register of the output in output_reg[] so the
       * later URB-write setup can find each vec4 of it by location.
       */
      for (int i = 0; i < type_size(ir->type); i++) {
	 output_reg[ir->location + i] = *reg;
	 output_reg[ir->location + i].reg_offset = i;
	 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
      }
      break;

   case ir_var_auto:
   case ir_var_temporary:
      reg = new(mem_ctx) dst_reg(this, ir->type);
      break;

   case ir_var_uniform:
      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);

      /* Built-in uniforms are all named "gl_*"; everything else goes
       * through the regular parameter-value path.
       */
      if (!strncmp(ir->name, "gl_", 3)) {
	 setup_builtin_uniform_values(ir);
      } else {
	 setup_uniform_values(ir->location, ir->type);
      }
      break;

   default:
      assert(!"not reached");
   }

   reg->type = brw_type_for_base_type(ir->type);
   hash_table_insert(this->variable_ht, reg, ir);
}
675
void
vec4_visitor::visit(ir_loop *ir)
{
   ir_dereference_variable *counter = NULL;

   /* NOTE(review): loop support is not finished -- fail() flags the
    * compile, and the codegen below is the intended shape: counter
    * init, DO, bounds-check break, body, increment, WHILE.
    */
   fail("not yet\n");

   /* We don't want debugging output to print the whole body of the
    * loop as the annotation.
    */
   this->base_ir = NULL;

   if (ir->counter != NULL)
      counter = new(ir) ir_dereference_variable(ir->counter);

   /* Initialize the counter from the loop's "from" expression. */
   if (ir->from != NULL) {
      assert(ir->counter != NULL);

      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);

      a->accept(this);
      delete a;
   }

   emit(BRW_OPCODE_DO);

   /* Emit "if (!(counter cmp to)) break;" at the top of the loop. */
   if (ir->to) {
      ir_expression *e =
	 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
			       counter, ir->to);
      ir_if *if_stmt =  new(ir) ir_if(e);

      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);

      if_stmt->then_instructions.push_tail(brk);

      if_stmt->accept(this);

      delete if_stmt;
      delete e;
      delete brk;
   }

   visit_instructions(&ir->body_instructions);

   /* Advance the counter by the loop increment. */
   if (ir->increment) {
      ir_expression *e =
	 new(ir) ir_expression(ir_binop_add, counter->type,
			       counter, ir->increment);

      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);

      a->accept(this);
      delete a;
      delete e;
   }

   emit(BRW_OPCODE_WHILE);
}
735
736 void
737 vec4_visitor::visit(ir_loop_jump *ir)
738 {
739 switch (ir->mode) {
740 case ir_loop_jump::jump_break:
741 emit(BRW_OPCODE_BREAK);
742 break;
743 case ir_loop_jump::jump_continue:
744 emit(BRW_OPCODE_CONTINUE);
745 break;
746 }
747 }
748
749
750 void
751 vec4_visitor::visit(ir_function_signature *ir)
752 {
753 assert(0);
754 (void)ir;
755 }
756
757 void
758 vec4_visitor::visit(ir_function *ir)
759 {
760 /* Ignore function bodies other than main() -- we shouldn't see calls to
761 * them since they should all be inlined.
762 */
763 if (strcmp(ir->name, "main") == 0) {
764 const ir_function_signature *sig;
765 exec_list empty;
766
767 sig = ir->matching_signature(&empty);
768
769 assert(sig);
770
771 visit_instructions(&sig->body);
772 }
773 }
774
775 GLboolean
776 vec4_visitor::try_emit_sat(ir_expression *ir)
777 {
778 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
779 if (!sat_src)
780 return false;
781
782 sat_src->accept(this);
783 src_reg src = this->result;
784
785 this->result = src_reg(this, ir->type);
786 vec4_instruction *inst;
787 inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
788 inst->saturate = true;
789
790 return true;
791 }
792
/* Emits a comparison producing a 0/1 boolean value in dst. */
void
vec4_visitor::emit_bool_comparison(unsigned int op,
				 dst_reg dst, src_reg src0, src_reg src1)
{
   /* original gen4 does destination conversion before comparison. */
   if (intel->gen < 5)
      dst.type = src0.type;

   vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
   inst->conditional_mod = brw_conditional_for_comparison(op);

   /* Mask the all-ones comparison result down to a 0/1 boolean. */
   dst.type = BRW_REGISTER_TYPE_D;
   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
}
807
/* Generates code for a GLSL IR expression, leaving the value's
 * register in this->result.  Operands are evaluated first; most
 * operations then map to one or a few vec4 instructions writing a
 * fresh temporary.
 */
void
vec4_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[Elements(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;
   vec4_instruction *inst;

   /* A saturate() wrapped around the expression folds into a MOV's
    * saturate modifier instead of the general path.
    */
   if (try_emit_sat(ir))
      return;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = BAD_FILE;
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
	 printf("Failed to get tree for expression operand:\n");
	 ir->operands[operand]->print();
	 exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   /* NOTE(review): vector_elements is computed but not used by any
    * case below in this revision.
    */
   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
			     ir->operands[1]->type->vector_elements);
   }

   this->result.file = BAD_FILE;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = src_reg(this, ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* If nothing special happens, this is the result. */
   this->result = result_src;
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * ones complement of the whole register, not just bit 0.
       */
      emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
      break;
   case ir_unop_neg:
      /* Negate/abs are free: just flip source modifiers, no code. */
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      op[0].negate = false;
      this->result = op[0];
      break;

   case ir_unop_sign:
      /* sign(x): start at 0.0, then predicate-select 1.0 where x > 0
       * and -1.0 where x < 0.
       */
      emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      break;

   case ir_unop_rcp:
      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
      break;
   case ir_unop_log2:
      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
   case ir_unop_sin_reduced:
      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
   case ir_unop_cos_reduced:
      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
   case ir_unop_dFdy:
      assert(!"derivatives not valid in vertex shader");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_binop_add:
      emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      /* fallthrough -- debug builds abort above */
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
	 temp.type = op[0].type;

      /* CMP writes all-ones on pass; AND masks it down to 0/1. */
      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
      emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
      break;
   }

   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
	  ir->operands[1]->type->is_vector()) {
	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_Z;

	 /* Write 0, then predicated on all channels matching, write 1. */
	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
      } else {
	 dst_reg temp = result_dst;
	 /* original gen4 does implicit conversion before comparison. */
	 if (intel->gen < 5)
	    temp.type = op[0].type;

	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
	  ir->operands[1]->type->is_vector()) {
	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;

	 /* Write 0, then predicated on any channel differing, write 1. */
	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      } else {
	 dst_reg temp = result_dst;
	 /* original gen4 does implicit conversion before comparison. */
	 if (intel->gen < 5)
	    temp.type = op[0].type;

	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;

   case ir_unop_any:
      emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));

      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      break;

   case ir_binop_logic_xor:
      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or:
      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_and:
      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
      break;
   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_f2i:
      /* Type conversions are handled by the MOV's differing
       * source/destination register types.
       */
      emit(BRW_OPCODE_MOV, result_dst, op[0]);
      break;
   case ir_unop_f2b:
   case ir_unop_i2b: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
	 temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
      break;
   }

   case ir_unop_trunc:
      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      /* ceil(x) == -floor(-x): negate the source, round down, and
       * mark the result for negation on read.
       */
      op[0].negate = !op[0].negate;
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      break;
   case ir_unop_fract:
      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
      break;
   case ir_unop_round_even:
      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
      break;

   case ir_binop_min:
      /* min/max: compare, then predicate-select between the operands. */
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
   case ir_binop_max:
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;

   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
      break;
   case ir_binop_bit_and:
      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_xor:
      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_or:
      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_lshift:
   case ir_binop_rshift:
      assert(!"GLSL 1.30 features unsupported");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;
   }
}
1112
1113
1114 void
1115 vec4_visitor::visit(ir_swizzle *ir)
1116 {
1117 src_reg src;
1118 int i = 0;
1119 int swizzle[4];
1120
1121 /* Note that this is only swizzles in expressions, not those on the left
1122 * hand side of an assignment, which do write masking. See ir_assignment
1123 * for that.
1124 */
1125
1126 ir->val->accept(this);
1127 src = this->result;
1128 assert(src.file != BAD_FILE);
1129
1130 for (i = 0; i < ir->type->vector_elements; i++) {
1131 switch (i) {
1132 case 0:
1133 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1134 break;
1135 case 1:
1136 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1137 break;
1138 case 2:
1139 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1140 break;
1141 case 3:
1142 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1143 break;
1144 }
1145 }
1146 for (; i < 4; i++) {
1147 /* Replicate the last channel out. */
1148 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1149 }
1150
1151 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1152
1153 this->result = src;
1154 }
1155
1156 void
1157 vec4_visitor::visit(ir_dereference_variable *ir)
1158 {
1159 const struct glsl_type *type = ir->type;
1160 dst_reg *reg = variable_storage(ir->var);
1161
1162 if (!reg) {
1163 fail("Failed to find variable storage for %s\n", ir->var->name);
1164 this->result = src_reg(brw_null_reg());
1165 return;
1166 }
1167
1168 this->result = src_reg(*reg);
1169
1170 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1171 this->result.swizzle = swizzle_for_size(type->vector_elements);
1172 }
1173
void
vec4_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   src_reg src;
   /* Size (in vec4 register slots) of one array element. */
   int element_size = type_size(ir->type);

   /* Try to fold the index to a compile-time constant so the element can
    * be reached with a plain register-offset adjustment.
    */
   constant_index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   if (constant_index) {
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
      /* Variable indexing is not implemented yet; the disabled code below
       * sketches a reladdr-based approach.  Until then a non-constant
       * index silently falls through with no offset applied.
       */
#if 0 /* Variable array index */
      /* Variable index array dereference. It eats the "vec4" of the
       * base of the array and an index that offsets the Mesa register
       * index.
       */
      ir->array_index->accept(this);

      src_reg index_reg;

      if (element_size == 1) {
	 index_reg = this->result;
      } else {
	 index_reg = src_reg(this, glsl_type::float_type);

	 emit(BRW_OPCODE_MUL, dst_reg(index_reg),
	      this->result, src_reg_for_float(element_size));
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
#endif
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = BRW_SWIZZLE_NOOP;
   src.type = brw_type_for_base_type(ir->type);

   this->result = src;
}
1221
1222 void
1223 vec4_visitor::visit(ir_dereference_record *ir)
1224 {
1225 unsigned int i;
1226 const glsl_type *struct_type = ir->record->type;
1227 int offset = 0;
1228
1229 ir->record->accept(this);
1230
1231 for (i = 0; i < struct_type->length; i++) {
1232 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1233 break;
1234 offset += type_size(struct_type->fields.structure[i].type);
1235 }
1236
1237 /* If the type is smaller than a vec4, replicate the last channel out. */
1238 if (ir->type->is_scalar() || ir->type->is_vector())
1239 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1240 else
1241 this->result.swizzle = BRW_SWIZZLE_NOOP;
1242 this->result.type = brw_type_for_base_type(ir->type);
1243
1244 this->result.reg_offset += offset;
1245 }
1246
1247 /**
1248 * We want to be careful in assignment setup to hit the actual storage
1249 * instead of potentially using a temporary like we might with the
1250 * ir_dereference handler.
1251 */
1252 static dst_reg
1253 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1254 {
1255 /* The LHS must be a dereference. If the LHS is a variable indexed array
1256 * access of a vector, it must be separated into a series conditional moves
1257 * before reaching this point (see ir_vec_index_to_cond_assign).
1258 */
1259 assert(ir->as_dereference());
1260 ir_dereference_array *deref_array = ir->as_dereference_array();
1261 if (deref_array) {
1262 assert(!deref_array->array->type->is_vector());
1263 }
1264
1265 /* Use the rvalue deref handler for the most part. We'll ignore
1266 * swizzles in it and write swizzles using writemask, though.
1267 */
1268 ir->accept(v);
1269 return dst_reg(v->result);
1270 }
1271
1272 void
1273 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1274 const struct glsl_type *type, bool predicated)
1275 {
1276 if (type->base_type == GLSL_TYPE_STRUCT) {
1277 for (unsigned int i = 0; i < type->length; i++) {
1278 emit_block_move(dst, src, type->fields.structure[i].type, predicated);
1279 }
1280 return;
1281 }
1282
1283 if (type->is_array()) {
1284 for (unsigned int i = 0; i < type->length; i++) {
1285 emit_block_move(dst, src, type->fields.array, predicated);
1286 }
1287 return;
1288 }
1289
1290 if (type->is_matrix()) {
1291 const struct glsl_type *vec_type;
1292
1293 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1294 type->vector_elements, 1);
1295
1296 for (int i = 0; i < type->matrix_columns; i++) {
1297 emit_block_move(dst, src, vec_type, predicated);
1298 }
1299 return;
1300 }
1301
1302 assert(type->is_scalar() || type->is_vector());
1303
1304 dst->type = brw_type_for_base_type(type);
1305 src->type = dst->type;
1306
1307 dst->writemask = (1 << type->vector_elements) - 1;
1308
1309 /* Do we need to worry about swizzling a swizzle? */
1310 assert(src->swizzle = BRW_SWIZZLE_NOOP);
1311 src->swizzle = swizzle_for_size(type->vector_elements);
1312
1313 vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
1314 if (predicated)
1315 inst->predicate = BRW_PREDICATE_NORMAL;
1316
1317 dst->reg_offset++;
1318 src->reg_offset++;
1319 }
1320
void
vec4_visitor::visit(ir_assignment *ir)
{
   dst_reg dst = get_assignment_lhs(ir->lhs, this);

   /* Aggregate LHS (struct/array/matrix): copy block by block; the write
    * mask machinery below only applies to scalars and vectors.
    */
   if (!ir->lhs->type->is_scalar() &&
       !ir->lhs->type->is_vector()) {
      ir->rhs->accept(this);
      src_reg src = this->result;

      if (ir->condition) {
	 emit_bool_to_cond_code(ir->condition);
      }

      emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
      return;
   }

   /* Now we're down to just a scalar/vector with writemasks. */
   int i;
   /* NOTE(review): the two "for (int i ...)" loops below shadow this i;
    * only the final copy loop uses the outer declaration.
    */

   ir->rhs->accept(this);
   src_reg src = this->result;

   int swizzles[4];
   int first_enabled_chan = 0;
   int src_chan = 0;

   assert(ir->lhs->type->is_vector() ||
	  ir->lhs->type->is_scalar());
   dst.writemask = ir->write_mask;

   /* Find the source channel that feeds the first written channel; it is
    * replicated into all the unwritten swizzle slots below.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
	 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
	 break;
      }
   }

   /* Swizzle a small RHS vector into the channels being written.
    *
    * glsl ir treats write_mask as dictating how many channels are
    * present on the RHS while in our instructions we need to make
    * those channels appear in the slots of the vec4 they're written to.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i))
	 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
      else
	 swizzles[i] = first_enabled_chan;
   }
   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
			      swizzles[2], swizzles[3]);

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   /* One MOV per vec4 slot of the LHS type, predicated if conditional. */
   for (i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);

      if (ir->condition)
	 inst->predicate = BRW_PREDICATE_NORMAL;

      dst.reg_offset++;
      src.reg_offset++;
   }
}
1389
1390
1391 void
1392 vec4_visitor::visit(ir_constant *ir)
1393 {
1394 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1395 src_reg temp_base = src_reg(this, ir->type);
1396 dst_reg temp = dst_reg(temp_base);
1397
1398 foreach_iter(exec_list_iterator, iter, ir->components) {
1399 ir_constant *field_value = (ir_constant *)iter.get();
1400 int size = type_size(field_value->type);
1401
1402 assert(size > 0);
1403
1404 field_value->accept(this);
1405 src_reg src = this->result;
1406
1407 for (int i = 0; i < (unsigned int)size; i++) {
1408 emit(BRW_OPCODE_MOV, temp, src);
1409
1410 src.reg_offset++;
1411 temp.reg_offset++;
1412 }
1413 }
1414 this->result = temp_base;
1415 return;
1416 }
1417
1418 if (ir->type->is_array()) {
1419 src_reg temp_base = src_reg(this, ir->type);
1420 dst_reg temp = dst_reg(temp_base);
1421 int size = type_size(ir->type->fields.array);
1422
1423 assert(size > 0);
1424
1425 for (unsigned int i = 0; i < ir->type->length; i++) {
1426 ir->array_elements[i]->accept(this);
1427 src_reg src = this->result;
1428 for (int j = 0; j < size; j++) {
1429 emit(BRW_OPCODE_MOV, temp, src);
1430
1431 src.reg_offset++;
1432 temp.reg_offset++;
1433 }
1434 }
1435 this->result = temp_base;
1436 return;
1437 }
1438
1439 if (ir->type->is_matrix()) {
1440 this->result = src_reg(this, ir->type);
1441 dst_reg dst = dst_reg(this->result);
1442
1443 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1444
1445 for (int i = 0; i < ir->type->matrix_columns; i++) {
1446 for (int j = 0; j < ir->type->vector_elements; j++) {
1447 dst.writemask = 1 << j;
1448 emit(BRW_OPCODE_MOV, dst,
1449 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
1450 }
1451 dst.reg_offset++;
1452 }
1453 return;
1454 }
1455
1456 this->result = src_reg(this, ir->type);
1457 dst_reg dst = dst_reg(this->result);
1458
1459 for (int i = 0; i < ir->type->vector_elements; i++) {
1460 dst.writemask = 1 << i;
1461
1462 switch (ir->type->base_type) {
1463 case GLSL_TYPE_FLOAT:
1464 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i]));
1465 break;
1466 case GLSL_TYPE_INT:
1467 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i]));
1468 break;
1469 case GLSL_TYPE_UINT:
1470 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i]));
1471 break;
1472 case GLSL_TYPE_BOOL:
1473 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i]));
1474 break;
1475 default:
1476 assert(!"Non-float/uint/int/bool constant");
1477 break;
1478 }
1479 }
1480 }
1481
void
vec4_visitor::visit(ir_call *ir)
{
   /* Calls are expected to be eliminated before this backend runs —
    * presumably by function inlining in the GLSL IR passes; TODO confirm.
    */
   assert(!"not reached");
}
1487
void
vec4_visitor::visit(ir_texture *ir)
{
   /* Vertex-shader texturing is not implemented in this visitor yet. */
   assert(!"not reached");
}
1493
void
vec4_visitor::visit(ir_return *ir)
{
   /* Returns are expected to be lowered away before codegen — presumably
    * by a lower-returns IR pass; TODO confirm which pass guarantees this.
    */
   assert(!"not reached");
}
1499
void
vec4_visitor::visit(ir_discard *ir)
{
   /* discard is a fragment-shader-only construct; it can never appear in
    * the vertex shader IR this visitor consumes.
    */
   assert(!"not reached");
}
1505
void
vec4_visitor::visit(ir_if *ir)
{
   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   if (intel->gen == 6) {
      /* gen6 has its own IF emission path — see emit_if_gen6. */
      emit_if_gen6(ir);
   } else {
      /* Evaluate the condition into the flag register, then emit an IF
       * predicated on it.
       */
      emit_bool_to_cond_code(ir->condition);
      vec4_instruction *inst = emit(BRW_OPCODE_IF);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->then_instructions);

   /* ELSE is only emitted when the else block is non-empty. */
   if (!ir->else_instructions.is_empty()) {
      this->base_ir = ir->condition;
      emit(BRW_OPCODE_ELSE);

      visit_instructions(&ir->else_instructions);
   }

   this->base_ir = ir->condition;
   emit(BRW_OPCODE_ENDIF);
}
1534
1535 int
1536 vec4_visitor::emit_vue_header_gen4(int header_mrf)
1537 {
1538 /* Get the position */
1539 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
1540
1541 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1542 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1543
1544 current_annotation = "NDC";
1545 dst_reg ndc_w = ndc;
1546 ndc_w.writemask = WRITEMASK_W;
1547 src_reg pos_w = pos;
1548 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1549 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1550
1551 dst_reg ndc_xyz = ndc;
1552 ndc_xyz.writemask = WRITEMASK_XYZ;
1553
1554 emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
1555
1556 if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
1557 c->key.nr_userclip || brw->has_negative_rhw_bug) {
1558 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1559 GLuint i;
1560
1561 emit(BRW_OPCODE_MOV, header1, 0u);
1562
1563 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1564 assert(!"finishme: psiz");
1565 src_reg psiz;
1566
1567 header1.writemask = WRITEMASK_W;
1568 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
1569 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
1570 }
1571
1572 for (i = 0; i < c->key.nr_userclip; i++) {
1573 vec4_instruction *inst;
1574
1575 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
1576 pos, src_reg(c->userplane[i]));
1577 inst->conditional_mod = BRW_CONDITIONAL_L;
1578
1579 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
1580 inst->predicate = BRW_PREDICATE_NORMAL;
1581 }
1582
1583 /* i965 clipping workaround:
1584 * 1) Test for -ve rhw
1585 * 2) If set,
1586 * set ndc = (0,0,0,0)
1587 * set ucp[6] = 1
1588 *
1589 * Later, clipping will detect ucp[6] and ensure the primitive is
1590 * clipped against all fixed planes.
1591 */
1592 if (brw->has_negative_rhw_bug) {
1593 #if 0
1594 /* FINISHME */
1595 brw_CMP(p,
1596 vec8(brw_null_reg()),
1597 BRW_CONDITIONAL_L,
1598 brw_swizzle1(ndc, 3),
1599 brw_imm_f(0));
1600
1601 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
1602 brw_MOV(p, ndc, brw_imm_f(0));
1603 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1604 #endif
1605 }
1606
1607 header1.writemask = WRITEMASK_XYZW;
1608 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
1609 } else {
1610 emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
1611 BRW_REGISTER_TYPE_UD), 0u);
1612 }
1613
1614 if (intel->gen == 5) {
1615 /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
1616 * dword 0-3 (m1) of the header is indices, point width, clip flags.
1617 * dword 4-7 (m2) is the ndc position (set above)
1618 * dword 8-11 (m3) of the vertex header is the 4D space position
1619 * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
1620 * m6 is a pad so that the vertex element data is aligned
1621 * m7 is the first vertex data we fill.
1622 */
1623 current_annotation = "NDC";
1624 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1625
1626 current_annotation = "gl_Position";
1627 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1628
1629 /* user clip distance. */
1630 header_mrf += 2;
1631
1632 /* Pad so that vertex element data is aligned. */
1633 header_mrf++;
1634 } else {
1635 /* There are 8 dwords in VUE header pre-Ironlake:
1636 * dword 0-3 (m1) is indices, point width, clip flags.
1637 * dword 4-7 (m2) is ndc position (set above)
1638 *
1639 * dword 8-11 (m3) is the first vertex data.
1640 */
1641 current_annotation = "NDC";
1642 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1643
1644 current_annotation = "gl_Position";
1645 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1646 }
1647
1648 return header_mrf;
1649 }
1650
1651 int
1652 vec4_visitor::emit_vue_header_gen6(int header_mrf)
1653 {
1654 struct brw_reg reg;
1655
1656 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
1657 * dword 0-3 (m2) of the header is indices, point width, clip flags.
1658 * dword 4-7 (m3) is the 4D space position
1659 * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
1660 * enabled.
1661 *
1662 * m4 or 6 is the first vertex element data we fill.
1663 */
1664
1665 current_annotation = "indices, point width, clip flags";
1666 reg = brw_message_reg(header_mrf++);
1667 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
1668 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1669 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
1670 src_reg(output_reg[VERT_RESULT_PSIZ]));
1671 }
1672
1673 current_annotation = "gl_Position";
1674 emit(BRW_OPCODE_MOV,
1675 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
1676
1677 current_annotation = "user clip distances";
1678 if (c->key.nr_userclip) {
1679 for (int i = 0; i < c->key.nr_userclip; i++) {
1680 struct brw_reg m;
1681 if (i < 4)
1682 m = brw_message_reg(header_mrf);
1683 else
1684 m = brw_message_reg(header_mrf + 1);
1685
1686 emit(BRW_OPCODE_DP4,
1687 dst_reg(brw_writemask(m, 1 << (i & 3))),
1688 src_reg(c->userplane[i]));
1689 }
1690 header_mrf += 2;
1691 }
1692
1693 current_annotation = NULL;
1694
1695 return header_mrf;
1696 }
1697
1698 static int
1699 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
1700 {
1701 struct intel_context *intel = &brw->intel;
1702
1703 if (intel->gen >= 6) {
1704 /* URB data written (does not include the message header reg) must
1705 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
1706 * section 5.4.3.2.2: URB_INTERLEAVED.
1707 *
1708 * URB entries are allocated on a multiple of 1024 bits, so an
1709 * extra 128 bits written here to make the end align to 256 is
1710 * no problem.
1711 */
1712 if ((mlen % 2) != 1)
1713 mlen++;
1714 }
1715
1716 return mlen;
1717 }
1718
1719 /**
1720 * Generates the VUE payload plus the 1 or 2 URB write instructions to
1721 * complete the VS thread.
1722 *
1723 * The VUE layout is documented in Volume 2a.
1724 */
1725 void
1726 vec4_visitor::emit_urb_writes()
1727 {
1728 int base_mrf = 1;
1729 int mrf = base_mrf;
1730 int urb_entry_size;
1731
1732 /* FINISHME: edgeflag */
1733
1734 /* First mrf is the g0-based message header containing URB handles and such,
1735 * which is implied in VS_OPCODE_URB_WRITE.
1736 */
1737 mrf++;
1738
1739 if (intel->gen >= 6) {
1740 mrf = emit_vue_header_gen6(mrf);
1741 } else {
1742 mrf = emit_vue_header_gen4(mrf);
1743 }
1744
1745 int attr;
1746 for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
1747 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1748 continue;
1749
1750 /* This is set up in the VUE header. */
1751 if (attr == VERT_RESULT_HPOS)
1752 continue;
1753
1754 /* This is loaded into the VUE header, and thus doesn't occupy
1755 * an attribute slot.
1756 */
1757 if (attr == VERT_RESULT_PSIZ)
1758 continue;
1759
1760 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
1761
1762 /* If this is MRF 15, we can't fit anything more into this URB
1763 * WRITE. Note that base_mrf of 1 means that MRF 15 is an
1764 * even-numbered amount of URB write data, which will meet
1765 * gen6's requirements for length alignment.
1766 */
1767 if (mrf == 15)
1768 break;
1769 }
1770
1771 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
1772 inst->base_mrf = base_mrf;
1773 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1774 inst->eot = true;
1775
1776 urb_entry_size = mrf - base_mrf;
1777
1778 for (; attr < VERT_RESULT_MAX; attr++) {
1779 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1780 continue;
1781 fail("Second URB write not supported.\n");
1782 break;
1783 }
1784
1785 if (intel->gen == 6)
1786 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
1787 else
1788 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
1789 }
1790
1791 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
1792 struct gl_shader_program *prog,
1793 struct brw_shader *shader)
1794 {
1795 this->c = c;
1796 this->p = &c->func;
1797 this->brw = p->brw;
1798 this->intel = &brw->intel;
1799 this->ctx = &intel->ctx;
1800 this->prog = prog;
1801 this->shader = shader;
1802
1803 this->mem_ctx = ralloc_context(NULL);
1804 this->failed = false;
1805
1806 this->base_ir = NULL;
1807 this->current_annotation = NULL;
1808
1809 this->c = c;
1810 this->vp = brw->vertex_program; /* FINISHME: change for precompile */
1811 this->prog_data = &c->prog_data;
1812
1813 this->variable_ht = hash_table_ctor(0,
1814 hash_table_pointer_hash,
1815 hash_table_pointer_compare);
1816
1817 this->virtual_grf_sizes = NULL;
1818 this->virtual_grf_count = 0;
1819 this->virtual_grf_array_size = 0;
1820
1821 this->uniforms = 0;
1822
1823 this->variable_ht = hash_table_ctor(0,
1824 hash_table_pointer_hash,
1825 hash_table_pointer_compare);
1826 }
1827
vec4_visitor::~vec4_visitor()
{
   /* NOTE(review): mem_ctx (allocated in the constructor) is not released
    * here — presumably because fail_msg lives in it and outlives the
    * visitor; TODO confirm who frees it.
    */
   hash_table_dtor(this->variable_ht);
}
1832
1833
1834 void
1835 vec4_visitor::fail(const char *format, ...)
1836 {
1837 va_list va;
1838 char *msg;
1839
1840 if (failed)
1841 return;
1842
1843 failed = true;
1844
1845 va_start(va, format);
1846 msg = ralloc_vasprintf(mem_ctx, format, va);
1847 va_end(va);
1848 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
1849
1850 this->fail_msg = msg;
1851
1852 if (INTEL_DEBUG & DEBUG_VS) {
1853 fprintf(stderr, "%s", msg);
1854 }
1855 }
1856
1857 } /* namespace brw */