i965: Start adding the VS visitor and codegen.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 #include "main/macros.h"
26
27 namespace brw {
28
29 src_reg::src_reg(dst_reg reg)
30 {
31 init();
32
33 this->file = reg.file;
34 this->reg = reg.reg;
35 this->reg_offset = reg.reg_offset;
36 this->type = reg.type;
37
38 int swizzles[4];
39 int next_chan = 0;
40 int last = 0;
41
42 for (int i = 0; i < 4; i++) {
43 if (!(reg.writemask & (1 << i)))
44 continue;
45
46 swizzles[next_chan++] = last = i;
47 }
48
49 for (; next_chan < 4; next_chan++) {
50 swizzles[next_chan] = last;
51 }
52
53 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
54 swizzles[2], swizzles[3]);
55 }
56
57 dst_reg::dst_reg(src_reg reg)
58 {
59 init();
60
61 this->file = reg.file;
62 this->reg = reg.reg;
63 this->reg_offset = reg.reg_offset;
64 this->type = reg.type;
65 this->writemask = WRITEMASK_XYZW;
66 }
67
68 vec4_instruction *
69 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
70 src_reg src0, src_reg src1, src_reg src2)
71 {
72 vec4_instruction *inst = new(mem_ctx) vec4_instruction();
73
74 inst->opcode = opcode;
75 inst->dst = dst;
76 inst->src[0] = src0;
77 inst->src[1] = src1;
78 inst->src[2] = src2;
79 inst->ir = this->base_ir;
80 inst->annotation = this->current_annotation;
81
82 this->instructions.push_tail(inst);
83
84 return inst;
85 }
86
87
/* Two-source convenience wrapper; src2 is left as a default (unused)
 * src_reg.
 */
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
{
   return emit(opcode, dst, src0, src1, src_reg());
}
93
/* One-source convenience wrapper.  The destination must enable at
 * least one channel or the instruction would be a no-op.
 */
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
{
   assert(dst.writemask != 0);
   return emit(opcode, dst, src0, src_reg(), src_reg());
}
100
/* Zero-operand wrapper, used for flow-control opcodes (DO, WHILE,
 * BREAK, CONTINUE, ...).
 */
vec4_instruction *
vec4_visitor::emit(enum opcode opcode)
{
   return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
}
106
107 void
108 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
109 {
110 static enum opcode dot_opcodes[] = {
111 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
112 };
113
114 emit(dot_opcodes[elements - 2], dst, src0, src1);
115 }
116
117 void
118 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
119 {
120 /* The gen6 math instruction ignores the source modifiers --
121 * swizzle, abs, negate, and at least some parts of the register
122 * region description. Move the source to the corresponding slots
123 * of the destination generally work.
124 */
125 src_reg expanded = src_reg(this, glsl_type::float_type);
126 emit(BRW_OPCODE_MOV, dst, src);
127 src = expanded;
128
129 emit(opcode, dst, src);
130 }
131
132 void
133 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
134 {
135 vec4_instruction *inst = emit(opcode, dst, src);
136 inst->base_mrf = 1;
137 inst->mlen = 1;
138 }
139
140 void
141 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
142 {
143 switch (opcode) {
144 case SHADER_OPCODE_RCP:
145 case SHADER_OPCODE_RSQ:
146 case SHADER_OPCODE_SQRT:
147 case SHADER_OPCODE_EXP2:
148 case SHADER_OPCODE_LOG2:
149 case SHADER_OPCODE_SIN:
150 case SHADER_OPCODE_COS:
151 break;
152 default:
153 assert(!"not reached: bad math opcode");
154 return;
155 }
156
157 if (intel->gen >= 6) {
158 return emit_math1_gen6(opcode, dst, src);
159 } else {
160 return emit_math1_gen4(opcode, dst, src);
161 }
162 }
163
164 void
165 vec4_visitor::emit_math2_gen6(enum opcode opcode,
166 dst_reg dst, src_reg src0, src_reg src1)
167 {
168 src_reg expanded;
169
170 /* The gen6 math instruction ignores the source modifiers --
171 * swizzle, abs, negate, and at least some parts of the register
172 * region description. Move the sources to temporaries to make it
173 * generally work.
174 */
175
176 expanded = src_reg(this, glsl_type::vec4_type);
177 emit(BRW_OPCODE_MOV, dst, src0);
178 src0 = expanded;
179
180 expanded = src_reg(this, glsl_type::vec4_type);
181 emit(BRW_OPCODE_MOV, dst, src1);
182 src1 = expanded;
183
184 emit(opcode, dst, src0, src1);
185 }
186
187 void
188 vec4_visitor::emit_math2_gen4(enum opcode opcode,
189 dst_reg dst, src_reg src0, src_reg src1)
190 {
191 vec4_instruction *inst = emit(opcode, dst, src0, src1);
192 inst->base_mrf = 1;
193 inst->mlen = 2;
194 }
195
196 void
197 vec4_visitor::emit_math(enum opcode opcode,
198 dst_reg dst, src_reg src0, src_reg src1)
199 {
200 assert(opcode == SHADER_OPCODE_POW);
201
202 if (intel->gen >= 6) {
203 return emit_math2_gen6(opcode, dst, src0, src1);
204 } else {
205 return emit_math2_gen4(opcode, dst, src0, src1);
206 }
207 }
208
/* Walk a GLSL IR instruction list, dispatching each node through the
 * visitor.  base_ir is updated per node so instructions emitted while
 * visiting it are tagged with the IR that produced them.
 */
void
vec4_visitor::visit_instructions(const exec_list *list)
{
   foreach_iter(exec_list_iterator, iter, *list) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      base_ir = ir;
      ir->accept(this);
   }
}
219
220
221 static int
222 type_size(const struct glsl_type *type)
223 {
224 unsigned int i;
225 int size;
226
227 switch (type->base_type) {
228 case GLSL_TYPE_UINT:
229 case GLSL_TYPE_INT:
230 case GLSL_TYPE_FLOAT:
231 case GLSL_TYPE_BOOL:
232 if (type->is_matrix()) {
233 return type->matrix_columns;
234 } else {
235 /* Regardless of size of vector, it gets a vec4. This is bad
236 * packing for things like floats, but otherwise arrays become a
237 * mess. Hopefully a later pass over the code can pack scalars
238 * down if appropriate.
239 */
240 return 1;
241 }
242 case GLSL_TYPE_ARRAY:
243 assert(type->length > 0);
244 return type_size(type->fields.array) * type->length;
245 case GLSL_TYPE_STRUCT:
246 size = 0;
247 for (i = 0; i < type->length; i++) {
248 size += type_size(type->fields.structure[i].type);
249 }
250 return size;
251 case GLSL_TYPE_SAMPLER:
252 /* Samplers take up one slot in UNIFORMS[], but they're baked in
253 * at link time.
254 */
255 return 1;
256 default:
257 assert(0);
258 return 0;
259 }
260 }
261
262 int
263 vec4_visitor::virtual_grf_alloc(int size)
264 {
265 if (virtual_grf_array_size <= virtual_grf_count) {
266 if (virtual_grf_array_size == 0)
267 virtual_grf_array_size = 16;
268 else
269 virtual_grf_array_size *= 2;
270 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
271 virtual_grf_array_size);
272 }
273 virtual_grf_sizes[virtual_grf_count] = size;
274 return virtual_grf_count++;
275 }
276
277 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
278 {
279 init();
280
281 this->file = GRF;
282 this->reg = v->virtual_grf_alloc(type_size(type));
283
284 if (type->is_array() || type->is_record()) {
285 this->swizzle = BRW_SWIZZLE_NOOP;
286 } else {
287 this->swizzle = swizzle_for_size(type->vector_elements);
288 }
289
290 this->type = brw_type_for_base_type(type);
291 }
292
293 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
294 {
295 init();
296
297 this->file = GRF;
298 this->reg = v->virtual_grf_alloc(type_size(type));
299
300 if (type->is_array() || type->is_record()) {
301 this->writemask = WRITEMASK_XYZW;
302 } else {
303 this->writemask = (1 << type->vector_elements) - 1;
304 }
305
306 this->type = brw_type_for_base_type(type);
307 }
308
/* Look up the register previously assigned to \p var, or NULL if the
 * variable hasn't been visited (allocated) yet.
 */
dst_reg *
vec4_visitor::variable_storage(ir_variable *var)
{
   return (dst_reg *)hash_table_find(this->variable_ht, var);
}
314
/* Emit instructions that set the flag register according to a boolean
 * rvalue, for use by a following predicated instruction.  Comparisons
 * are folded directly into the flag-generating instruction where
 * possible; otherwise the value is evaluated and tested against zero.
 */
void
vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
	 /* Only scalar conditions are expected here; vector comparisons
	  * arrive as all_equal/any_nequal.
	  */
	 assert(expr->operands[i]->type->is_scalar());

	 expr->operands[i]->accept(this);
	 op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
	 /* Booleans are stored as 0/1, so "not" is "bit 0 is zero". */
	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 break;

      case ir_binop_logic_xor:
	 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_logic_or:
	 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_logic_and:
	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_unop_f2b:
	 /* float-to-bool: true iff the value is not 0.0.  Pre-gen6 a
	  * MOV to a null destination is enough to set the flag.
	  */
	 if (intel->gen >= 6) {
	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
	 } else {
	    inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
	 }
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_unop_i2b:
	 if (intel->gen >= 6) {
	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
	 } else {
	    inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
	 }
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
	 /* Fold the comparison straight into the flag-setting CMP. */
	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
	 inst->conditional_mod =
	    brw_conditional_for_comparison(expr->operation);
	 break;

      default:
	 assert(!"not reached");
	 break;
      }
      return;
   }

   /* Not a foldable expression: evaluate it, then test bit 0 (gen6+)
    * or the whole value (pre-gen6) against zero.
    */
   ir->accept(this);

   if (intel->gen >= 6) {
      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
				    this->result, src_reg(1));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}
402
/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 *
 * On gen6 the IF instruction can perform the comparison itself, so
 * most boolean conditions avoid a separate flag-setting instruction.
 */
void
vec4_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;
      dst_reg temp;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
	 /* Only scalar conditions are expected here. */
	 assert(expr->operands[i]->type->is_scalar());

	 expr->operands[i]->accept(this);
	 op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
	 /* "!x" folds to "IF x == 0". */
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 return;

      case ir_binop_logic_xor:
	 /* "a ^^ b" is true iff the 0/1 values differ. */
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_logic_or:
	 /* OR/AND need an extra instruction to combine the operands
	  * before the IF can test the result against zero.
	  */
	 temp = dst_reg(this, glsl_type::bool_type);
	 emit(BRW_OPCODE_OR, temp, op[0], op[1]);
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_logic_and:
	 temp = dst_reg(this, glsl_type::bool_type);
	 emit(BRW_OPCODE_AND, temp, op[0], op[1]);
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_unop_f2b:
	 /* float-to-bool: true iff not 0.0 (note the float null dst). */
	 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_unop_i2b:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
	 /* Fold the comparison directly into the IF. */
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod =
	    brw_conditional_for_comparison(expr->operation);
	 return;
      default:
	 assert(!"not reached");
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;
      }
      return;
   }

   /* Plain boolean value: evaluate and test against zero. */
   ir->condition->accept(this);

   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
				 this->result, src_reg(0));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
487
/* Allocate storage for a variable declaration and record it in
 * variable_ht so later dereferences can find it.
 */
void
vec4_visitor::visit(ir_variable *ir)
{
   dst_reg *reg = NULL;

   /* Already allocated on a previous visit. */
   if (variable_storage(ir))
      return;

   switch (ir->mode) {
   case ir_var_in:
      /* Vertex attributes live in the ATTR file at the linked location. */
      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
      reg->type = brw_type_for_base_type(ir->type);
      hash_table_insert(this->variable_ht, reg, ir);
      break;

   case ir_var_out:
      reg = new(mem_ctx) dst_reg(this, ir->type);
      hash_table_insert(this->variable_ht, reg, ir);

      /* Record every vec4 slot of the output by location so the URB
       * writes can find them later.
       */
      for (int i = 0; i < type_size(ir->type); i++) {
	 output_reg[ir->location + i] = *reg;
	 output_reg[ir->location + i].reg_offset = i;
      }
      break;

   case ir_var_temporary:
      reg = new(mem_ctx) dst_reg(this, ir->type);
      hash_table_insert(this->variable_ht, reg, ir);

      break;

   case ir_var_uniform:
      /* FINISHME: uniforms */
      break;
   }
}
524
/* Lower a GLSL loop (with optional counter/from/to/increment) into a
 * DO ... WHILE block, synthesizing the induction-variable setup, exit
 * test and increment as fresh IR that is visited immediately.
 */
void
vec4_visitor::visit(ir_loop *ir)
{
   ir_dereference_variable *counter = NULL;

   /* We don't want debugging output to print the whole body of the
    * loop as the annotation.
    */
   this->base_ir = NULL;

   if (ir->counter != NULL)
      counter = new(ir) ir_dereference_variable(ir->counter);

   if (ir->from != NULL) {
      assert(ir->counter != NULL);

      /* counter = from; emitted before entering the loop. */
      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);

      a->accept(this);
      delete a;
   }

   emit(BRW_OPCODE_DO);

   if (ir->to) {
      /* if (counter <cmp> to) break; at the top of the loop body. */
      ir_expression *e =
	 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
			       counter, ir->to);
      ir_if *if_stmt = new(ir) ir_if(e);

      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);

      if_stmt->then_instructions.push_tail(brk);

      if_stmt->accept(this);

      delete if_stmt;
      delete e;
      delete brk;
   }

   visit_instructions(&ir->body_instructions);

   if (ir->increment) {
      /* counter = counter + increment; at the bottom of the loop. */
      ir_expression *e =
	 new(ir) ir_expression(ir_binop_add, counter->type,
			       counter, ir->increment);

      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);

      a->accept(this);
      delete a;
      delete e;
   }

   emit(BRW_OPCODE_WHILE);
}
582
583 void
584 vec4_visitor::visit(ir_loop_jump *ir)
585 {
586 switch (ir->mode) {
587 case ir_loop_jump::jump_break:
588 emit(BRW_OPCODE_BREAK);
589 break;
590 case ir_loop_jump::jump_continue:
591 emit(BRW_OPCODE_CONTINUE);
592 break;
593 }
594 }
595
596
597 void
598 vec4_visitor::visit(ir_function_signature *ir)
599 {
600 assert(0);
601 (void)ir;
602 }
603
604 void
605 vec4_visitor::visit(ir_function *ir)
606 {
607 /* Ignore function bodies other than main() -- we shouldn't see calls to
608 * them since they should all be inlined.
609 */
610 if (strcmp(ir->name, "main") == 0) {
611 const ir_function_signature *sig;
612 exec_list empty;
613
614 sig = ir->matching_signature(&empty);
615
616 assert(sig);
617
618 visit_instructions(&sig->body);
619 }
620 }
621
622 GLboolean
623 vec4_visitor::try_emit_sat(ir_expression *ir)
624 {
625 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
626 if (!sat_src)
627 return false;
628
629 sat_src->accept(this);
630 src_reg src = this->result;
631
632 this->result = src_reg(this, ir->type);
633 vec4_instruction *inst;
634 inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
635 inst->saturate = true;
636
637 return true;
638 }
639
640 void
641 vec4_visitor::emit_bool_comparison(unsigned int op,
642 dst_reg dst, src_reg src0, src_reg src1)
643 {
644 /* original gen4 does destination conversion before comparison. */
645 if (intel->gen < 5)
646 dst.type = src0.type;
647
648 vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
649 inst->conditional_mod = brw_conditional_for_comparison(op);
650
651 dst.type = BRW_REGISTER_TYPE_D;
652 emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
653 }
654
/* Generate code for a GLSL IR expression tree: evaluate the operands,
 * allocate a temporary for the result, and emit the instruction
 * sequence for the operation.  The result register is left in
 * this->result.
 */
void
vec4_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[Elements(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;
   vec4_instruction *inst;

   /* A saturate of a single value can be emitted as one MOV.sat. */
   if (try_emit_sat(ir))
      return;

   /* Evaluate all operands first. */
   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = BAD_FILE;
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
	 printf("Failed to get tree for expression operand:\n");
	 ir->operands[operand]->print();
	 exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   /* NOTE(review): vector_elements is computed here but not used below
    * in this version -- presumably kept for later use.
    */
   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
			     ir->operands[1]->type->vector_elements);
   }

   this->result.file = BAD_FILE;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = src_reg(this, ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* If nothing special happens, this is the result. */
   this->result = result_src;
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * ones complement of the whole register, not just bit 0.
       */
      emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
      break;
   case ir_unop_neg:
      /* Negation and abs are free via source modifiers -- no
       * instruction emitted, just forward the modified operand.
       */
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      op[0].negate = false;
      this->result = op[0];
      break;

   case ir_unop_sign:
      /* sign(x): start at 0.0, predicate-overwrite with 1.0 where
       * x > 0 and with -1.0 where x < 0.
       */
      emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      break;

   case ir_unop_rcp:
      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
      break;
   case ir_unop_log2:
      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
   case ir_unop_sin_reduced:
      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
   case ir_unop_cos_reduced:
      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
   case ir_unop_dFdy:
      assert(!"derivatives not valid in vertex shader");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_binop_add:
      emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      /* fall through -- both cases are unreachable asserts. */
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
	 temp.type = op[0].type;

      /* CMP writes all-ones on pass; AND reduces it to a 0/1 bool. */
      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
      emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
      break;
   }

   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
	  ir->operands[1]->type->is_vector()) {
	 /* Compare all channels; ALL4H predication writes 1 only when
	  * every channel matched.
	  */
	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_Z;

	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
      } else {
	 dst_reg temp = result_dst;
	 /* original gen4 does implicit conversion before comparison. */
	 if (intel->gen < 5)
	    temp.type = op[0].type;

	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
	  ir->operands[1]->type->is_vector()) {
	 /* ANY4H predication writes 1 when any channel differed. */
	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;

	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      } else {
	 dst_reg temp = result_dst;
	 /* original gen4 does implicit conversion before comparison. */
	 if (intel->gen < 5)
	    temp.type = op[0].type;

	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;

   case ir_unop_any:
      /* any(bvec): true if any channel is nonzero. */
      emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));

      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      break;

   case ir_binop_logic_xor:
      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or:
      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_and:
      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
      break;
   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_f2i:
      /* Type conversions are handled by the MOV's differing src/dst
       * register types.
       */
      emit(BRW_OPCODE_MOV, result_dst, op[0]);
      break;
   case ir_unop_f2b:
   case ir_unop_i2b: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
	 temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
      break;
   }

   case ir_unop_trunc:
      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      /* ceil(x) = -floor(-x), using the round-down instruction. */
      op[0].negate = !op[0].negate;
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      break;
   case ir_unop_fract:
      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
      break;
   case ir_unop_round_even:
      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
      break;

   case ir_binop_min:
      /* min/max: compare, then predicated SEL of the two operands. */
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
   case ir_binop_max:
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;

   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
      break;
   case ir_binop_bit_and:
      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_xor:
      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_or:
      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_lshift:
   case ir_binop_rshift:
      assert(!"GLSL 1.30 features unsupported");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;
   }
}
959
960
961 void
962 vec4_visitor::visit(ir_swizzle *ir)
963 {
964 src_reg src;
965 int i = 0;
966 int swizzle[4];
967
968 /* Note that this is only swizzles in expressions, not those on the left
969 * hand side of an assignment, which do write masking. See ir_assignment
970 * for that.
971 */
972
973 ir->val->accept(this);
974 src = this->result;
975 assert(src.file != BAD_FILE);
976
977 if (i < ir->type->vector_elements) {
978 switch (i) {
979 case 0:
980 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
981 break;
982 case 1:
983 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
984 break;
985 case 2:
986 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
987 break;
988 case 3:
989 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
990 break;
991 }
992 }
993 for (; i < 4; i++) {
994 /* Replicate the last channel out. */
995 swizzle[i] = swizzle[ir->type->vector_elements - 1];
996 }
997
998 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
999
1000 this->result = src;
1001 }
1002
1003 void
1004 vec4_visitor::visit(ir_dereference_variable *ir)
1005 {
1006 dst_reg *reg = variable_storage(ir->var);
1007
1008 if (!reg) {
1009 fail("Failed to find variable storage for %s\n", ir->var->name);
1010 this->result = src_reg(brw_null_reg());
1011 return;
1012 }
1013
1014 this->result = src_reg(*reg);
1015 }
1016
/* Dereference an array element: offset the array's base register by
 * the (currently constant-only) index times the element size.
 */
void
vec4_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   src_reg src;
   int element_size = type_size(ir->type);

   constant_index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   if (constant_index) {
      /* Constant indices are just a register offset from the base. */
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
#if 0 /* Variable array index */
      /* Variable index array dereference.  It eats the "vec4" of the
       * base of the array and an index that offsets the Mesa register
       * index.
       */
      ir->array_index->accept(this);

      src_reg index_reg;

      if (element_size == 1) {
	 index_reg = this->result;
      } else {
	 index_reg = src_reg(this, glsl_type::float_type);

	 emit(BRW_OPCODE_MUL, dst_reg(index_reg),
	      this->result, src_reg_for_float(element_size));
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
#endif
      /* NOTE(review): variable indexing is not implemented yet (the
       * code above is disabled) -- a non-constant index currently
       * falls through with no offset applied.
       */
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = BRW_SWIZZLE_NOOP;

   this->result = src;
}
1063
1064 void
1065 vec4_visitor::visit(ir_dereference_record *ir)
1066 {
1067 unsigned int i;
1068 const glsl_type *struct_type = ir->record->type;
1069 int offset = 0;
1070
1071 ir->record->accept(this);
1072
1073 for (i = 0; i < struct_type->length; i++) {
1074 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1075 break;
1076 offset += type_size(struct_type->fields.structure[i].type);
1077 }
1078
1079 /* If the type is smaller than a vec4, replicate the last channel out. */
1080 if (ir->type->is_scalar() || ir->type->is_vector())
1081 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1082 else
1083 this->result.swizzle = BRW_SWIZZLE_NOOP;
1084
1085 this->result.reg_offset += offset;
1086 }
1087
1088 /**
1089 * We want to be careful in assignment setup to hit the actual storage
1090 * instead of potentially using a temporary like we might with the
1091 * ir_dereference handler.
1092 */
1093 static dst_reg
1094 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1095 {
1096 /* The LHS must be a dereference. If the LHS is a variable indexed array
1097 * access of a vector, it must be separated into a series conditional moves
1098 * before reaching this point (see ir_vec_index_to_cond_assign).
1099 */
1100 assert(ir->as_dereference());
1101 ir_dereference_array *deref_array = ir->as_dereference_array();
1102 if (deref_array) {
1103 assert(!deref_array->array->type->is_vector());
1104 }
1105
1106 /* Use the rvalue deref handler for the most part. We'll ignore
1107 * swizzles in it and write swizzles using writemask, though.
1108 */
1109 ir->accept(v);
1110 return dst_reg(v->result);
1111 }
1112
1113 void
1114 vec4_visitor::emit_block_move(ir_assignment *ir)
1115 {
1116 ir->rhs->accept(this);
1117 src_reg src = this->result;
1118
1119 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1120
1121 /* FINISHME: This should really set to the correct maximal writemask for each
1122 * FINISHME: component written (in the loops below).
1123 */
1124 dst.writemask = WRITEMASK_XYZW;
1125
1126 for (int i = 0; i < type_size(ir->lhs->type); i++) {
1127 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
1128 if (ir->condition)
1129 inst->predicate = BRW_PREDICATE_NORMAL;
1130
1131 dst.reg_offset++;
1132 src.reg_offset++;
1133 }
1134 }
1135
/* Generate code for an assignment: aggregates go through a block copy;
 * scalars/vectors get the RHS swizzled into the written channels and a
 * (possibly predicated) MOV.
 */
void
vec4_visitor::visit(ir_assignment *ir)
{
   if (!ir->lhs->type->is_scalar() &&
       !ir->lhs->type->is_vector()) {
      emit_block_move(ir);
      return;
   }

   /* Now we're down to just a scalar/vector with writemasks. */
   int i;

   ir->rhs->accept(this);
   src_reg src = this->result;

   dst_reg dst = get_assignment_lhs(ir->lhs, this);

   int swizzles[4];
   int first_enabled_chan = 0;
   int src_chan = 0;

   /* NOTE(review): this asserts a vector LHS, but the early-out above
    * also lets scalar LHS types through -- confirm scalar assignments
    * can't reach this path in practice.
    */
   assert(ir->lhs->type->is_vector());
   dst.writemask = ir->write_mask;

   /* Find the source channel feeding the first written component; it
    * fills the swizzle slots of the unwritten channels below.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
	 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
	 break;
      }
   }

   /* Swizzle a small RHS vector into the channels being written.
    *
    * glsl ir treats write_mask as dictating how many channels are
    * present on the RHS while in our instructions we need to make
    * those channels appear in the slots of the vec4 they're written to.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i))
	 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
      else
	 swizzles[i] = first_enabled_chan;
   }
   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
			      swizzles[2], swizzles[3]);

   /* Set the flag register if the write is conditional. */
   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   /* For a scalar/vector LHS, type_size() is 1, so one MOV is emitted. */
   for (i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);

      if (ir->condition)
	 inst->predicate = BRW_PREDICATE_NORMAL;

      dst.reg_offset++;
      src.reg_offset++;
   }
}
1196
1197
1198 void
1199 vec4_visitor::visit(ir_constant *ir)
1200 {
1201 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1202 src_reg temp_base = src_reg(this, ir->type);
1203 dst_reg temp = dst_reg(temp_base);
1204
1205 foreach_iter(exec_list_iterator, iter, ir->components) {
1206 ir_constant *field_value = (ir_constant *)iter.get();
1207 int size = type_size(field_value->type);
1208
1209 assert(size > 0);
1210
1211 field_value->accept(this);
1212 src_reg src = this->result;
1213
1214 for (int i = 0; i < (unsigned int)size; i++) {
1215 emit(BRW_OPCODE_MOV, temp, src);
1216
1217 src.reg_offset++;
1218 temp.reg_offset++;
1219 }
1220 }
1221 this->result = temp_base;
1222 return;
1223 }
1224
1225 if (ir->type->is_array()) {
1226 src_reg temp_base = src_reg(this, ir->type);
1227 dst_reg temp = dst_reg(temp_base);
1228 int size = type_size(ir->type->fields.array);
1229
1230 assert(size > 0);
1231
1232 for (unsigned int i = 0; i < ir->type->length; i++) {
1233 ir->array_elements[i]->accept(this);
1234 src_reg src = this->result;
1235 for (int j = 0; j < size; j++) {
1236 emit(BRW_OPCODE_MOV, temp, src);
1237
1238 src.reg_offset++;
1239 temp.reg_offset++;
1240 }
1241 }
1242 this->result = temp_base;
1243 return;
1244 }
1245
1246 if (ir->type->is_matrix()) {
1247 this->result = src_reg(this, ir->type);
1248 dst_reg dst = dst_reg(this->result);
1249
1250 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1251
1252 for (int i = 0; i < ir->type->matrix_columns; i++) {
1253 for (int j = 0; j < ir->type->vector_elements; j++) {
1254 dst.writemask = 1 << j;
1255 emit(BRW_OPCODE_MOV, dst,
1256 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
1257 }
1258 dst.reg_offset++;
1259 }
1260 return;
1261 }
1262
1263 for (int i = 0; i < ir->type->vector_elements; i++) {
1264 this->result = src_reg(this, ir->type);
1265 dst_reg dst = dst_reg(this->result);
1266
1267 dst.writemask = 1 << i;
1268
1269 switch (ir->type->base_type) {
1270 case GLSL_TYPE_FLOAT:
1271 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i]));
1272 break;
1273 case GLSL_TYPE_INT:
1274 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i]));
1275 break;
1276 case GLSL_TYPE_UINT:
1277 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i]));
1278 break;
1279 case GLSL_TYPE_BOOL:
1280 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i]));
1281 break;
1282 default:
1283 assert(!"Non-float/uint/int/bool constant");
1284 break;
1285 }
1286 }
1287 }
1288
void
vec4_visitor::visit(ir_call *ir)
{
   /* NOTE(review): no calls are expected to survive to codegen —
    * presumably function inlining flattens them earlier in the compile;
    * confirm which lowering pass guarantees this.
    */
   assert(!"not reached");
}
1294
void
vec4_visitor::visit(ir_texture *ir)
{
   /* NOTE(review): texture operations are not handled by this VS visitor;
    * presumably shaders reaching codegen contain no ir_texture nodes —
    * confirm against the front-end's capability checks.
    */
   assert(!"not reached");
}
1300
void
vec4_visitor::visit(ir_return *ir)
{
   /* NOTE(review): returns are expected to have been lowered away (e.g. by
    * jump lowering / inlining) before codegen — confirm which pass.
    */
   assert(!"not reached");
}
1306
void
vec4_visitor::visit(ir_discard *ir)
{
   /* discard is a fragment-shader construct; it can never appear in the
    * vertex-shader IR this visitor consumes.
    */
   assert(!"not reached");
}
1312
void
vec4_visitor::visit(ir_if *ir)
{
   /* Evaluate the condition expression; the rvalue visitor must leave a
    * valid result register behind.
    */
   this->base_ir = ir->condition;
   ir->condition->accept(this);
   assert(this->result.file != BAD_FILE);

   /* FINISHME: condcode */
   /* NOTE(review): the IF is emitted without hooking the condition result
    * up to it (see FINISHME above), so control flow is not yet functional.
    */
   emit(BRW_OPCODE_IF);

   visit_instructions(&ir->then_instructions);

   if (!ir->else_instructions.is_empty()) {
      this->base_ir = ir->condition;
      emit(BRW_OPCODE_ELSE);

      visit_instructions(&ir->else_instructions);
   }

   /* ENDIF closes the construct whether or not an ELSE was emitted. */
   this->base_ir = ir->condition;
   emit(BRW_OPCODE_ENDIF);
}
1335
1336 int
1337 vec4_visitor::emit_vue_header_gen4(int header_mrf)
1338 {
1339 /* Get the position */
1340 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
1341
1342 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1343 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1344
1345 current_annotation = "NDC";
1346 dst_reg ndc_w = ndc;
1347 ndc_w.writemask = WRITEMASK_W;
1348 src_reg pos_w = pos;
1349 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1350 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1351
1352 dst_reg ndc_xyz = ndc;
1353 ndc_xyz.writemask = WRITEMASK_XYZ;
1354
1355 emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
1356
1357 if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
1358 c->key.nr_userclip || brw->has_negative_rhw_bug) {
1359 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1360 GLuint i;
1361
1362 emit(BRW_OPCODE_MOV, header1, 0u);
1363
1364 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1365 assert(!"finishme: psiz");
1366 src_reg psiz;
1367
1368 header1.writemask = WRITEMASK_W;
1369 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
1370 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
1371 }
1372
1373 for (i = 0; i < c->key.nr_userclip; i++) {
1374 vec4_instruction *inst;
1375
1376 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
1377 pos, src_reg(c->userplane[i]));
1378 inst->conditional_mod = BRW_CONDITIONAL_L;
1379
1380 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
1381 inst->predicate = BRW_PREDICATE_NORMAL;
1382 }
1383
1384 /* i965 clipping workaround:
1385 * 1) Test for -ve rhw
1386 * 2) If set,
1387 * set ndc = (0,0,0,0)
1388 * set ucp[6] = 1
1389 *
1390 * Later, clipping will detect ucp[6] and ensure the primitive is
1391 * clipped against all fixed planes.
1392 */
1393 if (brw->has_negative_rhw_bug) {
1394 #if 0
1395 /* FINISHME */
1396 brw_CMP(p,
1397 vec8(brw_null_reg()),
1398 BRW_CONDITIONAL_L,
1399 brw_swizzle1(ndc, 3),
1400 brw_imm_f(0));
1401
1402 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
1403 brw_MOV(p, ndc, brw_imm_f(0));
1404 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1405 #endif
1406 }
1407
1408 header1.writemask = WRITEMASK_XYZW;
1409 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
1410 } else {
1411 emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
1412 BRW_REGISTER_TYPE_UD), 0u);
1413 }
1414
1415 if (intel->gen == 5) {
1416 /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
1417 * dword 0-3 (m1) of the header is indices, point width, clip flags.
1418 * dword 4-7 (m2) is the ndc position (set above)
1419 * dword 8-11 (m3) of the vertex header is the 4D space position
1420 * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
1421 * m6 is a pad so that the vertex element data is aligned
1422 * m7 is the first vertex data we fill, which is the vertex position.
1423 */
1424 current_annotation = "NDC";
1425 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1426
1427 current_annotation = "gl_Position";
1428 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1429
1430 /* user clip distance. */
1431 header_mrf += 2;
1432
1433 /* Pad so that vertex element data (starts with position) is aligned. */
1434 header_mrf++;
1435 } else {
1436 /* There are 8 dwords in VUE header pre-Ironlake:
1437 * dword 0-3 (m1) is indices, point width, clip flags.
1438 * dword 4-7 (m2) is ndc position (set above)
1439 *
1440 * dword 8-11 (m3) is the first vertex data, which we always have be the
1441 * vertex position.
1442 */
1443 current_annotation = "NDC";
1444 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1445
1446 current_annotation = "gl_Position";
1447 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1448 }
1449
1450 return header_mrf;
1451 }
1452
1453 int
1454 vec4_visitor::emit_vue_header_gen6(int header_mrf)
1455 {
1456 struct brw_reg reg;
1457
1458 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
1459 * dword 0-3 (m2) of the header is indices, point width, clip flags.
1460 * dword 4-7 (m3) is the 4D space position
1461 * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
1462 * enabled.
1463 *
1464 * m4 or 6 is the first vertex element data we fill, which is
1465 * the vertex position.
1466 */
1467
1468 current_annotation = "indices, point width, clip flags";
1469 reg = brw_message_reg(header_mrf++);
1470 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
1471 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1472 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
1473 src_reg(output_reg[VERT_RESULT_PSIZ]));
1474 }
1475
1476 current_annotation = "gl_Position";
1477 emit(BRW_OPCODE_MOV,
1478 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
1479
1480 current_annotation = "user clip distances";
1481 if (c->key.nr_userclip) {
1482 for (int i = 0; i < c->key.nr_userclip; i++) {
1483 struct brw_reg m;
1484 if (i < 4)
1485 m = brw_message_reg(header_mrf);
1486 else
1487 m = brw_message_reg(header_mrf + 1);
1488
1489 emit(BRW_OPCODE_DP4,
1490 dst_reg(brw_writemask(m, 1 << (i & 7))),
1491 src_reg(c->userplane[i]));
1492 }
1493 header_mrf += 2;
1494 }
1495
1496 current_annotation = NULL;
1497
1498 return header_mrf;
1499 }
1500
1501 static int
1502 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
1503 {
1504 struct intel_context *intel = &brw->intel;
1505
1506 if (intel->gen >= 6) {
1507 /* URB data written (does not include the message header reg) must
1508 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
1509 * section 5.4.3.2.2: URB_INTERLEAVED.
1510 *
1511 * URB entries are allocated on a multiple of 1024 bits, so an
1512 * extra 128 bits written here to make the end align to 256 is
1513 * no problem.
1514 */
1515 if ((mlen % 2) != 1)
1516 mlen++;
1517 }
1518
1519 return mlen;
1520 }
1521
/**
 * Generates the VUE payload plus the 1 or 2 URB write instructions to
 * complete the VS thread.
 *
 * The VUE layout is documented in Volume 2a.
 */
void
vec4_visitor::emit_urb_writes()
{
   int base_mrf = 1;
   int mrf = base_mrf;
   int urb_entry_size;

   /* FINISHME: edgeflag */

   /* First mrf is the g0-based message header containing URB handles and such,
    * which is implied in VS_OPCODE_URB_WRITE.
    */
   mrf++;

   /* Emit the VUE header (clip flags, NDC/position, etc.), which differs
    * between gen4/5 and gen6; both return the next free MRF.
    */
   if (intel->gen >= 6) {
      mrf = emit_vue_header_gen6(mrf);
   } else {
      mrf = emit_vue_header_gen4(mrf);
   }

   /* Copy each written output into a message register, in attribute order. */
   int attr;
   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
	 continue;

      /* This is loaded into the VUE header, and thus doesn't occupy
       * an attribute slot.
       */
      if (attr == VERT_RESULT_PSIZ)
	 continue;

      emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));

      /* If this is MRF 15, we can't fit anything more into this URB
       * WRITE. Note that base_mrf of 1 means that MRF 15 is an
       * even-numbered amount of URB write data, which will meet
       * gen6's requirements for length alignment.
       */
      if (mrf == 15)
	 break;
   }

   /* Terminate the thread with a single EOT URB write covering everything
    * emitted above.  mlen may be padded by one for gen6 alignment.
    */
   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
   inst->base_mrf = base_mrf;
   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
   inst->eot = true;

   urb_entry_size = mrf - base_mrf;

   /* Any outputs that didn't fit would need a second URB write, which is
    * not implemented yet — flag it as a compile failure.
    */
   for (; attr < VERT_RESULT_MAX; attr++) {
      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
	 continue;
      fail("Second URB write not supported.\n");
      break;
   }

   /* Record the URB entry size in the units the hardware expects:
    * 8-register granularity on gen6, 4-register granularity before that.
    */
   if (intel->gen == 6)
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
   else
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
}
1589
1590 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
1591 struct gl_shader_program *prog,
1592 struct brw_shader *shader)
1593 {
1594 this->c = c;
1595 this->p = &c->func;
1596 this->brw = p->brw;
1597 this->intel = &brw->intel;
1598 this->ctx = &intel->ctx;
1599 this->prog = prog;
1600 this->shader = shader;
1601
1602 this->mem_ctx = ralloc_context(NULL);
1603 this->failed = false;
1604
1605 this->base_ir = NULL;
1606 this->current_annotation = NULL;
1607
1608 this->c = c;
1609 this->prog_data = &c->prog_data;
1610
1611 this->variable_ht = hash_table_ctor(0,
1612 hash_table_pointer_hash,
1613 hash_table_pointer_compare);
1614
1615 this->virtual_grf_sizes = NULL;
1616 this->virtual_grf_count = 0;
1617 this->virtual_grf_array_size = 0;
1618 }
1619
vec4_visitor::~vec4_visitor()
{
   /* The variable hash table is the only resource not owned by mem_ctx. */
   hash_table_dtor(this->variable_ht);
}
1624
1625
1626 void
1627 vec4_visitor::fail(const char *format, ...)
1628 {
1629 va_list va;
1630 char *msg;
1631
1632 if (failed)
1633 return;
1634
1635 failed = true;
1636
1637 va_start(va, format);
1638 msg = ralloc_vasprintf(mem_ctx, format, va);
1639 va_end(va);
1640 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
1641
1642 this->fail_msg = msg;
1643
1644 if (INTEL_DEBUG & DEBUG_VS) {
1645 fprintf(stderr, "%s", msg);
1646 }
1647 }
1648
1649 } /* namespace brw */