i965/vs: Convert gen6 userclip handling to new generators.
[mesa.git] src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 }
29
30 namespace brw {
31
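/**
 * Convert a dst_reg to a src_reg: copy the register identity and build a
 * read swizzle from the write mask by packing the enabled channels together
 * and replicating the last enabled channel into any remaining slots.
 */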
32 src_reg::src_reg(dst_reg reg)
33 {
34 init();
35
36 this->file = reg.file;
37 this->reg = reg.reg;
38 this->reg_offset = reg.reg_offset;
39 this->type = reg.type;
40 this->reladdr = reg.reladdr;
41 this->fixed_hw_reg = reg.fixed_hw_reg;
42
43 int swizzles[4];
44 int next_chan = 0;
45 int last = 0;
46
47 for (int i = 0; i < 4; i++) {
48 if (!(reg.writemask & (1 << i)))
49 continue;
50
51 swizzles[next_chan++] = last = i;
52 }
53
54 for (; next_chan < 4; next_chan++) {
55 swizzles[next_chan] = last;
56 }
57
58 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
59 swizzles[2], swizzles[3]);
60 }
61
62 dst_reg::dst_reg(src_reg reg)
63 {
64 init();
65
66 this->file = reg.file;
67 this->reg = reg.reg;
68 this->reg_offset = reg.reg_offset;
69 this->type = reg.type;
70 this->writemask = WRITEMASK_XYZW;
71 this->reladdr = reg.reladdr;
72 this->fixed_hw_reg = reg.fixed_hw_reg;
73 }
74
75 vec4_instruction::vec4_instruction(vec4_visitor *v,
76 enum opcode opcode, dst_reg dst,
77 src_reg src0, src_reg src1, src_reg src2)
78 {
79 this->opcode = opcode;
80 this->dst = dst;
81 this->src[0] = src0;
82 this->src[1] = src1;
83 this->src[2] = src2;
84 this->ir = v->base_ir;
85 this->annotation = v->current_annotation;
86 }
87
88 vec4_instruction *
89 vec4_visitor::emit(vec4_instruction *inst)
90 {
91 this->instructions.push_tail(inst);
92
93 return inst;
94 }
95
96 vec4_instruction *
97 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
98 src_reg src0, src_reg src1, src_reg src2)
99 {
100 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
101 src0, src1, src2));
102 }
103
104
105 vec4_instruction *
106 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
107 {
108 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
109 }
110
111 vec4_instruction *
112 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
113 {
114 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
115 }
116
117 vec4_instruction *
118 vec4_visitor::emit(enum opcode opcode)
119 {
120 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
121 }
122
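/* Convenience builders for simple ALU instructions.  Note that these only
 * allocate the vec4_instruction; the caller still has to pass the result
 * through emit() to add it to the instruction stream.
 */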
123 #define ALU1(op) \
124 vec4_instruction * \
125 vec4_visitor::op(dst_reg dst, src_reg src0) \
126 { \
127 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
128 src0); \
129 }
130
131 #define ALU2(op) \
132 vec4_instruction * \
133 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
134 { \
135 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
136 src0, src1); \
137 }
138
139 ALU1(NOT)
140 ALU1(MOV)
141 ALU1(FRC)
142 ALU1(RNDD)
143 ALU1(RNDE)
144 ALU1(RNDZ)
145 ALU2(ADD)
146 ALU2(MUL)
147 ALU2(MACH)
148 ALU2(AND)
149 ALU2(OR)
150 ALU2(XOR)
151 ALU2(DP3)
152 ALU2(DP4)
153
154 /** Gen4 predicated IF. */
155 vec4_instruction *
156 vec4_visitor::IF(uint32_t predicate)
157 {
158 vec4_instruction *inst;
159
160 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
161 inst->predicate = predicate;
162
163 return inst;
164 }
165
166 /** Gen6+ IF with embedded comparison. */
167 vec4_instruction *
168 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
169 {
170 assert(intel->gen >= 6);
171
172 vec4_instruction *inst;
173
174 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
175 src0, src1);
176 inst->conditional_mod = condition;
177
178 return inst;
179 }
180
181 vec4_instruction *
182 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
183 {
184 vec4_instruction *inst;
185
186 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst,
187 src0, src1, src_reg());
188 inst->conditional_mod = condition;
189
190 return inst;
191 }
192
193 void
194 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
195 {
196 static enum opcode dot_opcodes[] = {
197 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
198 };
199
200 emit(dot_opcodes[elements - 2], dst, src0, src1);
201 }
202
203 void
204 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
205 {
206 /* The gen6 math instruction ignores the source modifiers --
207 * swizzle, abs, negate, and at least some parts of the register
208 * region description.
209 *
210 * While it would seem that this MOV could be avoided at this point
211 * in the case that the swizzle is matched up with the destination
212 * writemask, note that uniform packing and register allocation
213 * could rearrange our swizzle, so let's leave this matter up to
214 * copy propagation later.
215 */
216 src_reg temp_src = src_reg(this, glsl_type::vec4_type);
217 emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);
218
219 if (dst.writemask != WRITEMASK_XYZW) {
220 /* The gen6 math instruction must be align1, so we can't do
221 * writemasks.
222 */
223 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
224
225 emit(opcode, temp_dst, temp_src);
226
227 emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
228 } else {
229 emit(opcode, dst, temp_src);
230 }
231 }
232
233 void
234 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
235 {
236 vec4_instruction *inst = emit(opcode, dst, src);
237 inst->base_mrf = 1;
238 inst->mlen = 1;
239 }
240
241 void
242 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
243 {
244 switch (opcode) {
245 case SHADER_OPCODE_RCP:
246 case SHADER_OPCODE_RSQ:
247 case SHADER_OPCODE_SQRT:
248 case SHADER_OPCODE_EXP2:
249 case SHADER_OPCODE_LOG2:
250 case SHADER_OPCODE_SIN:
251 case SHADER_OPCODE_COS:
252 break;
253 default:
254 assert(!"not reached: bad math opcode");
255 return;
256 }
257
258 if (intel->gen >= 6) {
259 return emit_math1_gen6(opcode, dst, src);
260 } else {
261 return emit_math1_gen4(opcode, dst, src);
262 }
263 }
264
265 void
266 vec4_visitor::emit_math2_gen6(enum opcode opcode,
267 dst_reg dst, src_reg src0, src_reg src1)
268 {
269 src_reg expanded;
270
271 /* The gen6 math instruction ignores the source modifiers --
272 * swizzle, abs, negate, and at least some parts of the register
273 * region description. Move the sources to temporaries to make it
274 * generally work.
275 */
276
277 expanded = src_reg(this, glsl_type::vec4_type);
278 emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
279 src0 = expanded;
280
281 expanded = src_reg(this, glsl_type::vec4_type);
282 emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
283 src1 = expanded;
284
285 if (dst.writemask != WRITEMASK_XYZW) {
286 /* The gen6 math instruction must be align1, so we can't do
287 * writemasks.
288 */
289 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
290
291 emit(opcode, temp_dst, src0, src1);
292
293 emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
294 } else {
295 emit(opcode, dst, src0, src1);
296 }
297 }
298
299 void
300 vec4_visitor::emit_math2_gen4(enum opcode opcode,
301 dst_reg dst, src_reg src0, src_reg src1)
302 {
303 vec4_instruction *inst = emit(opcode, dst, src0, src1);
304 inst->base_mrf = 1;
305 inst->mlen = 2;
306 }
307
308 void
309 vec4_visitor::emit_math(enum opcode opcode,
310 dst_reg dst, src_reg src0, src_reg src1)
311 {
312 assert(opcode == SHADER_OPCODE_POW);
313
314 if (intel->gen >= 6) {
315 return emit_math2_gen6(opcode, dst, src0, src1);
316 } else {
317 return emit_math2_gen4(opcode, dst, src0, src1);
318 }
319 }
320
321 void
322 vec4_visitor::visit_instructions(const exec_list *list)
323 {
324 foreach_list(node, list) {
325 ir_instruction *ir = (ir_instruction *)node;
326
327 base_ir = ir;
328 ir->accept(this);
329 }
330 }
331
332
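/**
 * Returns the size of a GLSL type in vec4 register slots.  Scalars and
 * vectors each take a full vec4; matrices take one slot per column; arrays
 * and structs are the sum of their elements, and samplers count as one slot.
 */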
333 static int
334 type_size(const struct glsl_type *type)
335 {
336 unsigned int i;
337 int size;
338
339 switch (type->base_type) {
340 case GLSL_TYPE_UINT:
341 case GLSL_TYPE_INT:
342 case GLSL_TYPE_FLOAT:
343 case GLSL_TYPE_BOOL:
344 if (type->is_matrix()) {
345 return type->matrix_columns;
346 } else {
347 /* Regardless of size of vector, it gets a vec4. This is bad
348 * packing for things like floats, but otherwise arrays become a
349 * mess. Hopefully a later pass over the code can pack scalars
350 * down if appropriate.
351 */
352 return 1;
353 }
354 case GLSL_TYPE_ARRAY:
355 assert(type->length > 0);
356 return type_size(type->fields.array) * type->length;
357 case GLSL_TYPE_STRUCT:
358 size = 0;
359 for (i = 0; i < type->length; i++) {
360 size += type_size(type->fields.structure[i].type);
361 }
362 return size;
363 case GLSL_TYPE_SAMPLER:
364 /* Samplers take up one slot in UNIFORMS[], but they're baked in
365 * at link time.
366 */
367 return 1;
368 default:
369 assert(0);
370 return 0;
371 }
372 }
373
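/**
 * Allocates a new virtual GRF of the given size in vec4 registers, growing
 * the size-tracking array as needed, and returns its index.
 */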
374 int
375 vec4_visitor::virtual_grf_alloc(int size)
376 {
377 if (virtual_grf_array_size <= virtual_grf_count) {
378 if (virtual_grf_array_size == 0)
379 virtual_grf_array_size = 16;
380 else
381 virtual_grf_array_size *= 2;
382 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
383 virtual_grf_array_size);
384 }
385 virtual_grf_sizes[virtual_grf_count] = size;
386 return virtual_grf_count++;
387 }
388
389 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
390 {
391 init();
392
393 this->file = GRF;
394 this->reg = v->virtual_grf_alloc(type_size(type));
395
396 if (type->is_array() || type->is_record()) {
397 this->swizzle = BRW_SWIZZLE_NOOP;
398 } else {
399 this->swizzle = swizzle_for_size(type->vector_elements);
400 }
401
402 this->type = brw_type_for_base_type(type);
403 }
404
405 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
406 {
407 init();
408
409 this->file = GRF;
410 this->reg = v->virtual_grf_alloc(type_size(type));
411
412 if (type->is_array() || type->is_record()) {
413 this->writemask = WRITEMASK_XYZW;
414 } else {
415 this->writemask = (1 << type->vector_elements) - 1;
416 }
417
418 this->type = brw_type_for_base_type(type);
419 }
420
421 /* Our support for uniforms is piggy-backed on the struct
422  * gl_vertex_program, because that's where the values actually
423 * get stored, rather than in some global gl_shader_program uniform
424 * store.
425 */
426 int
427 vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
428 {
429 unsigned int offset = 0;
430 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
431
432 if (type->is_matrix()) {
433 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
434 type->vector_elements,
435 1);
436
437 for (unsigned int i = 0; i < type->matrix_columns; i++) {
438 offset += setup_uniform_values(loc + offset, column);
439 }
440
441 return offset;
442 }
443
444 switch (type->base_type) {
445 case GLSL_TYPE_FLOAT:
446 case GLSL_TYPE_UINT:
447 case GLSL_TYPE_INT:
448 case GLSL_TYPE_BOOL:
449 for (unsigned int i = 0; i < type->vector_elements; i++) {
450 c->prog_data.param[this->uniforms * 4 + i] = &values[i];
451 }
452
453 /* Set up pad elements to get things aligned to a vec4 boundary. */
454 for (unsigned int i = type->vector_elements; i < 4; i++) {
455 static float zero = 0;
456
457 c->prog_data.param[this->uniforms * 4 + i] = &zero;
458 }
459
460 /* Track the size of this uniform vector, for future packing of
461 * uniforms.
462 */
463 this->uniform_vector_size[this->uniforms] = type->vector_elements;
464 this->uniforms++;
465
466 return 1;
467
468 case GLSL_TYPE_STRUCT:
469 for (unsigned int i = 0; i < type->length; i++) {
470 offset += setup_uniform_values(loc + offset,
471 type->fields.structure[i].type);
472 }
473 return offset;
474
475 case GLSL_TYPE_ARRAY:
476 for (unsigned int i = 0; i < type->length; i++) {
477 offset += setup_uniform_values(loc + offset, type->fields.array);
478 }
479 return offset;
480
481 case GLSL_TYPE_SAMPLER:
482 /* The sampler takes up a slot, but we don't use any values from it. */
483 return 1;
484
485 default:
486 assert(!"not reached");
487 return 0;
488 }
489 }
490
491 /* Our support for builtin uniforms is even scarier than non-builtin.
492 * It sits on top of the PROG_STATE_VAR parameters that are
493 * automatically updated from GL context state.
494 */
495 void
496 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
497 {
498 const ir_state_slot *const slots = ir->state_slots;
499 assert(ir->state_slots != NULL);
500
501 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
502 /* This state reference has already been setup by ir_to_mesa,
503 * but we'll get the same index back here. We can reference
504 * ParameterValues directly, since unlike brw_fs.cpp, we never
505 * add new state references during compile.
506 */
507 int index = _mesa_add_state_reference(this->vp->Base.Parameters,
508 (gl_state_index *)slots[i].tokens);
509 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
510
511 this->uniform_vector_size[this->uniforms] = 0;
512 /* Add each of the unique swizzled channels of the element.
513 * This will end up matching the size of the glsl_type of this field.
514 */
515 int last_swiz = -1;
516 for (unsigned int j = 0; j < 4; j++) {
517 int swiz = GET_SWZ(slots[i].swizzle, j);
518 last_swiz = swiz;
519
520 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
521 if (swiz <= last_swiz)
522 this->uniform_vector_size[this->uniforms]++;
523 }
524 this->uniforms++;
525 }
526 }
527
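/**
 * Looks up the register storage previously assigned to an ir_variable, or
 * returns NULL if the variable has not been visited yet.
 */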
528 dst_reg *
529 vec4_visitor::variable_storage(ir_variable *var)
530 {
531 return (dst_reg *)hash_table_find(this->variable_ht, var);
532 }
533
534 void
535 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
536 {
537 ir_expression *expr = ir->as_expression();
538
539 if (expr) {
540 src_reg op[2];
541 vec4_instruction *inst;
542
543 assert(expr->get_num_operands() <= 2);
544 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
545 assert(expr->operands[i]->type->is_scalar());
546
547 expr->operands[i]->accept(this);
548 op[i] = this->result;
549 }
550
551 switch (expr->operation) {
552 case ir_unop_logic_not:
553 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
554 inst->conditional_mod = BRW_CONDITIONAL_Z;
555 break;
556
557 case ir_binop_logic_xor:
558 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
559 inst->conditional_mod = BRW_CONDITIONAL_NZ;
560 break;
561
562 case ir_binop_logic_or:
563 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
564 inst->conditional_mod = BRW_CONDITIONAL_NZ;
565 break;
566
567 case ir_binop_logic_and:
568 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
569 inst->conditional_mod = BRW_CONDITIONAL_NZ;
570 break;
571
572 case ir_unop_f2b:
573 if (intel->gen >= 6) {
574 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
575 } else {
576 inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
577 }
578 inst->conditional_mod = BRW_CONDITIONAL_NZ;
579 break;
580
581 case ir_unop_i2b:
582 if (intel->gen >= 6) {
583 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
584 } else {
585 inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
586 }
587 inst->conditional_mod = BRW_CONDITIONAL_NZ;
588 break;
589
590 case ir_binop_greater:
591 case ir_binop_gequal:
592 case ir_binop_less:
593 case ir_binop_lequal:
594 case ir_binop_equal:
595 case ir_binop_all_equal:
596 case ir_binop_nequal:
597 case ir_binop_any_nequal:
598 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
599 inst->conditional_mod =
600 brw_conditional_for_comparison(expr->operation);
601 break;
602
603 default:
604 assert(!"not reached");
605 break;
606 }
607 return;
608 }
609
610 ir->accept(this);
611
612 if (intel->gen >= 6) {
613 vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
614 this->result, src_reg(1));
615 inst->conditional_mod = BRW_CONDITIONAL_NZ;
616 } else {
617 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
618 inst->conditional_mod = BRW_CONDITIONAL_NZ;
619 }
620 }
621
622 /**
623 * Emit a gen6 IF statement with the comparison folded into the IF
624 * instruction.
625 */
626 void
627 vec4_visitor::emit_if_gen6(ir_if *ir)
628 {
629 ir_expression *expr = ir->condition->as_expression();
630
631 if (expr) {
632 src_reg op[2];
633 vec4_instruction *inst;
634 dst_reg temp;
635
636 assert(expr->get_num_operands() <= 2);
637 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
638 expr->operands[i]->accept(this);
639 op[i] = this->result;
640 }
641
642 switch (expr->operation) {
643 case ir_unop_logic_not:
644 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
645 inst->conditional_mod = BRW_CONDITIONAL_Z;
646 return;
647
648 case ir_binop_logic_xor:
649 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
650 inst->conditional_mod = BRW_CONDITIONAL_NZ;
651 return;
652
653 case ir_binop_logic_or:
654 temp = dst_reg(this, glsl_type::bool_type);
655 emit(BRW_OPCODE_OR, temp, op[0], op[1]);
656 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
657 inst->conditional_mod = BRW_CONDITIONAL_NZ;
658 return;
659
660 case ir_binop_logic_and:
661 temp = dst_reg(this, glsl_type::bool_type);
662 emit(BRW_OPCODE_AND, temp, op[0], op[1]);
663 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
664 inst->conditional_mod = BRW_CONDITIONAL_NZ;
665 return;
666
667 case ir_unop_f2b:
668 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
669 inst->conditional_mod = BRW_CONDITIONAL_NZ;
670 return;
671
672 case ir_unop_i2b:
673 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
674 inst->conditional_mod = BRW_CONDITIONAL_NZ;
675 return;
676
677 case ir_binop_greater:
678 case ir_binop_gequal:
679 case ir_binop_less:
680 case ir_binop_lequal:
681 case ir_binop_equal:
682 case ir_binop_nequal:
683 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
684 inst->conditional_mod =
685 brw_conditional_for_comparison(expr->operation);
686 return;
687
688 case ir_binop_all_equal:
689 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
690 inst->conditional_mod = BRW_CONDITIONAL_Z;
691
692 inst = emit(BRW_OPCODE_IF);
693 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
694 return;
695
696 case ir_binop_any_nequal:
697 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
698 inst->conditional_mod = BRW_CONDITIONAL_NZ;
699
700 inst = emit(BRW_OPCODE_IF);
701 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
702 return;
703
704 case ir_unop_any:
705 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
706 inst->conditional_mod = BRW_CONDITIONAL_NZ;
707
708 inst = emit(BRW_OPCODE_IF);
709 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
710 return;
711
712 default:
713 assert(!"not reached");
714 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
715 inst->conditional_mod = BRW_CONDITIONAL_NZ;
716 return;
717 }
718 return;
719 }
720
721 ir->condition->accept(this);
722
723 vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
724 this->result, src_reg(0));
725 inst->conditional_mod = BRW_CONDITIONAL_NZ;
726 }
727
728 void
729 vec4_visitor::visit(ir_variable *ir)
730 {
731 dst_reg *reg = NULL;
732
733 if (variable_storage(ir))
734 return;
735
736 switch (ir->mode) {
737 case ir_var_in:
738 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
739
740 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
741 * come in as floating point conversions of the integer values.
742 */
743 for (int i = ir->location; i < ir->location + type_size(ir->type); i++) {
744 if (!c->key.gl_fixed_input_size[i])
745 continue;
746
747 dst_reg dst = *reg;
748 dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1;
749 emit(BRW_OPCODE_MUL, dst, src_reg(dst), src_reg(1.0f / 65536.0f));
750 }
751 break;
752
753 case ir_var_out:
754 reg = new(mem_ctx) dst_reg(this, ir->type);
755
756 for (int i = 0; i < type_size(ir->type); i++) {
757 output_reg[ir->location + i] = *reg;
758 output_reg[ir->location + i].reg_offset = i;
759 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
760 }
761 break;
762
763 case ir_var_auto:
764 case ir_var_temporary:
765 reg = new(mem_ctx) dst_reg(this, ir->type);
766 break;
767
768 case ir_var_uniform:
769 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
770
771 /* Track how big the whole uniform variable is, in case we need to put a
772 * copy of its data into pull constants for array access.
773 */
774 this->uniform_size[this->uniforms] = type_size(ir->type);
775
776 if (!strncmp(ir->name, "gl_", 3)) {
777 setup_builtin_uniform_values(ir);
778 } else {
779 setup_uniform_values(ir->location, ir->type);
780 }
781 break;
782
783 default:
784 assert(!"not reached");
785 }
786
787 reg->type = brw_type_for_base_type(ir->type);
788 hash_table_insert(this->variable_ht, reg, ir);
789 }
790
791 void
792 vec4_visitor::visit(ir_loop *ir)
793 {
794 dst_reg counter;
795
796 /* We don't want debugging output to print the whole body of the
797 * loop as the annotation.
798 */
799 this->base_ir = NULL;
800
801 if (ir->counter != NULL) {
802 this->base_ir = ir->counter;
803 ir->counter->accept(this);
804 counter = *(variable_storage(ir->counter));
805
806 if (ir->from != NULL) {
807 this->base_ir = ir->from;
808 ir->from->accept(this);
809
810 emit(BRW_OPCODE_MOV, counter, this->result);
811 }
812 }
813
814 emit(BRW_OPCODE_DO);
815
816 if (ir->to) {
817 this->base_ir = ir->to;
818 ir->to->accept(this);
819
820 vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(),
821 src_reg(counter), this->result);
822 inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);
823
824 inst = emit(BRW_OPCODE_BREAK);
825 inst->predicate = BRW_PREDICATE_NORMAL;
826 }
827
828 visit_instructions(&ir->body_instructions);
829
830
831 if (ir->increment) {
832 this->base_ir = ir->increment;
833 ir->increment->accept(this);
834 emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result);
835 }
836
837 emit(BRW_OPCODE_WHILE);
838 }
839
840 void
841 vec4_visitor::visit(ir_loop_jump *ir)
842 {
843 switch (ir->mode) {
844 case ir_loop_jump::jump_break:
845 emit(BRW_OPCODE_BREAK);
846 break;
847 case ir_loop_jump::jump_continue:
848 emit(BRW_OPCODE_CONTINUE);
849 break;
850 }
851 }
852
853
854 void
855 vec4_visitor::visit(ir_function_signature *ir)
856 {
857 assert(0);
858 (void)ir;
859 }
860
861 void
862 vec4_visitor::visit(ir_function *ir)
863 {
864 /* Ignore function bodies other than main() -- we shouldn't see calls to
865 * them since they should all be inlined.
866 */
867 if (strcmp(ir->name, "main") == 0) {
868 const ir_function_signature *sig;
869 exec_list empty;
870
871 sig = ir->matching_signature(&empty);
872
873 assert(sig);
874
875 visit_instructions(&sig->body);
876 }
877 }
878
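/**
 * If this expression is just an rvalue to be saturated, emit a saturating
 * MOV of it into a fresh temporary as our result and return true so the
 * caller can skip the normal expression handling.
 */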
879 GLboolean
880 vec4_visitor::try_emit_sat(ir_expression *ir)
881 {
882 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
883 if (!sat_src)
884 return false;
885
886 sat_src->accept(this);
887 src_reg src = this->result;
888
889 this->result = src_reg(this, ir->type);
890 vec4_instruction *inst;
891 inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
892 inst->saturate = true;
893
894 return true;
895 }
896
897 void
898 vec4_visitor::emit_bool_comparison(unsigned int op,
899 dst_reg dst, src_reg src0, src_reg src1)
900 {
901 /* original gen4 does destination conversion before comparison. */
902 if (intel->gen < 5)
903 dst.type = src0.type;
904
905 vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
906 inst->conditional_mod = brw_conditional_for_comparison(op);
907
908 dst.type = BRW_REGISTER_TYPE_D;
909 emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
910 }
911
912 void
913 vec4_visitor::visit(ir_expression *ir)
914 {
915 unsigned int operand;
916 src_reg op[Elements(ir->operands)];
917 src_reg result_src;
918 dst_reg result_dst;
919 vec4_instruction *inst;
920
921 if (try_emit_sat(ir))
922 return;
923
924 for (operand = 0; operand < ir->get_num_operands(); operand++) {
925 this->result.file = BAD_FILE;
926 ir->operands[operand]->accept(this);
927 if (this->result.file == BAD_FILE) {
928 printf("Failed to get tree for expression operand:\n");
929 ir->operands[operand]->print();
930 exit(1);
931 }
932 op[operand] = this->result;
933
934 /* Matrix expression operands should have been broken down to vector
935 * operations already.
936 */
937 assert(!ir->operands[operand]->type->is_matrix());
938 }
939
940 int vector_elements = ir->operands[0]->type->vector_elements;
941 if (ir->operands[1]) {
942 vector_elements = MAX2(vector_elements,
943 ir->operands[1]->type->vector_elements);
944 }
945
946 this->result.file = BAD_FILE;
947
948 /* Storage for our result. Ideally for an assignment we'd be using
949 * the actual storage for the result here, instead.
950 */
951 result_src = src_reg(this, ir->type);
952 /* convenience for the emit functions below. */
953 result_dst = dst_reg(result_src);
954 /* If nothing special happens, this is the result. */
955 this->result = result_src;
956 /* Limit writes to the channels that will be used by result_src later.
957 * This does limit this temp's use as a temporary for multi-instruction
958 * sequences.
959 */
960 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
961
962 switch (ir->operation) {
963 case ir_unop_logic_not:
964    /* Note that BRW_OPCODE_NOT is not appropriate here, since it takes
965     * the ones' complement of the whole register, not just bit 0.
966 */
967 emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
968 break;
969 case ir_unop_neg:
970 op[0].negate = !op[0].negate;
971 this->result = op[0];
972 break;
973 case ir_unop_abs:
974 op[0].abs = true;
975 op[0].negate = false;
976 this->result = op[0];
977 break;
978
979 case ir_unop_sign:
980 emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));
981
982 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
983 inst->conditional_mod = BRW_CONDITIONAL_G;
984 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
985 inst->predicate = BRW_PREDICATE_NORMAL;
986
987 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
988 inst->conditional_mod = BRW_CONDITIONAL_L;
989 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
990 inst->predicate = BRW_PREDICATE_NORMAL;
991
992 break;
993
994 case ir_unop_rcp:
995 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
996 break;
997
998 case ir_unop_exp2:
999 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1000 break;
1001 case ir_unop_log2:
1002 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1003 break;
1004 case ir_unop_exp:
1005 case ir_unop_log:
1006 assert(!"not reached: should be handled by ir_explog_to_explog2");
1007 break;
1008 case ir_unop_sin:
1009 case ir_unop_sin_reduced:
1010 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1011 break;
1012 case ir_unop_cos:
1013 case ir_unop_cos_reduced:
1014 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1015 break;
1016
1017 case ir_unop_dFdx:
1018 case ir_unop_dFdy:
1019 assert(!"derivatives not valid in vertex shader");
1020 break;
1021
1022 case ir_unop_noise:
1023 assert(!"not reached: should be handled by lower_noise");
1024 break;
1025
1026 case ir_binop_add:
1027 emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
1028 break;
1029 case ir_binop_sub:
1030 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1031 break;
1032
1033 case ir_binop_mul:
1034 if (ir->type->is_integer()) {
1035 /* For integer multiplication, the MUL uses the low 16 bits
1036 * of one of the operands (src0 on gen6, src1 on gen7). The
1037 * MACH accumulates in the contribution of the upper 16 bits
1038 * of that operand.
1039 *
1040 * FINISHME: Emit just the MUL if we know an operand is small
1041 * enough.
1042 */
1043 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1044
1045 emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
1046 emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]);
1047 emit(BRW_OPCODE_MOV, result_dst, src_reg(acc));
1048 } else {
1049 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
1050 }
1051 break;
1052 case ir_binop_div:
1053 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1054 case ir_binop_mod:
1055 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1056 break;
1057
1058 case ir_binop_less:
1059 case ir_binop_greater:
1060 case ir_binop_lequal:
1061 case ir_binop_gequal:
1062 case ir_binop_equal:
1063 case ir_binop_nequal: {
1064 dst_reg temp = result_dst;
1065 /* original gen4 does implicit conversion before comparison. */
1066 if (intel->gen < 5)
1067 temp.type = op[0].type;
1068
1069 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
1070 inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
1071 emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
1072 break;
1073 }
1074
1075 case ir_binop_all_equal:
1076 /* "==" operator producing a scalar boolean. */
1077 if (ir->operands[0]->type->is_vector() ||
1078 ir->operands[1]->type->is_vector()) {
1079 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
1080 inst->conditional_mod = BRW_CONDITIONAL_Z;
1081
1082 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
1083 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
1084 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1085 } else {
1086 dst_reg temp = result_dst;
1087 /* original gen4 does implicit conversion before comparison. */
1088 if (intel->gen < 5)
1089 temp.type = op[0].type;
1090
1091 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
1092 inst->conditional_mod = BRW_CONDITIONAL_Z;
1093 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
1094 }
1095 break;
1096 case ir_binop_any_nequal:
1097 /* "!=" operator producing a scalar boolean. */
1098 if (ir->operands[0]->type->is_vector() ||
1099 ir->operands[1]->type->is_vector()) {
1100 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
1101 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1102
1103 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
1104 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
1105 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1106 } else {
1107 dst_reg temp = result_dst;
1108 /* original gen4 does implicit conversion before comparison. */
1109 if (intel->gen < 5)
1110 temp.type = op[0].type;
1111
1112 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
1113 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1114 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
1115 }
1116 break;
1117
1118 case ir_unop_any:
1119 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
1120 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1121
1122 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
1123
1124 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
1125 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1126 break;
1127
1128 case ir_binop_logic_xor:
1129 emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
1130 break;
1131
1132 case ir_binop_logic_or:
1133 emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
1134 break;
1135
1136 case ir_binop_logic_and:
1137 emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
1138 break;
1139
1140 case ir_binop_dot:
1141 assert(ir->operands[0]->type->is_vector());
1142 assert(ir->operands[0]->type == ir->operands[1]->type);
1143 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1144 break;
1145
1146 case ir_unop_sqrt:
1147 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1148 break;
1149 case ir_unop_rsq:
1150 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1151 break;
1152 case ir_unop_i2f:
1153 case ir_unop_i2u:
1154 case ir_unop_u2i:
1155 case ir_unop_u2f:
1156 case ir_unop_b2f:
1157 case ir_unop_b2i:
1158 case ir_unop_f2i:
1159 emit(BRW_OPCODE_MOV, result_dst, op[0]);
1160 break;
1161 case ir_unop_f2b:
1162 case ir_unop_i2b: {
1163 dst_reg temp = result_dst;
1164 /* original gen4 does implicit conversion before comparison. */
1165 if (intel->gen < 5)
1166 temp.type = op[0].type;
1167
1168 inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
1169 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1170 inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
1171 break;
1172 }
1173
1174 case ir_unop_trunc:
1175 emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
1176 break;
1177 case ir_unop_ceil:
1178 op[0].negate = !op[0].negate;
1179 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
1180 this->result.negate = true;
1181 break;
1182 case ir_unop_floor:
1183 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
1184 break;
1185 case ir_unop_fract:
1186 inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
1187 break;
1188 case ir_unop_round_even:
1189 emit(BRW_OPCODE_RNDE, result_dst, op[0]);
1190 break;
1191
1192 case ir_binop_min:
1193 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
1194 inst->conditional_mod = BRW_CONDITIONAL_L;
1195
1196 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1197 inst->predicate = BRW_PREDICATE_NORMAL;
1198 break;
1199 case ir_binop_max:
1200 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
1201 inst->conditional_mod = BRW_CONDITIONAL_G;
1202
1203 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1204 inst->predicate = BRW_PREDICATE_NORMAL;
1205 break;
1206
1207 case ir_binop_pow:
1208 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1209 break;
1210
1211 case ir_unop_bit_not:
1212 inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
1213 break;
1214 case ir_binop_bit_and:
1215 inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
1216 break;
1217 case ir_binop_bit_xor:
1218 inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
1219 break;
1220 case ir_binop_bit_or:
1221 inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
1222 break;
1223
1224 case ir_binop_lshift:
1225 case ir_binop_rshift:
1226 assert(!"GLSL 1.30 features unsupported");
1227 break;
1228
1229 case ir_quadop_vector:
1230 assert(!"not reached: should be handled by lower_quadop_vector");
1231 break;
1232 }
1233 }
1234
1235
1236 void
1237 vec4_visitor::visit(ir_swizzle *ir)
1238 {
1239 src_reg src;
1240 int i = 0;
1241 int swizzle[4];
1242
1243 /* Note that this is only swizzles in expressions, not those on the left
1244 * hand side of an assignment, which do write masking. See ir_assignment
1245 * for that.
1246 */
1247
1248 ir->val->accept(this);
1249 src = this->result;
1250 assert(src.file != BAD_FILE);
1251
1252 for (i = 0; i < ir->type->vector_elements; i++) {
1253 switch (i) {
1254 case 0:
1255 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1256 break;
1257 case 1:
1258 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1259 break;
1260 case 2:
1261 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1262 break;
1263 case 3:
1264 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1265 break;
1266 }
1267 }
1268 for (; i < 4; i++) {
1269 /* Replicate the last channel out. */
1270 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1271 }
1272
1273 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1274
1275 this->result = src;
1276 }
1277
1278 void
1279 vec4_visitor::visit(ir_dereference_variable *ir)
1280 {
1281 const struct glsl_type *type = ir->type;
1282 dst_reg *reg = variable_storage(ir->var);
1283
1284 if (!reg) {
1285 fail("Failed to find variable storage for %s\n", ir->var->name);
1286 this->result = src_reg(brw_null_reg());
1287 return;
1288 }
1289
1290 this->result = src_reg(*reg);
1291
1292 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1293 this->result.swizzle = swizzle_for_size(type->vector_elements);
1294 }
1295
1296 void
1297 vec4_visitor::visit(ir_dereference_array *ir)
1298 {
1299 ir_constant *constant_index;
1300 src_reg src;
1301 int element_size = type_size(ir->type);
1302
1303 constant_index = ir->array_index->constant_expression_value();
1304
1305 ir->array->accept(this);
1306 src = this->result;
1307
1308 if (constant_index) {
1309 src.reg_offset += constant_index->value.i[0] * element_size;
1310 } else {
1311 /* Variable index array dereference. It eats the "vec4" of the
1312 * base of the array and an index that offsets the Mesa register
1313 * index.
1314 */
1315 ir->array_index->accept(this);
1316
1317 src_reg index_reg;
1318
1319 if (element_size == 1) {
1320 index_reg = this->result;
1321 } else {
1322 index_reg = src_reg(this, glsl_type::int_type);
1323
1324 emit(BRW_OPCODE_MUL, dst_reg(index_reg),
1325 this->result, src_reg(element_size));
1326 }
1327
1328 if (src.reladdr) {
1329 src_reg temp = src_reg(this, glsl_type::int_type);
1330
1331 emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);
1332
1333 index_reg = temp;
1334 }
1335
1336 src.reladdr = ralloc(mem_ctx, src_reg);
1337 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1338 }
1339
1340 /* If the type is smaller than a vec4, replicate the last channel out. */
1341 if (ir->type->is_scalar() || ir->type->is_vector())
1342 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1343 else
1344 src.swizzle = BRW_SWIZZLE_NOOP;
1345 src.type = brw_type_for_base_type(ir->type);
1346
1347 this->result = src;
1348 }
1349
1350 void
1351 vec4_visitor::visit(ir_dereference_record *ir)
1352 {
1353 unsigned int i;
1354 const glsl_type *struct_type = ir->record->type;
1355 int offset = 0;
1356
1357 ir->record->accept(this);
1358
1359 for (i = 0; i < struct_type->length; i++) {
1360 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1361 break;
1362 offset += type_size(struct_type->fields.structure[i].type);
1363 }
1364
1365 /* If the type is smaller than a vec4, replicate the last channel out. */
1366 if (ir->type->is_scalar() || ir->type->is_vector())
1367 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1368 else
1369 this->result.swizzle = BRW_SWIZZLE_NOOP;
1370 this->result.type = brw_type_for_base_type(ir->type);
1371
1372 this->result.reg_offset += offset;
1373 }
1374
1375 /**
1376 * We want to be careful in assignment setup to hit the actual storage
1377 * instead of potentially using a temporary like we might with the
1378 * ir_dereference handler.
1379 */
1380 static dst_reg
1381 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1382 {
1383 /* The LHS must be a dereference. If the LHS is a variable indexed array
1384  * access of a vector, it must be separated into a series of conditional moves
1385 * before reaching this point (see ir_vec_index_to_cond_assign).
1386 */
1387 assert(ir->as_dereference());
1388 ir_dereference_array *deref_array = ir->as_dereference_array();
1389 if (deref_array) {
1390 assert(!deref_array->array->type->is_vector());
1391 }
1392
1393 /* Use the rvalue deref handler for the most part. We'll ignore
1394 * swizzles in it and write swizzles using writemask, though.
1395 */
1396 ir->accept(v);
1397 return dst_reg(v->result);
1398 }
1399
1400 void
1401 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1402 const struct glsl_type *type, bool predicated)
1403 {
1404 if (type->base_type == GLSL_TYPE_STRUCT) {
1405 for (unsigned int i = 0; i < type->length; i++) {
1406 emit_block_move(dst, src, type->fields.structure[i].type, predicated);
1407 }
1408 return;
1409 }
1410
1411 if (type->is_array()) {
1412 for (unsigned int i = 0; i < type->length; i++) {
1413 emit_block_move(dst, src, type->fields.array, predicated);
1414 }
1415 return;
1416 }
1417
1418 if (type->is_matrix()) {
1419 const struct glsl_type *vec_type;
1420
1421 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1422 type->vector_elements, 1);
1423
1424 for (int i = 0; i < type->matrix_columns; i++) {
1425 emit_block_move(dst, src, vec_type, predicated);
1426 }
1427 return;
1428 }
1429
1430 assert(type->is_scalar() || type->is_vector());
1431
1432 dst->type = brw_type_for_base_type(type);
1433 src->type = dst->type;
1434
1435 dst->writemask = (1 << type->vector_elements) - 1;
1436
1437 /* Do we need to worry about swizzling a swizzle? */
1438    assert(src->swizzle == BRW_SWIZZLE_NOOP);
1439 src->swizzle = swizzle_for_size(type->vector_elements);
1440
1441 vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
1442 if (predicated)
1443 inst->predicate = BRW_PREDICATE_NORMAL;
1444
1445 dst->reg_offset++;
1446 src->reg_offset++;
1447 }
1448
1449
1450 /* If the RHS processing resulted in an instruction generating a
1451 * temporary value, and it would be easy to rewrite the instruction to
1452 * generate its result right into the LHS instead, do so. This ends
1453 * up reliably removing instructions where it can be tricky to do so
1454 * later without real UD chain information.
1455 */
1456 bool
1457 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1458 dst_reg dst,
1459 src_reg src,
1460 vec4_instruction *pre_rhs_inst,
1461 vec4_instruction *last_rhs_inst)
1462 {
1463 /* This could be supported, but it would take more smarts. */
1464 if (ir->condition)
1465 return false;
1466
1467 if (pre_rhs_inst == last_rhs_inst)
1468 return false; /* No instructions generated to work with. */
1469
1470 /* Make sure the last instruction generated our source reg. */
1471 if (src.file != GRF ||
1472 src.file != last_rhs_inst->dst.file ||
1473 src.reg != last_rhs_inst->dst.reg ||
1474 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1475 src.reladdr ||
1476 src.abs ||
1477 src.negate ||
1478 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1479 return false;
1480
1481    /* Check that the last instruction fully initialized the channels
1482 * we want to use, in the order we want to use them. We could
1483 * potentially reswizzle the operands of many instructions so that
1484 * we could handle out of order channels, but don't yet.
1485 */
1486 for (int i = 0; i < 4; i++) {
1487 if (dst.writemask & (1 << i)) {
1488 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1489 return false;
1490
1491 if (BRW_GET_SWZ(src.swizzle, i) != i)
1492 return false;
1493 }
1494 }
1495
1496 /* Success! Rewrite the instruction. */
1497 last_rhs_inst->dst.file = dst.file;
1498 last_rhs_inst->dst.reg = dst.reg;
1499 last_rhs_inst->dst.reg_offset = dst.reg_offset;
1500 last_rhs_inst->dst.reladdr = dst.reladdr;
1501 last_rhs_inst->dst.writemask &= dst.writemask;
1502
1503 return true;
1504 }
1505
1506 void
1507 vec4_visitor::visit(ir_assignment *ir)
1508 {
1509 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1510
1511 if (!ir->lhs->type->is_scalar() &&
1512 !ir->lhs->type->is_vector()) {
1513 ir->rhs->accept(this);
1514 src_reg src = this->result;
1515
1516 if (ir->condition) {
1517 emit_bool_to_cond_code(ir->condition);
1518 }
1519
1520 emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
1521 return;
1522 }
1523
1524 /* Now we're down to just a scalar/vector with writemasks. */
1525 int i;
1526
1527 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1528 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1529
1530 ir->rhs->accept(this);
1531
1532 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1533
1534 src_reg src = this->result;
1535
1536 int swizzles[4];
1537 int first_enabled_chan = 0;
1538 int src_chan = 0;
1539
1540 assert(ir->lhs->type->is_vector() ||
1541 ir->lhs->type->is_scalar());
1542 dst.writemask = ir->write_mask;
1543
1544 for (int i = 0; i < 4; i++) {
1545 if (dst.writemask & (1 << i)) {
1546 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1547 break;
1548 }
1549 }
1550
1551 /* Swizzle a small RHS vector into the channels being written.
1552 *
1553 * glsl ir treats write_mask as dictating how many channels are
1554 * present on the RHS while in our instructions we need to make
1555 * those channels appear in the slots of the vec4 they're written to.
1556 */
1557 for (int i = 0; i < 4; i++) {
1558 if (dst.writemask & (1 << i))
1559 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1560 else
1561 swizzles[i] = first_enabled_chan;
1562 }
1563 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1564 swizzles[2], swizzles[3]);
1565
1566 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1567 return;
1568 }
1569
1570 if (ir->condition) {
1571 emit_bool_to_cond_code(ir->condition);
1572 }
1573
1574 for (i = 0; i < type_size(ir->lhs->type); i++) {
1575 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
1576
1577 if (ir->condition)
1578 inst->predicate = BRW_PREDICATE_NORMAL;
1579
1580 dst.reg_offset++;
1581 src.reg_offset++;
1582 }
1583 }
1584
1585 void
1586 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1587 {
1588 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1589 foreach_list(node, &ir->components) {
1590 ir_constant *field_value = (ir_constant *)node;
1591
1592 emit_constant_values(dst, field_value);
1593 }
1594 return;
1595 }
1596
1597 if (ir->type->is_array()) {
1598 for (unsigned int i = 0; i < ir->type->length; i++) {
1599 emit_constant_values(dst, ir->array_elements[i]);
1600 }
1601 return;
1602 }
1603
1604 if (ir->type->is_matrix()) {
1605 for (int i = 0; i < ir->type->matrix_columns; i++) {
1606 for (int j = 0; j < ir->type->vector_elements; j++) {
1607 dst->writemask = 1 << j;
1608 dst->type = BRW_REGISTER_TYPE_F;
1609
1610 emit(BRW_OPCODE_MOV, *dst,
1611 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
1612 }
1613 dst->reg_offset++;
1614 }
1615 return;
1616 }
1617
1618 for (int i = 0; i < ir->type->vector_elements; i++) {
1619 dst->writemask = 1 << i;
1620 dst->type = brw_type_for_base_type(ir->type);
1621
1622 switch (ir->type->base_type) {
1623 case GLSL_TYPE_FLOAT:
1624 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
1625 break;
1626 case GLSL_TYPE_INT:
1627 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
1628 break;
1629 case GLSL_TYPE_UINT:
1630 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
1631 break;
1632 case GLSL_TYPE_BOOL:
1633 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
1634 break;
1635 default:
1636 assert(!"Non-float/uint/int/bool constant");
1637 break;
1638 }
1639 }
1640 dst->reg_offset++;
1641 }
1642
1643 void
1644 vec4_visitor::visit(ir_constant *ir)
1645 {
1646 dst_reg dst = dst_reg(this, ir->type);
1647 this->result = src_reg(dst);
1648
1649 emit_constant_values(&dst, ir);
1650 }
1651
1652 void
1653 vec4_visitor::visit(ir_call *ir)
1654 {
1655 assert(!"not reached");
1656 }
1657
1658 void
1659 vec4_visitor::visit(ir_texture *ir)
1660 {
1661 /* FINISHME: Implement vertex texturing.
1662 *
1663 * With 0 vertex samplers available, the linker will reject
1664 * programs that do vertex texturing, but after our visitor has
1665 * run.
1666 */
1667 }
1668
1669 void
1670 vec4_visitor::visit(ir_return *ir)
1671 {
1672 assert(!"not reached");
1673 }
1674
1675 void
1676 vec4_visitor::visit(ir_discard *ir)
1677 {
1678 assert(!"not reached");
1679 }
1680
1681 void
1682 vec4_visitor::visit(ir_if *ir)
1683 {
1684 /* Don't point the annotation at the if statement, because then it plus
1685 * the then and else blocks get printed.
1686 */
1687 this->base_ir = ir->condition;
1688
1689 if (intel->gen == 6) {
1690 emit_if_gen6(ir);
1691 } else {
1692 emit_bool_to_cond_code(ir->condition);
1693 vec4_instruction *inst = emit(BRW_OPCODE_IF);
1694 inst->predicate = BRW_PREDICATE_NORMAL;
1695 }
1696
1697 visit_instructions(&ir->then_instructions);
1698
1699 if (!ir->else_instructions.is_empty()) {
1700 this->base_ir = ir->condition;
1701 emit(BRW_OPCODE_ELSE);
1702
1703 visit_instructions(&ir->else_instructions);
1704 }
1705
1706 this->base_ir = ir->condition;
1707 emit(BRW_OPCODE_ENDIF);
1708 }
1709
1710 int
1711 vec4_visitor::emit_vue_header_gen4(int header_mrf)
1712 {
1713 /* Get the position */
1714 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
1715
1716 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1717 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1718
1719 current_annotation = "NDC";
1720 dst_reg ndc_w = ndc;
1721 ndc_w.writemask = WRITEMASK_W;
1722 src_reg pos_w = pos;
1723 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1724 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1725
1726 dst_reg ndc_xyz = ndc;
1727 ndc_xyz.writemask = WRITEMASK_XYZ;
1728
1729 emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
1730
1731 if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
1732 c->key.nr_userclip || brw->has_negative_rhw_bug) {
1733 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1734 GLuint i;
1735
1736 emit(BRW_OPCODE_MOV, header1, 0u);
1737
1738 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1739 assert(!"finishme: psiz");
1740 src_reg psiz;
1741
1742 header1.writemask = WRITEMASK_W;
1743 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
1744 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
1745 }
1746
1747 for (i = 0; i < c->key.nr_userclip; i++) {
1748 vec4_instruction *inst;
1749
1750 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
1751 pos, src_reg(c->userplane[i]));
1752 inst->conditional_mod = BRW_CONDITIONAL_L;
1753
1754     inst = emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
1755     inst->predicate = BRW_PREDICATE_NORMAL;
1756 }
1757
1758 /* i965 clipping workaround:
1759 * 1) Test for -ve rhw
1760 * 2) If set,
1761 * set ndc = (0,0,0,0)
1762 * set ucp[6] = 1
1763 *
1764 * Later, clipping will detect ucp[6] and ensure the primitive is
1765 * clipped against all fixed planes.
1766 */
1767 if (brw->has_negative_rhw_bug) {
1768 #if 0
1769 /* FINISHME */
1770 brw_CMP(p,
1771 vec8(brw_null_reg()),
1772 BRW_CONDITIONAL_L,
1773 brw_swizzle1(ndc, 3),
1774 brw_imm_f(0));
1775
1776 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
1777 brw_MOV(p, ndc, brw_imm_f(0));
1778 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1779 #endif
1780 }
1781
1782 header1.writemask = WRITEMASK_XYZW;
1783 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
1784 } else {
1785 emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
1786 BRW_REGISTER_TYPE_UD), 0u);
1787 }
1788
1789 if (intel->gen == 5) {
1790 /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
1791 * dword 0-3 (m1) of the header is indices, point width, clip flags.
1792 * dword 4-7 (m2) is the ndc position (set above)
1793 * dword 8-11 (m3) of the vertex header is the 4D space position
1794 * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
1795 * m6 is a pad so that the vertex element data is aligned
1796 * m7 is the first vertex data we fill.
1797 */
1798 current_annotation = "NDC";
1799 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1800
1801 current_annotation = "gl_Position";
1802 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1803
1804 /* user clip distance. */
1805 header_mrf += 2;
1806
1807 /* Pad so that vertex element data is aligned. */
1808 header_mrf++;
1809 } else {
1810 /* There are 8 dwords in VUE header pre-Ironlake:
1811 * dword 0-3 (m1) is indices, point width, clip flags.
1812 * dword 4-7 (m2) is ndc position (set above)
1813 *
1814 * dword 8-11 (m3) is the first vertex data.
1815 */
1816 current_annotation = "NDC";
1817 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1818
1819 current_annotation = "gl_Position";
1820 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1821 }
1822
1823 return header_mrf;
1824 }
1825
1826 int
1827 vec4_visitor::emit_vue_header_gen6(int header_mrf)
1828 {
1829 struct brw_reg reg;
1830
1831 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
1832 * dword 0-3 (m2) of the header is indices, point width, clip flags.
1833 * dword 4-7 (m3) is the 4D space position
1834 * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
1835 * enabled.
1836 *
1837 * m4 or 6 is the first vertex element data we fill.
1838 */
1839
1840 current_annotation = "indices, point width, clip flags";
1841 reg = brw_message_reg(header_mrf++);
1842 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
1843 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1844 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
1845 src_reg(output_reg[VERT_RESULT_PSIZ]));
1846 }
1847
1848 current_annotation = "gl_Position";
1849 emit(BRW_OPCODE_MOV,
1850 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
1851
1852 current_annotation = "user clip distances";
1853 if (c->key.nr_userclip) {
1854 for (int i = 0; i < c->key.nr_userclip; i++) {
1855 struct brw_reg m;
1856 if (i < 4)
1857 m = brw_message_reg(header_mrf);
1858 else
1859 m = brw_message_reg(header_mrf + 1);
1860
1861 emit(DP4(dst_reg(brw_writemask(m, 1 << (i & 3))),
1862 src_reg(output_reg[VERT_RESULT_HPOS]),
1863 src_reg(c->userplane[i])));
1864 }
1865 header_mrf += 2;
1866 }
1867
1868 current_annotation = NULL;
1869
1870 return header_mrf;
1871 }
1872
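/**
 * Rounds an URB write message length so that the payload following the
 * single message header register is a multiple of two registers (256 bits),
 * as required for interleaved URB writes on gen6+.
 */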
1873 static int
1874 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
1875 {
1876 struct intel_context *intel = &brw->intel;
1877
1878 if (intel->gen >= 6) {
1879 /* URB data written (does not include the message header reg) must
1880 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
1881 * section 5.4.3.2.2: URB_INTERLEAVED.
1882 *
1883 * URB entries are allocated on a multiple of 1024 bits, so an
1884 * extra 128 bits written here to make the end align to 256 is
1885 * no problem.
1886 */
1887 if ((mlen % 2) != 1)
1888 mlen++;
1889 }
1890
1891 return mlen;
1892 }
1893
1894 /**
1895 * Generates the VUE payload plus the 1 or 2 URB write instructions to
1896 * complete the VS thread.
1897 *
1898 * The VUE layout is documented in Volume 2a.
1899 */
1900 void
1901 vec4_visitor::emit_urb_writes()
1902 {
1903 /* MRF 0 is reserved for the debugger, so start with message header
1904 * in MRF 1.
1905 */
1906 int base_mrf = 1;
1907 int mrf = base_mrf;
1908 int urb_entry_size;
1909 uint64_t outputs_remaining = c->prog_data.outputs_written;
1910 /* In the process of generating our URB write message contents, we
1911 * may need to unspill a register or load from an array. Those
1912 * reads would use MRFs 14-15.
1913 */
1914 int max_usable_mrf = 13;
1915
1916 /* FINISHME: edgeflag */
1917
1918 /* First mrf is the g0-based message header containing URB handles and such,
1919 * which is implied in VS_OPCODE_URB_WRITE.
1920 */
1921 mrf++;
1922
1923 if (intel->gen >= 6) {
1924 mrf = emit_vue_header_gen6(mrf);
1925 } else {
1926 mrf = emit_vue_header_gen4(mrf);
1927 }
1928
1929 /* Set up the VUE data for the first URB write */
1930 int attr;
1931 for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
1932 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1933 continue;
1934
1935 outputs_remaining &= ~BITFIELD64_BIT(attr);
1936
1937 /* This is set up in the VUE header. */
1938 if (attr == VERT_RESULT_HPOS)
1939 continue;
1940
1941 /* This is loaded into the VUE header, and thus doesn't occupy
1942 * an attribute slot.
1943 */
1944 if (attr == VERT_RESULT_PSIZ)
1945 continue;
1946
1947 vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++),
1948 src_reg(output_reg[attr]));
1949
1950 if ((attr == VERT_RESULT_COL0 ||
1951 attr == VERT_RESULT_COL1 ||
1952 attr == VERT_RESULT_BFC0 ||
1953 attr == VERT_RESULT_BFC1) &&
1954 c->key.clamp_vertex_color) {
1955 inst->saturate = true;
1956 }
1957
1958 /* If this was MRF 15, we can't fit anything more into this URB
1959 * WRITE. Note that base_mrf of 1 means that MRF 15 is an
1960 * even-numbered amount of URB write data, which will meet
1961 * gen6's requirements for length alignment.
1962 */
1963 if (mrf > max_usable_mrf) {
1964 attr++;
1965 break;
1966 }
1967 }
1968
1969 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
1970 inst->base_mrf = base_mrf;
1971 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1972 inst->eot = !outputs_remaining;
1973
1974 urb_entry_size = mrf - base_mrf;
1975
1976 /* Optional second URB write */
1977 if (outputs_remaining) {
1978 mrf = base_mrf + 1;
1979
1980 for (; attr < VERT_RESULT_MAX; attr++) {
1981 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1982 continue;
1983
1984 assert(mrf < max_usable_mrf);
1985
1986 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
1987 }
1988
1989 inst = emit(VS_OPCODE_URB_WRITE);
1990 inst->base_mrf = base_mrf;
1991 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1992 inst->eot = true;
1993 /* URB destination offset. In the previous write, we got MRFs
1994 * 2-13 minus the one header MRF, so 12 regs. URB offset is in
1995 * URB row increments, and each of our MRFs is half of one of
1996 * those, since we're doing interleaved writes.
1997 */
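/* With base_mrf 1 and max_usable_mrf 13, that works out to (13 - 1) / 2 = 6. */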
1998 inst->offset = (max_usable_mrf - base_mrf) / 2;
1999
2000 urb_entry_size += mrf - base_mrf;
2001 }
2002
2003 if (intel->gen == 6)
2004 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
2005 else
2006 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
2007 }
2008
2009 src_reg
2010 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2011 src_reg *reladdr, int reg_offset)
2012 {
2013 /* Because we store the values to scratch interleaved like our
2014 * vertex data, we need to scale the vec4 index by 2.
2015 */
2016 int message_header_scale = 2;
2017
2018 /* Pre-gen6, the message header uses byte offsets instead of vec4
2019 * (16-byte) offset units.
2020 */
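/* e.g. vec4 slot 3 ends up at offset 3 * 2 = 6 on gen6+, or
 * 3 * 2 * 16 = 96 bytes before gen6.
 */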
2021 if (intel->gen < 6)
2022 message_header_scale *= 16;
2023
2024 if (reladdr) {
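/* index = (*reladdr + reg_offset) * message_header_scale; the ADD and MUL
 * below are moved so they land right before the instruction that uses the
 * offset.
 */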
2025 src_reg index = src_reg(this, glsl_type::int_type);
2026
2027 vec4_instruction *add = emit(BRW_OPCODE_ADD,
2028 dst_reg(index),
2029 *reladdr,
2030 src_reg(reg_offset));
2031 /* Move our new instruction from the tail to its correct place. */
2032 add->remove();
2033 inst->insert_before(add);
2034
2035 vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
2036 index, src_reg(message_header_scale));
2037 mul->remove();
2038 inst->insert_before(mul);
2039
2040 return index;
2041 } else {
2042 return src_reg(reg_offset * message_header_scale);
2043 }
2044 }
2045
2046 src_reg
2047 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2048 src_reg *reladdr, int reg_offset)
2049 {
2050 if (reladdr) {
2051 src_reg index = src_reg(this, glsl_type::int_type);
2052
2053 vec4_instruction *add = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_ADD,
2054 dst_reg(index),
2055 *reladdr,
2056 src_reg(reg_offset));
2057 add->ir = inst->ir;
2058 add->annotation = inst->annotation;
2059 inst->insert_before(add);
2060
2061 /* Pre-gen6, the message header uses byte offsets instead of vec4
2062 * (16-byte) offset units.
2063 */
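/* index = (*reladdr + reg_offset), scaled by 16 to a byte offset pre-gen6;
 * gen6+ uses the vec4 index directly.
 */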
2064 if (intel->gen < 6) {
2065 vec4_instruction *mul = new(mem_ctx) vec4_instruction(this,
2066 BRW_OPCODE_MUL,
2067 dst_reg(index),
2068 index,
2069 src_reg(16));
2070 mul->ir = inst->ir;
2071 mul->annotation = inst->annotation;
2072 inst->insert_before(mul);
2073 }
2074
2075 return index;
2076 } else {
2077 int message_header_scale = intel->gen < 6 ? 16 : 1;
2078 return src_reg(reg_offset * message_header_scale);
2079 }
2080 }
2081
2082 /**
2083 * Emits an instruction before @inst to load the value named by @orig_src
2084 * from scratch space at @base_offset to @temp.
2085 */
2086 void
2087 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2088 dst_reg temp, src_reg orig_src,
2089 int base_offset)
2090 {
2091 int reg_offset = base_offset + orig_src.reg_offset;
2092 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2093
2094 vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
2095 temp, index);
2096
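/* MRF 14 is one of the registers emit_urb_writes() keeps free for these
 * reads.
 */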
2097 scratch_read_inst->base_mrf = 14;
2098 scratch_read_inst->mlen = 1;
2099 /* Move our instruction from the tail to its correct place. */
2100 scratch_read_inst->remove();
2101 inst->insert_before(scratch_read_inst);
2102 }
2103
2104 /**
2105 * Emits an instruction after @inst to store the value to be written
2106 * to @orig_dst to scratch space at @base_offset, from @temp.
2107 */
2108 void
2109 vec4_visitor::emit_scratch_write(vec4_instruction *inst,
2110 src_reg temp, dst_reg orig_dst,
2111 int base_offset)
2112 {
2113 int reg_offset = base_offset + orig_dst.reg_offset;
2114 src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
2115
2116 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2117 orig_dst.writemask));
2118 vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
2119 dst, temp, index);
2120 scratch_write_inst->base_mrf = 13;
2121 scratch_write_inst->mlen = 2;
2122 scratch_write_inst->predicate = inst->predicate;
2123 /* Move our instruction from the tail to its correct place. */
2124 scratch_write_inst->remove();
2125 inst->insert_after(scratch_write_inst);
2126 }
2127
2128 /**
2129 * We can't generally support array access in GRF space, because a
2130 * single instruction's destination can only span 2 contiguous
2131 * registers. So, we send all GRF arrays that get variable index
2132 * access to scratch space.
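* For example, a shader temporary like "vec4 tmp[8]" indexed with a
* loop counter has each such access rewritten below into scratch reads
* and writes.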
2133 */
2134 void
2135 vec4_visitor::move_grf_array_access_to_scratch()
2136 {
2137 int scratch_loc[this->virtual_grf_count];
2138
2139 for (int i = 0; i < this->virtual_grf_count; i++) {
2140 scratch_loc[i] = -1;
2141 }
2142
2143 /* First, calculate the set of virtual GRFs that need to be punted
2144 * to scratch due to having any array access on them, and where in
2145 * scratch.
2146 */
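/* Each vec4 element of an array takes 8 floats * 4 bytes of scratch,
 * matching the interleaved layout described in get_scratch_offset().
 */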
2147 foreach_list(node, &this->instructions) {
2148 vec4_instruction *inst = (vec4_instruction *)node;
2149
2150 if (inst->dst.file == GRF && inst->dst.reladdr &&
2151 scratch_loc[inst->dst.reg] == -1) {
2152 scratch_loc[inst->dst.reg] = c->last_scratch;
2153 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
2154 }
2155
2156 for (int i = 0 ; i < 3; i++) {
2157 src_reg *src = &inst->src[i];
2158
2159 if (src->file == GRF && src->reladdr &&
2160 scratch_loc[src->reg] == -1) {
2161 scratch_loc[src->reg] = c->last_scratch;
2162 c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
2163 }
2164 }
2165 }
2166
2167 /* Now, for anything that will be accessed through scratch, rewrite
2168 * it to load/store. Note that this is a _safe list walk, because
2169 * we may generate a new scratch_write instruction after the one
2170 * we're processing.
2171 */
2172 foreach_list_safe(node, &this->instructions) {
2173 vec4_instruction *inst = (vec4_instruction *)node;
2174
2175 /* Set up the annotation tracking for newly generated instructions. */
2176 base_ir = inst->ir;
2177 current_annotation = inst->annotation;
2178
2179 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2180 src_reg temp = src_reg(this, glsl_type::vec4_type);
2181
2182 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
2183
2184 inst->dst.file = temp.file;
2185 inst->dst.reg = temp.reg;
2186 inst->dst.reg_offset = temp.reg_offset;
2187 inst->dst.reladdr = NULL;
2188 }
2189
2190 for (int i = 0 ; i < 3; i++) {
2191 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2192 continue;
2193
2194 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2195
2196 emit_scratch_read(inst, temp, inst->src[i],
2197 scratch_loc[inst->src[i].reg]);
2198
2199 inst->src[i].file = temp.file;
2200 inst->src[i].reg = temp.reg;
2201 inst->src[i].reg_offset = temp.reg_offset;
2202 inst->src[i].reladdr = NULL;
2203 }
2204 }
2205 }
2206
2207 /**
2208 * Emits an instruction before @inst to load the value named by @orig_src
2209 * from the pull constant buffer (surface) at @base_offset to @temp.
2210 */
2211 void
2212 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
2213 dst_reg temp, src_reg orig_src,
2214 int base_offset)
2215 {
2216 int reg_offset = base_offset + orig_src.reg_offset;
2217 src_reg index = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
2218 vec4_instruction *load;
2219
2220 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
2221 temp, index);
2222 load->annotation = inst->annotation;
2223 load->ir = inst->ir;
2224 load->base_mrf = 14;
2225 load->mlen = 1;
2226 inst->insert_before(load);
2227 }
2228
2229 /**
2230 * Implements array access of uniforms by inserting a
2231 * PULL_CONSTANT_LOAD instruction.
2232 *
2233 * Unlike temporary GRF array access (which we don't support, due to
2234 * the difficulty of doing relative addressing on instruction
2235 * destinations), we could potentially do array access of uniforms
2236 * that were loaded in GRF space as push constants. In real-world
2237 * usage we've seen, though, the arrays being used are always larger
2238 * than we could load as push constants, so just always move all
2239 * uniform array access out to a pull constant buffer.
2240 */
2241 void
2242 vec4_visitor::move_uniform_array_access_to_pull_constants()
2243 {
2244 int pull_constant_loc[this->uniforms];
2245
2246 for (int i = 0; i < this->uniforms; i++) {
2247 pull_constant_loc[i] = -1;
2248 }
2249
2250 /* Walk through and find array access of uniforms. Put a copy of that
2251 * uniform in the pull constant buffer.
2252 *
2253 * Note that we don't move constant-indexed accesses to arrays. No
2254 * testing has been done of the performance impact of this choice.
2255 */
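/* Only sources with a reladdr (variable index) are moved; their values
 * are appended to pull_param, and the access is rewritten to use the
 * result of a pull constant load.
 */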
2256 foreach_list_safe(node, &this->instructions) {
2257 vec4_instruction *inst = (vec4_instruction *)node;
2258
2259 for (int i = 0 ; i < 3; i++) {
2260 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
2261 continue;
2262
2263 int uniform = inst->src[i].reg;
2264
2265 /* If this array isn't already present in the pull constant buffer,
2266 * add it.
2267 */
2268 if (pull_constant_loc[uniform] == -1) {
2269 const float **values = &prog_data->param[uniform * 4];
2270
2271 pull_constant_loc[uniform] = prog_data->nr_pull_params;
2272
2273 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
2274 prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
2275 }
2276 }
2277
2278 /* Set up the annotation tracking for newly generated instructions. */
2279 base_ir = inst->ir;
2280 current_annotation = inst->annotation;
2281
2282 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2283
2284 emit_pull_constant_load(inst, temp, inst->src[i],
2285 pull_constant_loc[uniform]);
2286
2287 inst->src[i].file = temp.file;
2288 inst->src[i].reg = temp.reg;
2289 inst->src[i].reg_offset = temp.reg_offset;
2290 inst->src[i].reladdr = NULL;
2291 }
2292 }
2293
2294 /* Now there are no accesses of the UNIFORM file with a reladdr, so
2295 * no need to track them as larger-than-vec4 objects. This will be
2296 * relied on in cutting out unused uniform vectors from push
2297 * constants.
2298 */
2299 split_uniform_registers();
2300 }
2301
2302 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
2303 struct gl_shader_program *prog,
2304 struct brw_shader *shader)
2305 {
2306 this->c = c;
2307 this->p = &c->func;
2308 this->brw = p->brw;
2309 this->intel = &brw->intel;
2310 this->ctx = &intel->ctx;
2311 this->prog = prog;
2312 this->shader = shader;
2313
2314 this->mem_ctx = ralloc_context(NULL);
2315 this->failed = false;
2316
2317 this->base_ir = NULL;
2318 this->current_annotation = NULL;
2319
2321 this->vp = prog->VertexProgram;
2322 this->prog_data = &c->prog_data;
2323
2324 this->variable_ht = hash_table_ctor(0,
2325 hash_table_pointer_hash,
2326 hash_table_pointer_compare);
2327
2328 this->virtual_grf_def = NULL;
2329 this->virtual_grf_use = NULL;
2330 this->virtual_grf_sizes = NULL;
2331 this->virtual_grf_count = 0;
2332 this->virtual_grf_array_size = 0;
2333 this->live_intervals_valid = false;
2334
2335 this->uniforms = 0;
2340 }
2341
2342 vec4_visitor::~vec4_visitor()
2343 {
2344 ralloc_free(this->mem_ctx);
2345 hash_table_dtor(this->variable_ht);
2346 }
2347
2348
2349 void
2350 vec4_visitor::fail(const char *format, ...)
2351 {
2352 va_list va;
2353 char *msg;
2354
2355 if (failed)
2356 return;
2357
2358 failed = true;
2359
2360 va_start(va, format);
2361 msg = ralloc_vasprintf(mem_ctx, format, va);
2362 va_end(va);
2363 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2364
2365 this->fail_msg = msg;
2366
2367 if (INTEL_DEBUG & DEBUG_VS) {
2368 fprintf(stderr, "%s", msg);
2369 }
2370 }
2371
2372 } /* namespace brw */