i965 new VS: don't share clip plane constants in pre-GEN6
[mesa.git] / src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 }
29
30 namespace brw {
31
32 src_reg::src_reg(dst_reg reg)
33 {
34 init();
35
36 this->file = reg.file;
37 this->reg = reg.reg;
38 this->reg_offset = reg.reg_offset;
39 this->type = reg.type;
40 this->reladdr = reg.reladdr;
41 this->fixed_hw_reg = reg.fixed_hw_reg;
42
43 int swizzles[4];
44 int next_chan = 0;
45 int last = 0;
46
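/* Convert the writemask to a swizzle: the enabled channels are packed
 * into the leading swizzle slots in order, and the remaining slots
 * replicate the last enabled channel (e.g. a .xz writemask becomes an
 * xzzz swizzle).
 */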
47 for (int i = 0; i < 4; i++) {
48 if (!(reg.writemask & (1 << i)))
49 continue;
50
51 swizzles[next_chan++] = last = i;
52 }
53
54 for (; next_chan < 4; next_chan++) {
55 swizzles[next_chan] = last;
56 }
57
58 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
59 swizzles[2], swizzles[3]);
60 }
61
62 dst_reg::dst_reg(src_reg reg)
63 {
64 init();
65
66 this->file = reg.file;
67 this->reg = reg.reg;
68 this->reg_offset = reg.reg_offset;
69 this->type = reg.type;
70 this->writemask = WRITEMASK_XYZW;
71 this->reladdr = reg.reladdr;
72 this->fixed_hw_reg = reg.fixed_hw_reg;
73 }
74
75 vec4_instruction::vec4_instruction(vec4_visitor *v,
76 enum opcode opcode, dst_reg dst,
77 src_reg src0, src_reg src1, src_reg src2)
78 {
79 this->opcode = opcode;
80 this->dst = dst;
81 this->src[0] = src0;
82 this->src[1] = src1;
83 this->src[2] = src2;
84 this->ir = v->base_ir;
85 this->annotation = v->current_annotation;
86 }
87
88 vec4_instruction *
89 vec4_visitor::emit(vec4_instruction *inst)
90 {
91 this->instructions.push_tail(inst);
92
93 return inst;
94 }
95
96 vec4_instruction *
97 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
98 {
99 new_inst->ir = inst->ir;
100 new_inst->annotation = inst->annotation;
101
102 inst->insert_before(new_inst);
103
104 return inst;
105 }
106
107 vec4_instruction *
108 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
109 src_reg src0, src_reg src1, src_reg src2)
110 {
111 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
112 src0, src1, src2));
113 }
114
115
116 vec4_instruction *
117 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
118 {
119 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
120 }
121
122 vec4_instruction *
123 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
124 {
125 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
126 }
127
128 vec4_instruction *
129 vec4_visitor::emit(enum opcode opcode)
130 {
131 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
132 }
133
134 #define ALU1(op) \
135 vec4_instruction * \
136 vec4_visitor::op(dst_reg dst, src_reg src0) \
137 { \
138 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
139 src0); \
140 }
141
142 #define ALU2(op) \
143 vec4_instruction * \
144 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
145 { \
146 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
147 src0, src1); \
148 }
149
150 ALU1(NOT)
151 ALU1(MOV)
152 ALU1(FRC)
153 ALU1(RNDD)
154 ALU1(RNDE)
155 ALU1(RNDZ)
156 ALU2(ADD)
157 ALU2(MUL)
158 ALU2(MACH)
159 ALU2(AND)
160 ALU2(OR)
161 ALU2(XOR)
162 ALU2(DP3)
163 ALU2(DP4)
164
165 /** Gen4 predicated IF. */
166 vec4_instruction *
167 vec4_visitor::IF(uint32_t predicate)
168 {
169 vec4_instruction *inst;
170
171 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
172 inst->predicate = predicate;
173
174 return inst;
175 }
176
177 /** Gen6+ IF with embedded comparison. */
178 vec4_instruction *
179 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
180 {
181 assert(intel->gen >= 6);
182
183 vec4_instruction *inst;
184
185 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
186 src0, src1);
187 inst->conditional_mod = condition;
188
189 return inst;
190 }
191
192 /**
193 * CMP: Sets the low bit of the destination channels with the result
194 * of the comparison, while the upper bits are undefined, and updates
195 * the flag register with the packed 16 bits of the result.
196 */
197 vec4_instruction *
198 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
199 {
200 vec4_instruction *inst;
201
202 /* original gen4 does type conversion to the destination type
203 * before comparison, producing garbage results for floating
204 * point comparisons.
205 */
206 if (intel->gen == 4) {
207 dst.type = src0.type;
208 if (dst.file == HW_REG)
209 dst.fixed_hw_reg.type = dst.type;
210 }
211
212 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
213 inst->conditional_mod = condition;
214
215 return inst;
216 }
217
218 vec4_instruction *
219 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
220 {
221 vec4_instruction *inst;
222
223 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
224 dst, index);
225 inst->base_mrf = 14;
226 inst->mlen = 1;
227
228 return inst;
229 }
230
231 vec4_instruction *
232 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
233 {
234 vec4_instruction *inst;
235
236 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
237 dst, src, index);
238 inst->base_mrf = 13;
239 inst->mlen = 2;
240
241 return inst;
242 }
243
244 void
245 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
246 {
247 static enum opcode dot_opcodes[] = {
248 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
249 };
250
251 emit(dot_opcodes[elements - 2], dst, src0, src1);
252 }
253
254 void
255 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
256 {
257 /* The gen6 math instruction ignores the source modifiers --
258 * swizzle, abs, negate, and at least some parts of the register
259 * region description.
260 *
261 * While it would seem that this MOV could be avoided at this point
262 * in the case that the swizzle is matched up with the destination
263 * writemask, note that uniform packing and register allocation
264 * could rearrange our swizzle, so let's leave this matter up to
265 * copy propagation later.
266 */
267 src_reg temp_src = src_reg(this, glsl_type::vec4_type);
268 emit(MOV(dst_reg(temp_src), src));
269
270 if (dst.writemask != WRITEMASK_XYZW) {
271 /* The gen6 math instruction must be align1, so we can't do
272 * writemasks.
273 */
274 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
275
276 emit(opcode, temp_dst, temp_src);
277
278 emit(MOV(dst, src_reg(temp_dst)));
279 } else {
280 emit(opcode, dst, temp_src);
281 }
282 }
283
284 void
285 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
286 {
287 vec4_instruction *inst = emit(opcode, dst, src);
288 inst->base_mrf = 1;
289 inst->mlen = 1;
290 }
291
292 void
293 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
294 {
295 switch (opcode) {
296 case SHADER_OPCODE_RCP:
297 case SHADER_OPCODE_RSQ:
298 case SHADER_OPCODE_SQRT:
299 case SHADER_OPCODE_EXP2:
300 case SHADER_OPCODE_LOG2:
301 case SHADER_OPCODE_SIN:
302 case SHADER_OPCODE_COS:
303 break;
304 default:
305 assert(!"not reached: bad math opcode");
306 return;
307 }
308
309 if (intel->gen >= 6) {
310 return emit_math1_gen6(opcode, dst, src);
311 } else {
312 return emit_math1_gen4(opcode, dst, src);
313 }
314 }
315
316 void
317 vec4_visitor::emit_math2_gen6(enum opcode opcode,
318 dst_reg dst, src_reg src0, src_reg src1)
319 {
320 src_reg expanded;
321
322 /* The gen6 math instruction ignores the source modifiers --
323 * swizzle, abs, negate, and at least some parts of the register
324 * region description. Move the sources to temporaries to make it
325 * generally work.
326 */
327
328 expanded = src_reg(this, glsl_type::vec4_type);
329 emit(MOV(dst_reg(expanded), src0));
330 src0 = expanded;
331
332 expanded = src_reg(this, glsl_type::vec4_type);
333 emit(MOV(dst_reg(expanded), src1));
334 src1 = expanded;
335
336 if (dst.writemask != WRITEMASK_XYZW) {
337 /* The gen6 math instruction must be align1, so we can't do
338 * writemasks.
339 */
340 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
341
342 emit(opcode, temp_dst, src0, src1);
343
344 emit(MOV(dst, src_reg(temp_dst)));
345 } else {
346 emit(opcode, dst, src0, src1);
347 }
348 }
349
350 void
351 vec4_visitor::emit_math2_gen4(enum opcode opcode,
352 dst_reg dst, src_reg src0, src_reg src1)
353 {
354 vec4_instruction *inst = emit(opcode, dst, src0, src1);
355 inst->base_mrf = 1;
356 inst->mlen = 2;
357 }
358
359 void
360 vec4_visitor::emit_math(enum opcode opcode,
361 dst_reg dst, src_reg src0, src_reg src1)
362 {
363 assert(opcode == SHADER_OPCODE_POW);
364
365 if (intel->gen >= 6) {
366 return emit_math2_gen6(opcode, dst, src0, src1);
367 } else {
368 return emit_math2_gen4(opcode, dst, src0, src1);
369 }
370 }
371
372 void
373 vec4_visitor::visit_instructions(const exec_list *list)
374 {
375 foreach_list(node, list) {
376 ir_instruction *ir = (ir_instruction *)node;
377
378 base_ir = ir;
379 ir->accept(this);
380 }
381 }
382
383
384 static int
385 type_size(const struct glsl_type *type)
386 {
387 unsigned int i;
388 int size;
389
390 switch (type->base_type) {
391 case GLSL_TYPE_UINT:
392 case GLSL_TYPE_INT:
393 case GLSL_TYPE_FLOAT:
394 case GLSL_TYPE_BOOL:
395 if (type->is_matrix()) {
396 return type->matrix_columns;
397 } else {
398 /* Regardless of size of vector, it gets a vec4. This is bad
399 * packing for things like floats, but otherwise arrays become a
400 * mess. Hopefully a later pass over the code can pack scalars
401 * down if appropriate.
402 */
403 return 1;
404 }
405 case GLSL_TYPE_ARRAY:
406 assert(type->length > 0);
407 return type_size(type->fields.array) * type->length;
408 case GLSL_TYPE_STRUCT:
409 size = 0;
410 for (i = 0; i < type->length; i++) {
411 size += type_size(type->fields.structure[i].type);
412 }
413 return size;
414 case GLSL_TYPE_SAMPLER:
415 /* Samplers take up one slot in UNIFORMS[], but they're baked in
416 * at link time.
417 */
418 return 1;
419 default:
420 assert(0);
421 return 0;
422 }
423 }
424
425 int
426 vec4_visitor::virtual_grf_alloc(int size)
427 {
428 if (virtual_grf_array_size <= virtual_grf_count) {
429 if (virtual_grf_array_size == 0)
430 virtual_grf_array_size = 16;
431 else
432 virtual_grf_array_size *= 2;
433 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
434 virtual_grf_array_size);
435 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
436 virtual_grf_array_size);
437 }
438 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
439 virtual_grf_reg_count += size;
440 virtual_grf_sizes[virtual_grf_count] = size;
441 return virtual_grf_count++;
442 }
443
444 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
445 {
446 init();
447
448 this->file = GRF;
449 this->reg = v->virtual_grf_alloc(type_size(type));
450
451 if (type->is_array() || type->is_record()) {
452 this->swizzle = BRW_SWIZZLE_NOOP;
453 } else {
454 this->swizzle = swizzle_for_size(type->vector_elements);
455 }
456
457 this->type = brw_type_for_base_type(type);
458 }
459
460 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
461 {
462 init();
463
464 this->file = GRF;
465 this->reg = v->virtual_grf_alloc(type_size(type));
466
467 if (type->is_array() || type->is_record()) {
468 this->writemask = WRITEMASK_XYZW;
469 } else {
470 this->writemask = (1 << type->vector_elements) - 1;
471 }
472
473 this->type = brw_type_for_base_type(type);
474 }
475
476 /* Our support for uniforms is piggy-backed on the struct
477 * gl_vertex_program, because that's where the values actually
478 * get stored, rather than in some global gl_shader_program uniform
479 * store.
480 */
481 int
482 vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
483 {
484 unsigned int offset = 0;
485 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
486
487 if (type->is_matrix()) {
488 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
489 type->vector_elements,
490 1);
491
492 for (unsigned int i = 0; i < type->matrix_columns; i++) {
493 offset += setup_uniform_values(loc + offset, column);
494 }
495
496 return offset;
497 }
498
499 switch (type->base_type) {
500 case GLSL_TYPE_FLOAT:
501 case GLSL_TYPE_UINT:
502 case GLSL_TYPE_INT:
503 case GLSL_TYPE_BOOL:
504 for (unsigned int i = 0; i < type->vector_elements; i++) {
505 c->prog_data.param[this->uniforms * 4 + i] = &values[i];
506 }
507
508 /* Set up pad elements to get things aligned to a vec4 boundary. */
509 for (unsigned int i = type->vector_elements; i < 4; i++) {
510 static float zero = 0;
511
512 c->prog_data.param[this->uniforms * 4 + i] = &zero;
513 }
514
515 /* Track the size of this uniform vector, for future packing of
516 * uniforms.
517 */
518 this->uniform_vector_size[this->uniforms] = type->vector_elements;
519 this->uniforms++;
520
521 return 1;
522
523 case GLSL_TYPE_STRUCT:
524 for (unsigned int i = 0; i < type->length; i++) {
525 offset += setup_uniform_values(loc + offset,
526 type->fields.structure[i].type);
527 }
528 return offset;
529
530 case GLSL_TYPE_ARRAY:
531 for (unsigned int i = 0; i < type->length; i++) {
532 offset += setup_uniform_values(loc + offset, type->fields.array);
533 }
534 return offset;
535
536 case GLSL_TYPE_SAMPLER:
537 /* The sampler takes up a slot, but we don't use any values from it. */
538 return 1;
539
540 default:
541 assert(!"not reached");
542 return 0;
543 }
544 }
545
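/* Set up one vec4 push constant for each enabled user clip plane. Each
 * plane gets a slot in userplane[] at its compacted index among the
 * enabled planes, and its four entries in c->prog_data.param point
 * directly at ctx->Transform._ClipUserPlane[i], so the plane equations
 * are uploaded along with this program's other constants.
 */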
546 void
547 vec4_visitor::setup_uniform_clipplane_values()
548 {
549 int compacted_clipplane_index = 0;
550 for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
551 if (ctx->Transform.ClipPlanesEnabled & (1 << i)) {
552 this->uniform_vector_size[this->uniforms] = 4;
553 this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
554 this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
555 for (int j = 0; j < 4; ++j) {
556 c->prog_data.param[this->uniforms * 4 + j] = &ctx->Transform._ClipUserPlane[i][j];
557 }
558 ++compacted_clipplane_index;
559 ++this->uniforms;
560 }
561 }
562 }
563
564 /* Our support for builtin uniforms is even scarier than non-builtin.
565 * It sits on top of the PROG_STATE_VAR parameters that are
566 * automatically updated from GL context state.
567 */
568 void
569 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
570 {
571 const ir_state_slot *const slots = ir->state_slots;
572 assert(ir->state_slots != NULL);
573
574 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
575 /* This state reference has already been setup by ir_to_mesa,
576 * but we'll get the same index back here. We can reference
577 * ParameterValues directly, since unlike brw_fs.cpp, we never
578 * add new state references during compile.
579 */
580 int index = _mesa_add_state_reference(this->vp->Base.Parameters,
581 (gl_state_index *)slots[i].tokens);
582 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
583
584 this->uniform_vector_size[this->uniforms] = 0;
585 /* Add each of the unique swizzled channels of the element.
586 * This will end up matching the size of the glsl_type of this field.
587 */
588 int last_swiz = -1;
589 for (unsigned int j = 0; j < 4; j++) {
590 int swiz = GET_SWZ(slots[i].swizzle, j);
591 last_swiz = swiz;
592
593 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
594 if (swiz <= last_swiz)
595 this->uniform_vector_size[this->uniforms]++;
596 }
597 this->uniforms++;
598 }
599 }
600
601 dst_reg *
602 vec4_visitor::variable_storage(ir_variable *var)
603 {
604 return (dst_reg *)hash_table_find(this->variable_ht, var);
605 }
606
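/* Evaluate a boolean rvalue and leave its result in the flag register,
 * returning in *predicate the predication mode (normal, ALL4H, or ANY4H)
 * that a following predicated instruction should use.
 */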
607 void
608 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
609 {
610 ir_expression *expr = ir->as_expression();
611
612 *predicate = BRW_PREDICATE_NORMAL;
613
614 if (expr) {
615 src_reg op[2];
616 vec4_instruction *inst;
617
618 assert(expr->get_num_operands() <= 2);
619 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
620 expr->operands[i]->accept(this);
621 op[i] = this->result;
622 }
623
624 switch (expr->operation) {
625 case ir_unop_logic_not:
626 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
627 inst->conditional_mod = BRW_CONDITIONAL_Z;
628 break;
629
630 case ir_binop_logic_xor:
631 inst = emit(XOR(dst_null_d(), op[0], op[1]));
632 inst->conditional_mod = BRW_CONDITIONAL_NZ;
633 break;
634
635 case ir_binop_logic_or:
636 inst = emit(OR(dst_null_d(), op[0], op[1]));
637 inst->conditional_mod = BRW_CONDITIONAL_NZ;
638 break;
639
640 case ir_binop_logic_and:
641 inst = emit(AND(dst_null_d(), op[0], op[1]));
642 inst->conditional_mod = BRW_CONDITIONAL_NZ;
643 break;
644
645 case ir_unop_f2b:
646 if (intel->gen >= 6) {
647 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
648 } else {
649 inst = emit(MOV(dst_null_f(), op[0]));
650 inst->conditional_mod = BRW_CONDITIONAL_NZ;
651 }
652 break;
653
654 case ir_unop_i2b:
655 if (intel->gen >= 6) {
656 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
657 } else {
658 inst = emit(MOV(dst_null_d(), op[0]));
659 inst->conditional_mod = BRW_CONDITIONAL_NZ;
660 }
661 break;
662
663 case ir_binop_all_equal:
664 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
665 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
666 break;
667
668 case ir_binop_any_nequal:
669 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
670 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
671 break;
672
673 case ir_unop_any:
674 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
675 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
676 break;
677
678 case ir_binop_greater:
679 case ir_binop_gequal:
680 case ir_binop_less:
681 case ir_binop_lequal:
682 case ir_binop_equal:
683 case ir_binop_nequal:
684 emit(CMP(dst_null_d(), op[0], op[1],
685 brw_conditional_for_comparison(expr->operation)));
686 break;
687
688 default:
689 assert(!"not reached");
690 break;
691 }
692 return;
693 }
694
695 ir->accept(this);
696
697 if (intel->gen >= 6) {
698 vec4_instruction *inst = emit(AND(dst_null_d(),
699 this->result, src_reg(1)));
700 inst->conditional_mod = BRW_CONDITIONAL_NZ;
701 } else {
702 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
703 inst->conditional_mod = BRW_CONDITIONAL_NZ;
704 }
705 }
706
707 /**
708 * Emit a gen6 IF statement with the comparison folded into the IF
709 * instruction.
710 */
711 void
712 vec4_visitor::emit_if_gen6(ir_if *ir)
713 {
714 ir_expression *expr = ir->condition->as_expression();
715
716 if (expr) {
717 src_reg op[2];
718 dst_reg temp;
719
720 assert(expr->get_num_operands() <= 2);
721 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
722 expr->operands[i]->accept(this);
723 op[i] = this->result;
724 }
725
726 switch (expr->operation) {
727 case ir_unop_logic_not:
728 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
729 return;
730
731 case ir_binop_logic_xor:
732 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
733 return;
734
735 case ir_binop_logic_or:
736 temp = dst_reg(this, glsl_type::bool_type);
737 emit(OR(temp, op[0], op[1]));
738 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
739 return;
740
741 case ir_binop_logic_and:
742 temp = dst_reg(this, glsl_type::bool_type);
743 emit(AND(temp, op[0], op[1]));
744 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
745 return;
746
747 case ir_unop_f2b:
748 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
749 return;
750
751 case ir_unop_i2b:
752 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
753 return;
754
755 case ir_binop_greater:
756 case ir_binop_gequal:
757 case ir_binop_less:
758 case ir_binop_lequal:
759 case ir_binop_equal:
760 case ir_binop_nequal:
761 emit(IF(op[0], op[1],
762 brw_conditional_for_comparison(expr->operation)));
763 return;
764
765 case ir_binop_all_equal:
766 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
767 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
768 return;
769
770 case ir_binop_any_nequal:
771 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
772 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
773 return;
774
775 case ir_unop_any:
776 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
777 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
778 return;
779
780 default:
781 assert(!"not reached");
782 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
783 return;
784 }
785 return;
786 }
787
788 ir->condition->accept(this);
789
790 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
791 }
792
793 void
794 vec4_visitor::visit(ir_variable *ir)
795 {
796 dst_reg *reg = NULL;
797
798 if (variable_storage(ir))
799 return;
800
801 switch (ir->mode) {
802 case ir_var_in:
803 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
804
805 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
806 * come in as floating point conversions of the integer values.
807 */
808 for (int i = ir->location; i < ir->location + type_size(ir->type); i++) {
809 if (!c->key.gl_fixed_input_size[i])
810 continue;
811
812 dst_reg dst = *reg;
813 dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1;
814 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
815 }
816 break;
817
818 case ir_var_out:
819 reg = new(mem_ctx) dst_reg(this, ir->type);
820
821 for (int i = 0; i < type_size(ir->type); i++) {
822 output_reg[ir->location + i] = *reg;
823 output_reg[ir->location + i].reg_offset = i;
824 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
825 output_reg_annotation[ir->location + i] = ir->name;
826 }
827 break;
828
829 case ir_var_auto:
830 case ir_var_temporary:
831 reg = new(mem_ctx) dst_reg(this, ir->type);
832 break;
833
834 case ir_var_uniform:
835 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
836
837 /* Track how big the whole uniform variable is, in case we need to put a
838 * copy of its data into pull constants for array access.
839 */
840 this->uniform_size[this->uniforms] = type_size(ir->type);
841
842 if (!strncmp(ir->name, "gl_", 3)) {
843 setup_builtin_uniform_values(ir);
844 } else {
845 setup_uniform_values(ir->location, ir->type);
846 }
847 break;
848
849 default:
850 assert(!"not reached");
851 }
852
853 reg->type = brw_type_for_base_type(ir->type);
854 hash_table_insert(this->variable_ht, reg, ir);
855 }
856
857 void
858 vec4_visitor::visit(ir_loop *ir)
859 {
860 dst_reg counter;
861
862 /* We don't want debugging output to print the whole body of the
863 * loop as the annotation.
864 */
865 this->base_ir = NULL;
866
867 if (ir->counter != NULL) {
868 this->base_ir = ir->counter;
869 ir->counter->accept(this);
870 counter = *(variable_storage(ir->counter));
871
872 if (ir->from != NULL) {
873 this->base_ir = ir->from;
874 ir->from->accept(this);
875
876 emit(MOV(counter, this->result));
877 }
878 }
879
880 emit(BRW_OPCODE_DO);
881
882 if (ir->to) {
883 this->base_ir = ir->to;
884 ir->to->accept(this);
885
886 emit(CMP(dst_null_d(), src_reg(counter), this->result,
887 brw_conditional_for_comparison(ir->cmp)));
888
889 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
890 inst->predicate = BRW_PREDICATE_NORMAL;
891 }
892
893 visit_instructions(&ir->body_instructions);
894
895
896 if (ir->increment) {
897 this->base_ir = ir->increment;
898 ir->increment->accept(this);
899 emit(ADD(counter, src_reg(counter), this->result));
900 }
901
902 emit(BRW_OPCODE_WHILE);
903 }
904
905 void
906 vec4_visitor::visit(ir_loop_jump *ir)
907 {
908 switch (ir->mode) {
909 case ir_loop_jump::jump_break:
910 emit(BRW_OPCODE_BREAK);
911 break;
912 case ir_loop_jump::jump_continue:
913 emit(BRW_OPCODE_CONTINUE);
914 break;
915 }
916 }
917
918
919 void
920 vec4_visitor::visit(ir_function_signature *ir)
921 {
922 assert(0);
923 (void)ir;
924 }
925
926 void
927 vec4_visitor::visit(ir_function *ir)
928 {
929 /* Ignore function bodies other than main() -- we shouldn't see calls to
930 * them since they should all be inlined.
931 */
932 if (strcmp(ir->name, "main") == 0) {
933 const ir_function_signature *sig;
934 exec_list empty;
935
936 sig = ir->matching_signature(&empty);
937
938 assert(sig);
939
940 visit_instructions(&sig->body);
941 }
942 }
943
944 GLboolean
945 vec4_visitor::try_emit_sat(ir_expression *ir)
946 {
947 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
948 if (!sat_src)
949 return false;
950
951 sat_src->accept(this);
952 src_reg src = this->result;
953
954 this->result = src_reg(this, ir->type);
955 vec4_instruction *inst;
956 inst = emit(MOV(dst_reg(this->result), src));
957 inst->saturate = true;
958
959 return true;
960 }
961
962 void
963 vec4_visitor::emit_bool_comparison(unsigned int op,
964 dst_reg dst, src_reg src0, src_reg src1)
965 {
966 /* original gen4 does destination conversion before comparison. */
967 if (intel->gen < 5)
968 dst.type = src0.type;
969
970 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
971
972 dst.type = BRW_REGISTER_TYPE_D;
973 emit(AND(dst, src_reg(dst), src_reg(0x1)));
974 }
975
976 void
977 vec4_visitor::visit(ir_expression *ir)
978 {
979 unsigned int operand;
980 src_reg op[Elements(ir->operands)];
981 src_reg result_src;
982 dst_reg result_dst;
983 vec4_instruction *inst;
984
985 if (try_emit_sat(ir))
986 return;
987
988 for (operand = 0; operand < ir->get_num_operands(); operand++) {
989 this->result.file = BAD_FILE;
990 ir->operands[operand]->accept(this);
991 if (this->result.file == BAD_FILE) {
992 printf("Failed to get tree for expression operand:\n");
993 ir->operands[operand]->print();
994 exit(1);
995 }
996 op[operand] = this->result;
997
998 /* Matrix expression operands should have been broken down to vector
999 * operations already.
1000 */
1001 assert(!ir->operands[operand]->type->is_matrix());
1002 }
1003
1004 int vector_elements = ir->operands[0]->type->vector_elements;
1005 if (ir->operands[1]) {
1006 vector_elements = MAX2(vector_elements,
1007 ir->operands[1]->type->vector_elements);
1008 }
1009
1010 this->result.file = BAD_FILE;
1011
1012 /* Storage for our result. Ideally for an assignment we'd be using
1013 * the actual storage for the result here, instead.
1014 */
1015 result_src = src_reg(this, ir->type);
1016 /* convenience for the emit functions below. */
1017 result_dst = dst_reg(result_src);
1018 /* If nothing special happens, this is the result. */
1019 this->result = result_src;
1020 /* Limit writes to the channels that will be used by result_src later.
1021 * This does limit this temp's use as a temporary for multi-instruction
1022 * sequences.
1023 */
1024 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1025
1026 switch (ir->operation) {
1027 case ir_unop_logic_not:
1028 /* Note that BRW_OPCODE_NOT is not appropriate here, since it takes
1029 * the ones' complement of the whole register, not just bit 0.
1030 */
1031 emit(XOR(result_dst, op[0], src_reg(1)));
1032 break;
1033 case ir_unop_neg:
1034 op[0].negate = !op[0].negate;
1035 this->result = op[0];
1036 break;
1037 case ir_unop_abs:
1038 op[0].abs = true;
1039 op[0].negate = false;
1040 this->result = op[0];
1041 break;
1042
1043 case ir_unop_sign:
1044 emit(MOV(result_dst, src_reg(0.0f)));
1045
1046 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1047 inst = emit(MOV(result_dst, src_reg(1.0f)));
1048 inst->predicate = BRW_PREDICATE_NORMAL;
1049
1050 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1051 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1052 inst->predicate = BRW_PREDICATE_NORMAL;
1053
1054 break;
1055
1056 case ir_unop_rcp:
1057 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1058 break;
1059
1060 case ir_unop_exp2:
1061 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1062 break;
1063 case ir_unop_log2:
1064 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1065 break;
1066 case ir_unop_exp:
1067 case ir_unop_log:
1068 assert(!"not reached: should be handled by ir_explog_to_explog2");
1069 break;
1070 case ir_unop_sin:
1071 case ir_unop_sin_reduced:
1072 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1073 break;
1074 case ir_unop_cos:
1075 case ir_unop_cos_reduced:
1076 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1077 break;
1078
1079 case ir_unop_dFdx:
1080 case ir_unop_dFdy:
1081 assert(!"derivatives not valid in vertex shader");
1082 break;
1083
1084 case ir_unop_noise:
1085 assert(!"not reached: should be handled by lower_noise");
1086 break;
1087
1088 case ir_binop_add:
1089 emit(ADD(result_dst, op[0], op[1]));
1090 break;
1091 case ir_binop_sub:
1092 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1093 break;
1094
1095 case ir_binop_mul:
1096 if (ir->type->is_integer()) {
1097 /* For integer multiplication, the MUL uses the low 16 bits
1098 * of one of the operands (src0 on gen6, src1 on gen7). The
1099 * MACH accumulates in the contribution of the upper 16 bits
1100 * of that operand.
1101 *
1102 * FINISHME: Emit just the MUL if we know an operand is small
1103 * enough.
1104 */
1105 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1106
1107 emit(MUL(acc, op[0], op[1]));
1108 emit(MACH(dst_null_d(), op[0], op[1]));
1109 emit(MOV(result_dst, src_reg(acc)));
1110 } else {
1111 emit(MUL(result_dst, op[0], op[1]));
1112 }
1113 break;
1114 case ir_binop_div:
1115 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1116 case ir_binop_mod:
1117 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1118 break;
1119
1120 case ir_binop_less:
1121 case ir_binop_greater:
1122 case ir_binop_lequal:
1123 case ir_binop_gequal:
1124 case ir_binop_equal:
1125 case ir_binop_nequal: {
1126 emit(CMP(result_dst, op[0], op[1],
1127 brw_conditional_for_comparison(ir->operation)));
1128 emit(AND(result_dst, result_src, src_reg(0x1)));
1129 break;
1130 }
1131
1132 case ir_binop_all_equal:
1133 /* "==" operator producing a scalar boolean. */
1134 if (ir->operands[0]->type->is_vector() ||
1135 ir->operands[1]->type->is_vector()) {
1136 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1137 emit(MOV(result_dst, src_reg(0)));
1138 inst = emit(MOV(result_dst, src_reg(1)));
1139 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1140 } else {
1141 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1142 emit(AND(result_dst, result_src, src_reg(0x1)));
1143 }
1144 break;
1145 case ir_binop_any_nequal:
1146 /* "!=" operator producing a scalar boolean. */
1147 if (ir->operands[0]->type->is_vector() ||
1148 ir->operands[1]->type->is_vector()) {
1149 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1150
1151 emit(MOV(result_dst, src_reg(0)));
1152 inst = emit(MOV(result_dst, src_reg(1)));
1153 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1154 } else {
1155 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1156 emit(AND(result_dst, result_src, src_reg(0x1)));
1157 }
1158 break;
1159
1160 case ir_unop_any:
1161 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1162 emit(MOV(result_dst, src_reg(0)));
1163
1164 inst = emit(MOV(result_dst, src_reg(1)));
1165 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1166 break;
1167
1168 case ir_binop_logic_xor:
1169 emit(XOR(result_dst, op[0], op[1]));
1170 break;
1171
1172 case ir_binop_logic_or:
1173 emit(OR(result_dst, op[0], op[1]));
1174 break;
1175
1176 case ir_binop_logic_and:
1177 emit(AND(result_dst, op[0], op[1]));
1178 break;
1179
1180 case ir_binop_dot:
1181 assert(ir->operands[0]->type->is_vector());
1182 assert(ir->operands[0]->type == ir->operands[1]->type);
1183 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1184 break;
1185
1186 case ir_unop_sqrt:
1187 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1188 break;
1189 case ir_unop_rsq:
1190 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1191 break;
1192 case ir_unop_i2f:
1193 case ir_unop_i2u:
1194 case ir_unop_u2i:
1195 case ir_unop_u2f:
1196 case ir_unop_b2f:
1197 case ir_unop_b2i:
1198 case ir_unop_f2i:
1199 emit(MOV(result_dst, op[0]));
1200 break;
1201 case ir_unop_f2b:
1202 case ir_unop_i2b: {
1203 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1204 emit(AND(result_dst, result_src, src_reg(1)));
1205 break;
1206 }
1207
1208 case ir_unop_trunc:
1209 emit(RNDZ(result_dst, op[0]));
1210 break;
1211 case ir_unop_ceil:
1212 op[0].negate = !op[0].negate;
1213 inst = emit(RNDD(result_dst, op[0]));
1214 this->result.negate = true;
1215 break;
1216 case ir_unop_floor:
1217 inst = emit(RNDD(result_dst, op[0]));
1218 break;
1219 case ir_unop_fract:
1220 inst = emit(FRC(result_dst, op[0]));
1221 break;
1222 case ir_unop_round_even:
1223 emit(RNDE(result_dst, op[0]));
1224 break;
1225
1226 case ir_binop_min:
1227 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_L));
1228
1229 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1230 inst->predicate = BRW_PREDICATE_NORMAL;
1231 break;
1232 case ir_binop_max:
1233 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_G));
1234
1235 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1236 inst->predicate = BRW_PREDICATE_NORMAL;
1237 break;
1238
1239 case ir_binop_pow:
1240 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1241 break;
1242
1243 case ir_unop_bit_not:
1244 inst = emit(NOT(result_dst, op[0]));
1245 break;
1246 case ir_binop_bit_and:
1247 inst = emit(AND(result_dst, op[0], op[1]));
1248 break;
1249 case ir_binop_bit_xor:
1250 inst = emit(XOR(result_dst, op[0], op[1]));
1251 break;
1252 case ir_binop_bit_or:
1253 inst = emit(OR(result_dst, op[0], op[1]));
1254 break;
1255
1256 case ir_binop_lshift:
1257 case ir_binop_rshift:
1258 assert(!"GLSL 1.30 features unsupported");
1259 break;
1260
1261 case ir_quadop_vector:
1262 assert(!"not reached: should be handled by lower_quadop_vector");
1263 break;
1264 }
1265 }
1266
1267
1268 void
1269 vec4_visitor::visit(ir_swizzle *ir)
1270 {
1271 src_reg src;
1272 int i = 0;
1273 int swizzle[4];
1274
1275 /* Note that this is only swizzles in expressions, not those on the left
1276 * hand side of an assignment, which do write masking. See ir_assignment
1277 * for that.
1278 */
1279
1280 ir->val->accept(this);
1281 src = this->result;
1282 assert(src.file != BAD_FILE);
1283
1284 for (i = 0; i < ir->type->vector_elements; i++) {
1285 switch (i) {
1286 case 0:
1287 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1288 break;
1289 case 1:
1290 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1291 break;
1292 case 2:
1293 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1294 break;
1295 case 3:
1296 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1297 break;
1298 }
1299 }
1300 for (; i < 4; i++) {
1301 /* Replicate the last channel out. */
1302 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1303 }
1304
1305 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1306
1307 this->result = src;
1308 }
1309
1310 void
1311 vec4_visitor::visit(ir_dereference_variable *ir)
1312 {
1313 const struct glsl_type *type = ir->type;
1314 dst_reg *reg = variable_storage(ir->var);
1315
1316 if (!reg) {
1317 fail("Failed to find variable storage for %s\n", ir->var->name);
1318 this->result = src_reg(brw_null_reg());
1319 return;
1320 }
1321
1322 this->result = src_reg(*reg);
1323
1324 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1325 this->result.swizzle = swizzle_for_size(type->vector_elements);
1326 }
1327
1328 void
1329 vec4_visitor::visit(ir_dereference_array *ir)
1330 {
1331 ir_constant *constant_index;
1332 src_reg src;
1333 int element_size = type_size(ir->type);
1334
1335 constant_index = ir->array_index->constant_expression_value();
1336
1337 ir->array->accept(this);
1338 src = this->result;
1339
1340 if (constant_index) {
1341 src.reg_offset += constant_index->value.i[0] * element_size;
1342 } else {
1343 /* Variable index array dereference. It eats the "vec4" of the
1344 * base of the array and an index that offsets the Mesa register
1345 * index.
1346 */
1347 ir->array_index->accept(this);
1348
1349 src_reg index_reg;
1350
1351 if (element_size == 1) {
1352 index_reg = this->result;
1353 } else {
1354 index_reg = src_reg(this, glsl_type::int_type);
1355
1356 emit(MUL(dst_reg(index_reg), this->result, src_reg(element_size)));
1357 }
1358
1359 if (src.reladdr) {
1360 src_reg temp = src_reg(this, glsl_type::int_type);
1361
1362 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1363
1364 index_reg = temp;
1365 }
1366
1367 src.reladdr = ralloc(mem_ctx, src_reg);
1368 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1369 }
1370
1371 /* If the type is smaller than a vec4, replicate the last channel out. */
1372 if (ir->type->is_scalar() || ir->type->is_vector())
1373 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1374 else
1375 src.swizzle = BRW_SWIZZLE_NOOP;
1376 src.type = brw_type_for_base_type(ir->type);
1377
1378 this->result = src;
1379 }
1380
1381 void
1382 vec4_visitor::visit(ir_dereference_record *ir)
1383 {
1384 unsigned int i;
1385 const glsl_type *struct_type = ir->record->type;
1386 int offset = 0;
1387
1388 ir->record->accept(this);
1389
1390 for (i = 0; i < struct_type->length; i++) {
1391 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1392 break;
1393 offset += type_size(struct_type->fields.structure[i].type);
1394 }
1395
1396 /* If the type is smaller than a vec4, replicate the last channel out. */
1397 if (ir->type->is_scalar() || ir->type->is_vector())
1398 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1399 else
1400 this->result.swizzle = BRW_SWIZZLE_NOOP;
1401 this->result.type = brw_type_for_base_type(ir->type);
1402
1403 this->result.reg_offset += offset;
1404 }
1405
1406 /**
1407 * We want to be careful in assignment setup to hit the actual storage
1408 * instead of potentially using a temporary like we might with the
1409 * ir_dereference handler.
1410 */
1411 static dst_reg
1412 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1413 {
1414 /* The LHS must be a dereference. If the LHS is a variable indexed array
1415 * access of a vector, it must be separated into a series of conditional moves
1416 * before reaching this point (see ir_vec_index_to_cond_assign).
1417 */
1418 assert(ir->as_dereference());
1419 ir_dereference_array *deref_array = ir->as_dereference_array();
1420 if (deref_array) {
1421 assert(!deref_array->array->type->is_vector());
1422 }
1423
1424 /* Use the rvalue deref handler for the most part. We'll ignore
1425 * swizzles in it and write swizzles using writemask, though.
1426 */
1427 ir->accept(v);
1428 return dst_reg(v->result);
1429 }
1430
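/* Copy a (possibly aggregate) value from *src to *dst one vec4 at a
 * time, recursing through structs, arrays, and matrix columns and
 * advancing the reg_offset of both operands as each vector is moved.
 */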
1431 void
1432 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1433 const struct glsl_type *type, uint32_t predicate)
1434 {
1435 if (type->base_type == GLSL_TYPE_STRUCT) {
1436 for (unsigned int i = 0; i < type->length; i++) {
1437 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1438 }
1439 return;
1440 }
1441
1442 if (type->is_array()) {
1443 for (unsigned int i = 0; i < type->length; i++) {
1444 emit_block_move(dst, src, type->fields.array, predicate);
1445 }
1446 return;
1447 }
1448
1449 if (type->is_matrix()) {
1450 const struct glsl_type *vec_type;
1451
1452 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1453 type->vector_elements, 1);
1454
1455 for (int i = 0; i < type->matrix_columns; i++) {
1456 emit_block_move(dst, src, vec_type, predicate);
1457 }
1458 return;
1459 }
1460
1461 assert(type->is_scalar() || type->is_vector());
1462
1463 dst->type = brw_type_for_base_type(type);
1464 src->type = dst->type;
1465
1466 dst->writemask = (1 << type->vector_elements) - 1;
1467
1468 /* Do we need to worry about swizzling a swizzle? */
1469 assert(src->swizzle == BRW_SWIZZLE_NOOP);
1470 src->swizzle = swizzle_for_size(type->vector_elements);
1471
1472 vec4_instruction *inst = emit(MOV(*dst, *src));
1473 inst->predicate = predicate;
1474
1475 dst->reg_offset++;
1476 src->reg_offset++;
1477 }
1478
1479
1480 /* If the RHS processing resulted in an instruction generating a
1481 * temporary value, and it would be easy to rewrite the instruction to
1482 * generate its result right into the LHS instead, do so. This ends
1483 * up reliably removing instructions where it can be tricky to do so
1484 * later without real UD chain information.
1485 */
1486 bool
1487 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1488 dst_reg dst,
1489 src_reg src,
1490 vec4_instruction *pre_rhs_inst,
1491 vec4_instruction *last_rhs_inst)
1492 {
1493 /* This could be supported, but it would take more smarts. */
1494 if (ir->condition)
1495 return false;
1496
1497 if (pre_rhs_inst == last_rhs_inst)
1498 return false; /* No instructions generated to work with. */
1499
1500 /* Make sure the last instruction generated our source reg. */
1501 if (src.file != GRF ||
1502 src.file != last_rhs_inst->dst.file ||
1503 src.reg != last_rhs_inst->dst.reg ||
1504 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1505 src.reladdr ||
1506 src.abs ||
1507 src.negate ||
1508 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1509 return false;
1510
1511 /* Check that that last instruction fully initialized the channels
1512 * we want to use, in the order we want to use them. We could
1513 * potentially reswizzle the operands of many instructions so that
1514 * we could handle out of order channels, but don't yet.
1515 */
1516 for (int i = 0; i < 4; i++) {
1517 if (dst.writemask & (1 << i)) {
1518 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1519 return false;
1520
1521 if (BRW_GET_SWZ(src.swizzle, i) != i)
1522 return false;
1523 }
1524 }
1525
1526 /* Success! Rewrite the instruction. */
1527 last_rhs_inst->dst.file = dst.file;
1528 last_rhs_inst->dst.reg = dst.reg;
1529 last_rhs_inst->dst.reg_offset = dst.reg_offset;
1530 last_rhs_inst->dst.reladdr = dst.reladdr;
1531 last_rhs_inst->dst.writemask &= dst.writemask;
1532
1533 return true;
1534 }
1535
1536 void
1537 vec4_visitor::visit(ir_assignment *ir)
1538 {
1539 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1540 uint32_t predicate = BRW_PREDICATE_NONE;
1541
1542 if (!ir->lhs->type->is_scalar() &&
1543 !ir->lhs->type->is_vector()) {
1544 ir->rhs->accept(this);
1545 src_reg src = this->result;
1546
1547 if (ir->condition) {
1548 emit_bool_to_cond_code(ir->condition, &predicate);
1549 }
1550
1551 emit_block_move(&dst, &src, ir->rhs->type, predicate);
1552 return;
1553 }
1554
1555 /* Now we're down to just a scalar/vector with writemasks. */
1556 int i;
1557
1558 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1559 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1560
1561 ir->rhs->accept(this);
1562
1563 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1564
1565 src_reg src = this->result;
1566
1567 int swizzles[4];
1568 int first_enabled_chan = 0;
1569 int src_chan = 0;
1570
1571 assert(ir->lhs->type->is_vector() ||
1572 ir->lhs->type->is_scalar());
1573 dst.writemask = ir->write_mask;
1574
1575 for (int i = 0; i < 4; i++) {
1576 if (dst.writemask & (1 << i)) {
1577 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1578 break;
1579 }
1580 }
1581
1582 /* Swizzle a small RHS vector into the channels being written.
1583 *
1584 * glsl ir treats write_mask as dictating how many channels are
1585 * present on the RHS while in our instructions we need to make
1586 * those channels appear in the slots of the vec4 they're written to.
1587 */
1588 for (int i = 0; i < 4; i++) {
1589 if (dst.writemask & (1 << i))
1590 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1591 else
1592 swizzles[i] = first_enabled_chan;
1593 }
1594 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1595 swizzles[2], swizzles[3]);
1596
1597 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1598 return;
1599 }
1600
1601 if (ir->condition) {
1602 emit_bool_to_cond_code(ir->condition, &predicate);
1603 }
1604
1605 for (i = 0; i < type_size(ir->lhs->type); i++) {
1606 vec4_instruction *inst = emit(MOV(dst, src));
1607 inst->predicate = predicate;
1608
1609 dst.reg_offset++;
1610 src.reg_offset++;
1611 }
1612 }
1613
1614 void
1615 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1616 {
1617 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1618 foreach_list(node, &ir->components) {
1619 ir_constant *field_value = (ir_constant *)node;
1620
1621 emit_constant_values(dst, field_value);
1622 }
1623 return;
1624 }
1625
1626 if (ir->type->is_array()) {
1627 for (unsigned int i = 0; i < ir->type->length; i++) {
1628 emit_constant_values(dst, ir->array_elements[i]);
1629 }
1630 return;
1631 }
1632
1633 if (ir->type->is_matrix()) {
1634 for (int i = 0; i < ir->type->matrix_columns; i++) {
1635 for (int j = 0; j < ir->type->vector_elements; j++) {
1636 dst->writemask = 1 << j;
1637 dst->type = BRW_REGISTER_TYPE_F;
1638
1639 emit(MOV(*dst,
1640 src_reg(ir->value.f[i * ir->type->vector_elements + j])));
1641 }
1642 dst->reg_offset++;
1643 }
1644 return;
1645 }
1646
1647 for (int i = 0; i < ir->type->vector_elements; i++) {
1648 dst->writemask = 1 << i;
1649 dst->type = brw_type_for_base_type(ir->type);
1650
1651 switch (ir->type->base_type) {
1652 case GLSL_TYPE_FLOAT:
1653 emit(MOV(*dst, src_reg(ir->value.f[i])));
1654 break;
1655 case GLSL_TYPE_INT:
1656 emit(MOV(*dst, src_reg(ir->value.i[i])));
1657 break;
1658 case GLSL_TYPE_UINT:
1659 emit(MOV(*dst, src_reg(ir->value.u[i])));
1660 break;
1661 case GLSL_TYPE_BOOL:
1662 emit(MOV(*dst, src_reg(ir->value.b[i])));
1663 break;
1664 default:
1665 assert(!"Non-float/uint/int/bool constant");
1666 break;
1667 }
1668 }
1669 dst->reg_offset++;
1670 }
1671
1672 void
1673 vec4_visitor::visit(ir_constant *ir)
1674 {
1675 dst_reg dst = dst_reg(this, ir->type);
1676 this->result = src_reg(dst);
1677
1678 emit_constant_values(&dst, ir);
1679 }
1680
1681 void
1682 vec4_visitor::visit(ir_call *ir)
1683 {
1684 assert(!"not reached");
1685 }
1686
1687 void
1688 vec4_visitor::visit(ir_texture *ir)
1689 {
1690 /* FINISHME: Implement vertex texturing.
1691 *
1692 * With 0 vertex samplers available, the linker will reject
1693 * programs that do vertex texturing, but after our visitor has
1694 * run.
1695 */
1696 this->result = src_reg(this, glsl_type::vec4_type);
1697 }
1698
1699 void
1700 vec4_visitor::visit(ir_return *ir)
1701 {
1702 assert(!"not reached");
1703 }
1704
1705 void
1706 vec4_visitor::visit(ir_discard *ir)
1707 {
1708 assert(!"not reached");
1709 }
1710
1711 void
1712 vec4_visitor::visit(ir_if *ir)
1713 {
1714 /* Don't point the annotation at the if statement, because then it plus
1715 * the then and else blocks get printed.
1716 */
1717 this->base_ir = ir->condition;
1718
1719 if (intel->gen == 6) {
1720 emit_if_gen6(ir);
1721 } else {
1722 uint32_t predicate;
1723 emit_bool_to_cond_code(ir->condition, &predicate);
1724 emit(IF(predicate));
1725 }
1726
1727 visit_instructions(&ir->then_instructions);
1728
1729 if (!ir->else_instructions.is_empty()) {
1730 this->base_ir = ir->condition;
1731 emit(BRW_OPCODE_ELSE);
1732
1733 visit_instructions(&ir->else_instructions);
1734 }
1735
1736 this->base_ir = ir->condition;
1737 emit(BRW_OPCODE_ENDIF);
1738 }
1739
1740 void
1741 vec4_visitor::emit_ndc_computation()
1742 {
1743 /* Get the position */
1744 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
1745
1746 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1747 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1748 output_reg[BRW_VERT_RESULT_NDC] = ndc;
1749
1750 current_annotation = "NDC";
1751 dst_reg ndc_w = ndc;
1752 ndc_w.writemask = WRITEMASK_W;
1753 src_reg pos_w = pos;
1754 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1755 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1756
1757 dst_reg ndc_xyz = ndc;
1758 ndc_xyz.writemask = WRITEMASK_XYZ;
1759
1760 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
1761 }
1762
1763 void
1764 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
1765 {
1766 if (intel->gen < 6 &&
1767 ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
1768 c->key.nr_userclip || brw->has_negative_rhw_bug)) {
1769 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1770 GLuint i;
1771
1772 emit(MOV(header1, 0u));
1773
1774 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1775 src_reg psiz = src_reg(output_reg[VERT_RESULT_PSIZ]);
1776
1777 current_annotation = "Point size";
1778 header1.writemask = WRITEMASK_W;
1779 emit(MUL(header1, psiz, src_reg((float)(1 << 11))));
1780 emit(AND(header1, src_reg(header1), 0x7ff << 8));
1781 }
1782
1783 current_annotation = "Clipping flags";
1784 for (i = 0; i < c->key.nr_userclip; i++) {
1785 vec4_instruction *inst;
1786
1787 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
1788 src_reg(this->userplane[i])));
1789 inst->conditional_mod = BRW_CONDITIONAL_L;
1790
1791 inst = emit(OR(header1, src_reg(header1), 1u << i));
1792 inst->predicate = BRW_PREDICATE_NORMAL;
1793 }
1794
1795 /* i965 clipping workaround:
1796 * 1) Test for -ve rhw
1797 * 2) If set,
1798 * set ndc = (0,0,0,0)
1799 * set ucp[6] = 1
1800 *
1801 * Later, clipping will detect ucp[6] and ensure the primitive is
1802 * clipped against all fixed planes.
1803 */
1804 if (brw->has_negative_rhw_bug) {
1805 #if 0
1806 /* FINISHME */
1807 brw_CMP(p,
1808 vec8(brw_null_reg()),
1809 BRW_CONDITIONAL_L,
1810 brw_swizzle1(output_reg[BRW_VERT_RESULT_NDC], 3),
1811 brw_imm_f(0));
1812
1813 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
1814 brw_MOV(p, output_reg[BRW_VERT_RESULT_NDC], brw_imm_f(0));
1815 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1816 #endif
1817 }
1818
1819 header1.writemask = WRITEMASK_XYZW;
1820 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
1821 } else if (intel->gen < 6) {
1822 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
1823 } else {
1824 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
1825 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1826 emit(MOV(brw_writemask(reg, WRITEMASK_W),
1827 src_reg(output_reg[VERT_RESULT_PSIZ])));
1828 }
1829 }
1830 }
1831
1832 void
1833 vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
1834 {
1835 if (intel->gen < 6) {
1836 /* Clip distance slots are set aside in gen5, but they are not used. It
1837 * is not clear whether we actually need to set aside space for them,
1838 * but the performance cost is negligible.
1839 */
1840 return;
1841 }
1842
1843 for (int i = 0; i + offset < c->key.nr_userclip && i < 4; ++i) {
1844 emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
1845 src_reg(output_reg[VERT_RESULT_HPOS]),
1846 src_reg(this->userplane[i + offset])));
1847 }
1848 }
1849
1850 void
1851 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int vert_result)
1852 {
1853 assert (vert_result < VERT_RESULT_MAX);
1854 current_annotation = output_reg_annotation[vert_result];
1855 /* Copy the register, saturating if necessary */
1856 vec4_instruction *inst = emit(MOV(reg,
1857 src_reg(output_reg[vert_result])));
1858 if ((vert_result == VERT_RESULT_COL0 ||
1859 vert_result == VERT_RESULT_COL1 ||
1860 vert_result == VERT_RESULT_BFC0 ||
1861 vert_result == VERT_RESULT_BFC1) &&
1862 c->key.clamp_vertex_color) {
1863 inst->saturate = true;
1864 }
1865 }
1866
1867 void
1868 vec4_visitor::emit_urb_slot(int mrf, int vert_result)
1869 {
1870 struct brw_reg hw_reg = brw_message_reg(mrf);
1871 dst_reg reg = dst_reg(MRF, mrf);
1872 reg.type = BRW_REGISTER_TYPE_F;
1873
1874 switch (vert_result) {
1875 case VERT_RESULT_PSIZ:
1876 /* PSIZ is always in slot 0, and is coupled with other flags. */
1877 current_annotation = "indices, point width, clip flags";
1878 emit_psiz_and_flags(hw_reg);
1879 break;
1880 case BRW_VERT_RESULT_NDC:
1881 current_annotation = "NDC";
1882 emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
1883 break;
1884 case BRW_VERT_RESULT_HPOS_DUPLICATE:
1885 case VERT_RESULT_HPOS:
1886 current_annotation = "gl_Position";
1887 emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
1888 break;
1889 case VERT_RESULT_CLIP_DIST0:
1890 case VERT_RESULT_CLIP_DIST1:
1891 if (this->c->key.uses_clip_distance) {
1892 emit_generic_urb_slot(reg, vert_result);
1893 } else {
1894 current_annotation = "user clip distances";
1895 emit_clip_distances(hw_reg, (vert_result - VERT_RESULT_CLIP_DIST0) * 4);
1896 }
1897 break;
1898 case BRW_VERT_RESULT_PAD:
1899 /* No need to write to this slot */
1900 break;
1901 default:
1902 emit_generic_urb_slot(reg, vert_result);
1903 break;
1904 }
1905 }
1906
1907 static int
1908 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
1909 {
1910 struct intel_context *intel = &brw->intel;
1911
1912 if (intel->gen >= 6) {
1913 /* URB data written (does not include the message header reg) must
1914 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
1915 * section 5.4.3.2.2: URB_INTERLEAVED.
1916 *
1917 * URB entries are allocated on a multiple of 1024 bits, so an
1918 * extra 128 bits written here to make the end align to 256 is
1919 * no problem.
1920 */
1921 if ((mlen % 2) != 1)
1922 mlen++;
1923 }
1924
1925 return mlen;
1926 }
1927
1928 /**
1929 * Generates the VUE payload plus the 1 or 2 URB write instructions to
1930 * complete the VS thread.
1931 *
1932 * The VUE layout is documented in Volume 2a.
1933 */
1934 void
1935 vec4_visitor::emit_urb_writes()
1936 {
1937 /* MRF 0 is reserved for the debugger, so start with message header
1938 * in MRF 1.
1939 */
1940 int base_mrf = 1;
1941 int mrf = base_mrf;
1942 /* In the process of generating our URB write message contents, we
1943 * may need to unspill a register or load from an array. Those
1944 * reads would use MRFs 14-15.
1945 */
1946 int max_usable_mrf = 13;
1947
1948 /* The following assertion verifies that max_usable_mrf causes an
1949 * even-numbered amount of URB write data, which will meet gen6's
1950 * requirements for length alignment.
1951 */
1952 assert ((max_usable_mrf - base_mrf) % 2 == 0);
1953
1954 /* FINISHME: edgeflag */
1955
1956 brw_compute_vue_map(&c->vue_map, intel, c->key.nr_userclip,
1957 c->prog_data.outputs_written);
1958
1959 /* First mrf is the g0-based message header containing URB handles and such,
1960 * which is implied in VS_OPCODE_URB_WRITE.
1961 */
1962 mrf++;
1963
1964 if (intel->gen < 6) {
1965 emit_ndc_computation();
1966 }
1967
1968 /* Set up the VUE data for the first URB write */
1969 int slot;
1970 for (slot = 0; slot < c->vue_map.num_slots; ++slot) {
1971 emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
1972
1973 /* If this was max_usable_mrf, we can't fit anything more into this URB
1974 * WRITE.
1975 */
1976 if (mrf > max_usable_mrf) {
1977 slot++;
1978 break;
1979 }
1980 }
1981
1982 current_annotation = "URB write";
1983 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
1984 inst->base_mrf = base_mrf;
1985 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1986 inst->eot = (slot >= c->vue_map.num_slots);
1987
1988 /* Optional second URB write */
1989 if (!inst->eot) {
1990 mrf = base_mrf + 1;
1991
1992 for (; slot < c->vue_map.num_slots; ++slot) {
1993 assert(mrf < max_usable_mrf);
1994
1995 emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
1996 }
1997
1998 current_annotation = "URB write";
1999 inst = emit(VS_OPCODE_URB_WRITE);
2000 inst->base_mrf = base_mrf;
2001 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2002 inst->eot = true;
2003 /* URB destination offset. In the previous write, we got MRFs
2004 * 2-13 minus the one header MRF, so 12 regs. URB offset is in
2005 * URB row increments, and each of our MRFs is half of one of
2006 * those, since we're doing interleaved writes.
2007 */
2008 inst->offset = (max_usable_mrf - base_mrf) / 2;
2009 }
2010
2011 if (intel->gen == 6)
2012 c->prog_data.urb_entry_size = ALIGN(c->vue_map.num_slots, 8) / 8;
2013 else
2014 c->prog_data.urb_entry_size = ALIGN(c->vue_map.num_slots, 4) / 4;
2015 }
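
/* Worked example for the sizes above (illustrative values): with base_mrf 1
 * and max_usable_mrf 13, a full first write carries 12 data MRFs, so the
 * second write starts at URB offset (13 - 1) / 2 = 6 rows (two interleaved
 * MRFs per row).  A VUE map with, say, 10 slots gives an urb_entry_size of
 * ALIGN(10, 8) / 8 = 2 on gen6 and ALIGN(10, 4) / 4 = 3 otherwise.
 */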
2016
2017 src_reg
2018 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2019 src_reg *reladdr, int reg_offset)
2020 {
2021 /* Because we store the values to scratch interleaved like our
2022 * vertex data, we need to scale the vec4 index by 2.
2023 */
2024 int message_header_scale = 2;
2025
2026 /* Pre-gen6, the message header uses byte offsets instead of vec4
2027 * (16-byte) offset units.
2028 */
2029 if (intel->gen < 6)
2030 message_header_scale *= 16;
2031
2032 if (reladdr) {
2033 src_reg index = src_reg(this, glsl_type::int_type);
2034
2035 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2036 emit_before(inst, MUL(dst_reg(index),
2037 index, src_reg(message_header_scale)));
2038
2039 return index;
2040 } else {
2041 return src_reg(reg_offset * message_header_scale);
2042 }
2043 }
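
/* Worked example of the scaling above (illustrative values): for
 * reg_offset 3 with no reladdr, gen6+ returns src_reg(6) (vec4 rows,
 * times 2 for the interleaved layout), while pre-gen6 returns src_reg(96)
 * (the same offset in bytes: 3 * 2 * 16).
 */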
2044
2045 src_reg
2046 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2047 src_reg *reladdr, int reg_offset)
2048 {
2049 if (reladdr) {
2050 src_reg index = src_reg(this, glsl_type::int_type);
2051
2052 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2053
2054 /* Pre-gen6, the message header uses byte offsets instead of vec4
2055 * (16-byte) offset units.
2056 */
2057 if (intel->gen < 6) {
2058 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2059 }
2060
2061 return index;
2062 } else {
2063 int message_header_scale = intel->gen < 6 ? 16 : 1;
2064 return src_reg(reg_offset * message_header_scale);
2065 }
2066 }
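
/* Worked example (illustrative values): for reg_offset 5 with no reladdr,
 * gen6+ returns src_reg(5) (vec4 units), while pre-gen6 returns src_reg(80)
 * (5 * 16 bytes).  With a reladdr, the same scaling is applied to the
 * computed index by the ADD/MUL emitted above.
 */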
2067
2068 /**
2069 * Emits an instruction before @inst to load the value named by @orig_src
2070 * from scratch space at @base_offset to @temp.
2071 */
2072 void
2073 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2074 dst_reg temp, src_reg orig_src,
2075 int base_offset)
2076 {
2077 int reg_offset = base_offset + orig_src.reg_offset;
2078 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2079
2080 emit_before(inst, SCRATCH_READ(temp, index));
2081 }
2082
2083 /**
2084 * Emits an instruction after @inst to store the value to be written
2085 * to @orig_dst to scratch space at @base_offset, from @temp.
2086 */
2087 void
2088 vec4_visitor::emit_scratch_write(vec4_instruction *inst,
2089 src_reg temp, dst_reg orig_dst,
2090 int base_offset)
2091 {
2092 int reg_offset = base_offset + orig_dst.reg_offset;
2093 src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
2094
2095 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2096 orig_dst.writemask));
2097 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
2098 write->predicate = inst->predicate;
2099 write->ir = inst->ir;
2100 write->annotation = inst->annotation;
2101 inst->insert_after(write);
2102 }
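
/* Note that the write emitted above copies the original instruction's
 * predicate and carries orig_dst's writemask on its g0-based destination,
 * so the information needed to preserve partial and conditional writes
 * travels with the SCRATCH_WRITE instruction.
 */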
2103
2104 /**
2105 * We can't generally support array access in GRF space, because a
2106 * single instruction's destination can only span 2 contiguous
2107 * registers. So, we send all GRF arrays that get variable index
2108 * access to scratch space.
2109 */
2110 void
2111 vec4_visitor::move_grf_array_access_to_scratch()
2112 {
2113 int scratch_loc[this->virtual_grf_count];
2114
2115 for (int i = 0; i < this->virtual_grf_count; i++) {
2116 scratch_loc[i] = -1;
2117 }
2118
2119 /* First, calculate the set of virtual GRFs that need to be punted
2120 * to scratch due to having any array access on them, and record
2121 * where in scratch each of them will live.
2122 */
2123 foreach_list(node, &this->instructions) {
2124 vec4_instruction *inst = (vec4_instruction *)node;
2125
2126 if (inst->dst.file == GRF && inst->dst.reladdr &&
2127 scratch_loc[inst->dst.reg] == -1) {
2128 scratch_loc[inst->dst.reg] = c->last_scratch;
2129 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
2130 }
2131
2132 for (int i = 0; i < 3; i++) {
2133 src_reg *src = &inst->src[i];
2134
2135 if (src->file == GRF && src->reladdr &&
2136 scratch_loc[src->reg] == -1) {
2137 scratch_loc[src->reg] = c->last_scratch;
2138 c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
2139 }
2140 }
2141 }
2142
2143 /* Now, for anything that will be accessed through scratch, rewrite
2144 * it to load/store. Note that this is a _safe list walk, because
2145 * we may generate a new scratch_write instruction after the one
2146 * we're processing.
2147 */
2148 foreach_list_safe(node, &this->instructions) {
2149 vec4_instruction *inst = (vec4_instruction *)node;
2150
2151 /* Set up the annotation tracking for newly generated instructions. */
2152 base_ir = inst->ir;
2153 current_annotation = inst->annotation;
2154
2155 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2156 src_reg temp = src_reg(this, glsl_type::vec4_type);
2157
2158 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
2159
2160 inst->dst.file = temp.file;
2161 inst->dst.reg = temp.reg;
2162 inst->dst.reg_offset = temp.reg_offset;
2163 inst->dst.reladdr = NULL;
2164 }
2165
2166 for (int i = 0; i < 3; i++) {
2167 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2168 continue;
2169
2170 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2171
2172 emit_scratch_read(inst, temp, inst->src[i],
2173 scratch_loc[inst->src[i].reg]);
2174
2175 inst->src[i].file = temp.file;
2176 inst->src[i].reg = temp.reg;
2177 inst->src[i].reg_offset = temp.reg_offset;
2178 inst->src[i].reladdr = NULL;
2179 }
2180 }
2181 }
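
/* Worked example of the scratch allocation above (illustrative values):
 * each vec4 element of a spilled array reserves 8 * 4 = 32 units of
 * scratch (presumably 8 dwords of 4 bytes each), so a virtual GRF of
 * size 4 -- e.g. a vec4[4] temporary indexed with a variable -- advances
 * c->last_scratch by 128.
 */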
2182
2183 /**
2184 * Emits an instruction before @inst to load the value named by @orig_src
2185 * from the pull constant buffer (surface) at @base_offset to @temp.
2186 */
2187 void
2188 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
2189 dst_reg temp, src_reg orig_src,
2190 int base_offset)
2191 {
2192 int reg_offset = base_offset + orig_src.reg_offset;
2193 src_reg index = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
2194 vec4_instruction *load;
2195
2196 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
2197 temp, index);
2198 load->base_mrf = 14;
2199 load->mlen = 1;
2200 emit_before(inst, load);
2201 }
2202
2203 /**
2204 * Implements array access of uniforms by inserting a
2205 * PULL_CONSTANT_LOAD instruction.
2206 *
2207 * Unlike temporary GRF array access (which we don't support, due to
2208 * the difficulty of doing relative addressing on instruction
2209 * destinations), we could potentially do array access of uniforms
2210 * that were loaded in GRF space as push constants. In the real-world
2211 * usage we've seen, though, the arrays involved are always larger
2212 * than we could load as push constants, so we always move all
2213 * uniform array access out to a pull constant buffer.
2214 */
2215 void
2216 vec4_visitor::move_uniform_array_access_to_pull_constants()
2217 {
2218 int pull_constant_loc[this->uniforms];
2219
2220 for (int i = 0; i < this->uniforms; i++) {
2221 pull_constant_loc[i] = -1;
2222 }
2223
2224 /* Walk through and find array access of uniforms. Put a copy of that
2225 * uniform in the pull constant buffer.
2226 *
2227 * Note that we don't move constant-indexed accesses to arrays. No
2228 * testing has been done of the performance impact of this choice.
2229 */
2230 foreach_list_safe(node, &this->instructions) {
2231 vec4_instruction *inst = (vec4_instruction *)node;
2232
2233 for (int i = 0; i < 3; i++) {
2234 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
2235 continue;
2236
2237 int uniform = inst->src[i].reg;
2238
2239 /* If this array isn't already present in the pull constant buffer,
2240 * add it.
2241 */
2242 if (pull_constant_loc[uniform] == -1) {
2243 const float **values = &prog_data->param[uniform * 4];
2244
2245 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
2246
2247 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
2248 prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
2249 }
2250 }
2251
2252 /* Set up the annotation tracking for newly generated instructions. */
2253 base_ir = inst->ir;
2254 current_annotation = inst->annotation;
2255
2256 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2257
2258 emit_pull_constant_load(inst, temp, inst->src[i],
2259 pull_constant_loc[uniform]);
2260
2261 inst->src[i].file = temp.file;
2262 inst->src[i].reg = temp.reg;
2263 inst->src[i].reg_offset = temp.reg_offset;
2264 inst->src[i].reladdr = NULL;
2265 }
2266 }
2267
2268 /* Now there are no accesses of the UNIFORM file with a reladdr, so
2269 * no need to track them as larger-than-vec4 objects. This will be
2270 * relied on in cutting out unused uniform vectors from push
2271 * constants.
2272 */
2273 split_uniform_registers();
2274 }
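
/* Worked example (illustrative): for a uniform array whose uniform_size is
 * 4 -- e.g. a vec4[4] -- the 16 floats starting at param[uniform * 4] are
 * appended to pull_param, pull_constant_loc records the vec4 index where
 * they now begin (nr_pull_params / 4 at the time of the copy), and each
 * reladdr access is rewritten to read a temporary loaded with
 * VS_OPCODE_PULL_CONSTANT_LOAD.
 */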
2275
2276 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
2277 struct gl_shader_program *prog,
2278 struct brw_shader *shader)
2279 {
2280 this->c = c;
2281 this->p = &c->func;
2282 this->brw = p->brw;
2283 this->intel = &brw->intel;
2284 this->ctx = &intel->ctx;
2285 this->prog = prog;
2286 this->shader = shader;
2287
2288 this->mem_ctx = ralloc_context(NULL);
2289 this->failed = false;
2290
2291 this->base_ir = NULL;
2292 this->current_annotation = NULL;
2293
2295 this->vp = prog->VertexProgram;
2296 this->prog_data = &c->prog_data;
2297
2298 this->variable_ht = hash_table_ctor(0,
2299 hash_table_pointer_hash,
2300 hash_table_pointer_compare);
2301
2302 this->virtual_grf_def = NULL;
2303 this->virtual_grf_use = NULL;
2304 this->virtual_grf_sizes = NULL;
2305 this->virtual_grf_count = 0;
2306 this->virtual_grf_reg_map = NULL;
2307 this->virtual_grf_reg_count = 0;
2308 this->virtual_grf_array_size = 0;
2309 this->live_intervals_valid = false;
2310
2311 this->uniforms = 0;
2316 }
2317
2318 vec4_visitor::~vec4_visitor()
2319 {
2320 ralloc_free(this->mem_ctx);
2321 hash_table_dtor(this->variable_ht);
2322 }
2323
2324
2325 void
2326 vec4_visitor::fail(const char *format, ...)
2327 {
2328 va_list va;
2329 char *msg;
2330
2331 if (failed)
2332 return;
2333
2334 failed = true;
2335
2336 va_start(va, format);
2337 msg = ralloc_vasprintf(mem_ctx, format, va);
2338 va_end(va);
2339 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2340
2341 this->fail_msg = msg;
2342
2343 if (INTEL_DEBUG & DEBUG_VS) {
2344 fprintf(stderr, "%s", msg);
2345 }
2346 }
2347
2348 } /* namespace brw */