i965/vs: Fix gen6+ math operand quirks in one place
mesa.git: src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/context.h"
27 #include "main/macros.h"
28 #include "program/prog_parameter.h"
29 #include "program/sampler.h"
30 }
31
32 namespace brw {
33
34 vec4_instruction::vec4_instruction(vec4_visitor *v,
35 enum opcode opcode, dst_reg dst,
36 src_reg src0, src_reg src1, src_reg src2)
37 {
38 this->opcode = opcode;
39 this->dst = dst;
40 this->src[0] = src0;
41 this->src[1] = src1;
42 this->src[2] = src2;
43 this->ir = v->base_ir;
44 this->annotation = v->current_annotation;
45 }
46
47 vec4_instruction *
48 vec4_visitor::emit(vec4_instruction *inst)
49 {
50 this->instructions.push_tail(inst);
51
52 return inst;
53 }
54
55 vec4_instruction *
56 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
57 {
58 new_inst->ir = inst->ir;
59 new_inst->annotation = inst->annotation;
60
61 inst->insert_before(new_inst);
62
63 return inst;
64 }
65
66 vec4_instruction *
67 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
68 src_reg src0, src_reg src1, src_reg src2)
69 {
70 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
71 src0, src1, src2));
72 }
73
74
75 vec4_instruction *
76 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
77 {
78 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
79 }
80
81 vec4_instruction *
82 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
83 {
84 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
85 }
86
87 vec4_instruction *
88 vec4_visitor::emit(enum opcode opcode)
89 {
90 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
91 }
92
93 #define ALU1(op) \
94 vec4_instruction * \
95 vec4_visitor::op(dst_reg dst, src_reg src0) \
96 { \
97 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
98 src0); \
99 }
100
101 #define ALU2(op) \
102 vec4_instruction * \
103 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
104 { \
105 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
106 src0, src1); \
107 }
108
109 ALU1(NOT)
110 ALU1(MOV)
111 ALU1(FRC)
112 ALU1(RNDD)
113 ALU1(RNDE)
114 ALU1(RNDZ)
115 ALU2(ADD)
116 ALU2(MUL)
117 ALU2(MACH)
118 ALU2(AND)
119 ALU2(OR)
120 ALU2(XOR)
121 ALU2(DP3)
122 ALU2(DP4)
123 ALU2(DPH)
124 ALU2(SHL)
125 ALU2(SHR)
126 ALU2(ASR)
127
128 /** Gen4 predicated IF. */
129 vec4_instruction *
130 vec4_visitor::IF(uint32_t predicate)
131 {
132 vec4_instruction *inst;
133
134 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
135 inst->predicate = predicate;
136
137 return inst;
138 }
139
140 /** Gen6+ IF with embedded comparison. */
141 vec4_instruction *
142 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
143 {
144 assert(intel->gen >= 6);
145
146 vec4_instruction *inst;
147
148 resolve_ud_negate(&src0);
149 resolve_ud_negate(&src1);
150
151 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
152 src0, src1);
153 inst->conditional_mod = condition;
154
155 return inst;
156 }
157
158 /**
159 * CMP: Sets the low bit of the destination channels with the result
160 * of the comparison, while the upper bits are undefined, and updates
161 * the flag register with the packed 16 bits of the result.
162 */
163 vec4_instruction *
164 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
165 {
166 vec4_instruction *inst;
167
168 /* original gen4 does type conversion to the destination type
169 * before comparison, producing garbage results for floating
170 * point comparisons.
171 */
172 if (intel->gen == 4) {
173 dst.type = src0.type;
174 if (dst.file == HW_REG)
175 dst.fixed_hw_reg.type = dst.type;
176 }
177
178 resolve_ud_negate(&src0);
179 resolve_ud_negate(&src1);
180
181 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
182 inst->conditional_mod = condition;
183
184 return inst;
185 }
186
187 vec4_instruction *
188 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
189 {
190 vec4_instruction *inst;
191
192 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
193 dst, index);
194 inst->base_mrf = 14;
195 inst->mlen = 2;
196
197 return inst;
198 }
199
200 vec4_instruction *
201 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
202 {
203 vec4_instruction *inst;
204
205 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
206 dst, src, index);
207 inst->base_mrf = 13;
208 inst->mlen = 3;
209
210 return inst;
211 }
212
213 void
214 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
215 {
216 static enum opcode dot_opcodes[] = {
217 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
218 };
219
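/* elements must be 2, 3, or 4, selecting DP2, DP3, or DP4 respectively. */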
220 emit(dot_opcodes[elements - 2], dst, src0, src1);
221 }
222
223 src_reg
224 vec4_visitor::fix_math_operand(src_reg src)
225 {
226 /* The gen6 math instruction ignores the source modifiers --
227 * swizzle, abs, negate, and at least some parts of the register
228 * region description.
229 *
230 * Rather than trying to enumerate all these cases, *always* expand the
231 * operand to a temp GRF for gen6.
232 *
233 * For gen7, keep the operand as-is, except if immediate, which gen7 still
234 * can't use.
235 */
236
237 if (intel->gen == 7 && src.file != IMM)
238 return src;
239
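/* Copying through a plain MOV resolves the modifiers up front: the MOV honors
 * swizzle/abs/negate, so the math instruction only ever sees a clean
 * temporary GRF source.
 */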
240 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
241 expanded.type = src.type;
242 emit(MOV(expanded, src));
243 return src_reg(expanded);
244 }
245
246 void
247 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
248 {
249 src = fix_math_operand(src);
250
251 if (dst.writemask != WRITEMASK_XYZW) {
252 /* The gen6 math instruction must be align1, so we can't do
253 * writemasks.
254 */
255 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
256
257 emit(opcode, temp_dst, src);
258
259 emit(MOV(dst, src_reg(temp_dst)));
260 } else {
261 emit(opcode, dst, src);
262 }
263 }
264
265 void
266 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
267 {
268 vec4_instruction *inst = emit(opcode, dst, src);
269 inst->base_mrf = 1;
270 inst->mlen = 1;
271 }
272
273 void
274 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
275 {
276 switch (opcode) {
277 case SHADER_OPCODE_RCP:
278 case SHADER_OPCODE_RSQ:
279 case SHADER_OPCODE_SQRT:
280 case SHADER_OPCODE_EXP2:
281 case SHADER_OPCODE_LOG2:
282 case SHADER_OPCODE_SIN:
283 case SHADER_OPCODE_COS:
284 break;
285 default:
286 assert(!"not reached: bad math opcode");
287 return;
288 }
289
290 if (intel->gen >= 6) {
291 return emit_math1_gen6(opcode, dst, src);
292 } else {
293 return emit_math1_gen4(opcode, dst, src);
294 }
295 }
296
297 void
298 vec4_visitor::emit_math2_gen6(enum opcode opcode,
299 dst_reg dst, src_reg src0, src_reg src1)
300 {
301 src0 = fix_math_operand(src0);
302 src1 = fix_math_operand(src1);
303
304 if (dst.writemask != WRITEMASK_XYZW) {
305 /* The gen6 math instruction must be align1, so we can't do
306 * writemasks.
307 */
308 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
309 temp_dst.type = dst.type;
310
311 emit(opcode, temp_dst, src0, src1);
312
313 emit(MOV(dst, src_reg(temp_dst)));
314 } else {
315 emit(opcode, dst, src0, src1);
316 }
317 }
318
319 void
320 vec4_visitor::emit_math2_gen4(enum opcode opcode,
321 dst_reg dst, src_reg src0, src_reg src1)
322 {
323 vec4_instruction *inst = emit(opcode, dst, src0, src1);
324 inst->base_mrf = 1;
325 inst->mlen = 2;
326 }
327
328 void
329 vec4_visitor::emit_math(enum opcode opcode,
330 dst_reg dst, src_reg src0, src_reg src1)
331 {
332 switch (opcode) {
333 case SHADER_OPCODE_POW:
334 case SHADER_OPCODE_INT_QUOTIENT:
335 case SHADER_OPCODE_INT_REMAINDER:
336 break;
337 default:
338 assert(!"not reached: unsupported binary math opcode");
339 return;
340 }
341
342 if (intel->gen >= 6) {
343 return emit_math2_gen6(opcode, dst, src0, src1);
344 } else {
345 return emit_math2_gen4(opcode, dst, src0, src1);
346 }
347 }
348
349 void
350 vec4_visitor::visit_instructions(const exec_list *list)
351 {
352 foreach_list(node, list) {
353 ir_instruction *ir = (ir_instruction *)node;
354
355 base_ir = ir;
356 ir->accept(this);
357 }
358 }
359
360
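/**
 * Returns the number of vec4 slots a variable of the given type occupies:
 * one slot per scalar or vector, one per matrix column, and the sum of the
 * members' sizes for arrays and structures.
 */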
361 static int
362 type_size(const struct glsl_type *type)
363 {
364 unsigned int i;
365 int size;
366
367 switch (type->base_type) {
368 case GLSL_TYPE_UINT:
369 case GLSL_TYPE_INT:
370 case GLSL_TYPE_FLOAT:
371 case GLSL_TYPE_BOOL:
372 if (type->is_matrix()) {
373 return type->matrix_columns;
374 } else {
375 /* Regardless of size of vector, it gets a vec4. This is bad
376 * packing for things like floats, but otherwise arrays become a
377 * mess. Hopefully a later pass over the code can pack scalars
378 * down if appropriate.
379 */
380 return 1;
381 }
382 case GLSL_TYPE_ARRAY:
383 assert(type->length > 0);
384 return type_size(type->fields.array) * type->length;
385 case GLSL_TYPE_STRUCT:
386 size = 0;
387 for (i = 0; i < type->length; i++) {
388 size += type_size(type->fields.structure[i].type);
389 }
390 return size;
391 case GLSL_TYPE_SAMPLER:
392 /* Samplers take up one slot in UNIFORMS[], but they're baked in
393 * at link time.
394 */
395 return 1;
396 default:
397 assert(0);
398 return 0;
399 }
400 }
401
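/**
 * Allocates a new virtual GRF of the given size (in vec4 registers),
 * growing the size and reg-map arrays as needed, and returns its index.
 */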
402 int
403 vec4_visitor::virtual_grf_alloc(int size)
404 {
405 if (virtual_grf_array_size <= virtual_grf_count) {
406 if (virtual_grf_array_size == 0)
407 virtual_grf_array_size = 16;
408 else
409 virtual_grf_array_size *= 2;
410 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
411 virtual_grf_array_size);
412 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
413 virtual_grf_array_size);
414 }
415 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
416 virtual_grf_reg_count += size;
417 virtual_grf_sizes[virtual_grf_count] = size;
418 return virtual_grf_count++;
419 }
420
421 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
422 {
423 init();
424
425 this->file = GRF;
426 this->reg = v->virtual_grf_alloc(type_size(type));
427
428 if (type->is_array() || type->is_record()) {
429 this->swizzle = BRW_SWIZZLE_NOOP;
430 } else {
431 this->swizzle = swizzle_for_size(type->vector_elements);
432 }
433
434 this->type = brw_type_for_base_type(type);
435 }
436
437 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
438 {
439 init();
440
441 this->file = GRF;
442 this->reg = v->virtual_grf_alloc(type_size(type));
443
444 if (type->is_array() || type->is_record()) {
445 this->writemask = WRITEMASK_XYZW;
446 } else {
447 this->writemask = (1 << type->vector_elements) - 1;
448 }
449
450 this->type = brw_type_for_base_type(type);
451 }
452
453 /* Our support for uniforms is piggy-backed on the struct
454 * gl_fragment_program, because that's where the values actually
455 * get stored, rather than in some global gl_shader_program uniform
456 * store.
457 */
458 int
459 vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
460 {
461 unsigned int offset = 0;
462 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
463
464 if (type->is_matrix()) {
465 const glsl_type *column = type->column_type();
466
467 for (unsigned int i = 0; i < type->matrix_columns; i++) {
468 offset += setup_uniform_values(loc + offset, column);
469 }
470
471 return offset;
472 }
473
474 switch (type->base_type) {
475 case GLSL_TYPE_FLOAT:
476 case GLSL_TYPE_UINT:
477 case GLSL_TYPE_INT:
478 case GLSL_TYPE_BOOL:
479 for (unsigned int i = 0; i < type->vector_elements; i++) {
480 c->prog_data.param[this->uniforms * 4 + i] = &values[i];
481 }
482
483 /* Set up pad elements to get things aligned to a vec4 boundary. */
484 for (unsigned int i = type->vector_elements; i < 4; i++) {
485 static float zero = 0;
486
487 c->prog_data.param[this->uniforms * 4 + i] = &zero;
488 }
489
490 /* Track the size of this uniform vector, for future packing of
491 * uniforms.
492 */
493 this->uniform_vector_size[this->uniforms] = type->vector_elements;
494 this->uniforms++;
495
496 return 1;
497
498 case GLSL_TYPE_STRUCT:
499 for (unsigned int i = 0; i < type->length; i++) {
500 offset += setup_uniform_values(loc + offset,
501 type->fields.structure[i].type);
502 }
503 return offset;
504
505 case GLSL_TYPE_ARRAY:
506 for (unsigned int i = 0; i < type->length; i++) {
507 offset += setup_uniform_values(loc + offset, type->fields.array);
508 }
509 return offset;
510
511 case GLSL_TYPE_SAMPLER:
512 /* The sampler takes up a slot, but we don't use any values from it. */
513 return 1;
514
515 default:
516 assert(!"not reached");
517 return 0;
518 }
519 }
520
521 void
522 vec4_visitor::setup_uniform_clipplane_values()
523 {
524 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
525
526 if (intel->gen < 6) {
527 /* Pre-Gen6, we compact clip planes. For example, if the user
528 * enables just clip planes 0, 1, and 3, we will enable clip planes
529 * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
530 * plane 2. This simplifies the implementation of the Gen6 clip
531 * thread.
532 */
533 int compacted_clipplane_index = 0;
534 for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
535 if (!(c->key.userclip_planes_enabled_gen_4_5 & (1 << i)))
536 continue;
537
538 this->uniform_vector_size[this->uniforms] = 4;
539 this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
540 this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
541 for (int j = 0; j < 4; ++j) {
542 c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
543 }
544 ++compacted_clipplane_index;
545 ++this->uniforms;
546 }
547 } else {
548 /* In Gen6 and later, we don't compact clip planes, because this
549 * simplifies the implementation of gl_ClipDistance.
550 */
551 for (int i = 0; i < c->key.nr_userclip_plane_consts; ++i) {
552 this->uniform_vector_size[this->uniforms] = 4;
553 this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
554 this->userplane[i].type = BRW_REGISTER_TYPE_F;
555 for (int j = 0; j < 4; ++j) {
556 c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
557 }
558 ++this->uniforms;
559 }
560 }
561 }
562
563 /* Our support for builtin uniforms is even scarier than non-builtin.
564 * It sits on top of the PROG_STATE_VAR parameters that are
565 * automatically updated from GL context state.
566 */
567 void
568 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
569 {
570 const ir_state_slot *const slots = ir->state_slots;
571 assert(ir->state_slots != NULL);
572
573 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
574 /* This state reference has already been setup by ir_to_mesa,
575 * but we'll get the same index back here. We can reference
576 * ParameterValues directly, since unlike brw_fs.cpp, we never
577 * add new state references during compile.
578 */
579 int index = _mesa_add_state_reference(this->vp->Base.Parameters,
580 (gl_state_index *)slots[i].tokens);
581 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
582
583 this->uniform_vector_size[this->uniforms] = 0;
584 /* Add each of the unique swizzled channels of the element.
585 * This will end up matching the size of the glsl_type of this field.
586 */
587 int last_swiz = -1;
588 for (unsigned int j = 0; j < 4; j++) {
589 int swiz = GET_SWZ(slots[i].swizzle, j);
590 if (swiz == last_swiz)
591 break;
592 last_swiz = swiz;
593 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
594 this->uniform_vector_size[this->uniforms]++;
595 }
596 this->uniforms++;
597 }
598 }
599
600 dst_reg *
601 vec4_visitor::variable_storage(ir_variable *var)
602 {
603 return (dst_reg *)hash_table_find(this->variable_ht, var);
604 }
605
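/**
 * Evaluates a boolean condition into the flag register and reports, via
 * *predicate, which predicate mode (normal, or ALL4H/ANY4H for whole-vector
 * comparisons) a following predicated instruction should use.
 */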
606 void
607 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
608 {
609 ir_expression *expr = ir->as_expression();
610
611 *predicate = BRW_PREDICATE_NORMAL;
612
613 if (expr) {
614 src_reg op[2];
615 vec4_instruction *inst;
616
617 assert(expr->get_num_operands() <= 2);
618 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
619 expr->operands[i]->accept(this);
620 op[i] = this->result;
621
622 resolve_ud_negate(&op[i]);
623 }
624
625 switch (expr->operation) {
626 case ir_unop_logic_not:
627 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
628 inst->conditional_mod = BRW_CONDITIONAL_Z;
629 break;
630
631 case ir_binop_logic_xor:
632 inst = emit(XOR(dst_null_d(), op[0], op[1]));
633 inst->conditional_mod = BRW_CONDITIONAL_NZ;
634 break;
635
636 case ir_binop_logic_or:
637 inst = emit(OR(dst_null_d(), op[0], op[1]));
638 inst->conditional_mod = BRW_CONDITIONAL_NZ;
639 break;
640
641 case ir_binop_logic_and:
642 inst = emit(AND(dst_null_d(), op[0], op[1]));
643 inst->conditional_mod = BRW_CONDITIONAL_NZ;
644 break;
645
646 case ir_unop_f2b:
647 if (intel->gen >= 6) {
648 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
649 } else {
650 inst = emit(MOV(dst_null_f(), op[0]));
651 inst->conditional_mod = BRW_CONDITIONAL_NZ;
652 }
653 break;
654
655 case ir_unop_i2b:
656 if (intel->gen >= 6) {
657 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
658 } else {
659 inst = emit(MOV(dst_null_d(), op[0]));
660 inst->conditional_mod = BRW_CONDITIONAL_NZ;
661 }
662 break;
663
664 case ir_binop_all_equal:
665 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
666 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
667 break;
668
669 case ir_binop_any_nequal:
670 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
671 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
672 break;
673
674 case ir_unop_any:
675 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
676 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
677 break;
678
679 case ir_binop_greater:
680 case ir_binop_gequal:
681 case ir_binop_less:
682 case ir_binop_lequal:
683 case ir_binop_equal:
684 case ir_binop_nequal:
685 emit(CMP(dst_null_d(), op[0], op[1],
686 brw_conditional_for_comparison(expr->operation)));
687 break;
688
689 default:
690 assert(!"not reached");
691 break;
692 }
693 return;
694 }
695
696 ir->accept(this);
697
698 resolve_ud_negate(&this->result);
699
700 if (intel->gen >= 6) {
701 vec4_instruction *inst = emit(AND(dst_null_d(),
702 this->result, src_reg(1)));
703 inst->conditional_mod = BRW_CONDITIONAL_NZ;
704 } else {
705 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
706 inst->conditional_mod = BRW_CONDITIONAL_NZ;
707 }
708 }
709
710 /**
711 * Emit a gen6 IF statement with the comparison folded into the IF
712 * instruction.
713 */
714 void
715 vec4_visitor::emit_if_gen6(ir_if *ir)
716 {
717 ir_expression *expr = ir->condition->as_expression();
718
719 if (expr) {
720 src_reg op[2];
721 dst_reg temp;
722
723 assert(expr->get_num_operands() <= 2);
724 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
725 expr->operands[i]->accept(this);
726 op[i] = this->result;
727 }
728
729 switch (expr->operation) {
730 case ir_unop_logic_not:
731 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
732 return;
733
734 case ir_binop_logic_xor:
735 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
736 return;
737
738 case ir_binop_logic_or:
739 temp = dst_reg(this, glsl_type::bool_type);
740 emit(OR(temp, op[0], op[1]));
741 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
742 return;
743
744 case ir_binop_logic_and:
745 temp = dst_reg(this, glsl_type::bool_type);
746 emit(AND(temp, op[0], op[1]));
747 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
748 return;
749
750 case ir_unop_f2b:
751 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
752 return;
753
754 case ir_unop_i2b:
755 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
756 return;
757
758 case ir_binop_greater:
759 case ir_binop_gequal:
760 case ir_binop_less:
761 case ir_binop_lequal:
762 case ir_binop_equal:
763 case ir_binop_nequal:
764 emit(IF(op[0], op[1],
765 brw_conditional_for_comparison(expr->operation)));
766 return;
767
768 case ir_binop_all_equal:
769 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
770 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
771 return;
772
773 case ir_binop_any_nequal:
774 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
775 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
776 return;
777
778 case ir_unop_any:
779 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
780 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
781 return;
782
783 default:
784 assert(!"not reached");
785 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
786 return;
787 }
788 return;
789 }
790
791 ir->condition->accept(this);
792
793 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
794 }
795
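/* Returns a copy of the register with its writemask replaced; used below to
 * write different immediates into different channels of a workaround
 * constant.
 */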
796 static dst_reg
797 with_writemask(dst_reg const & r, int mask)
798 {
799 dst_reg result = r;
800 result.writemask = mask;
801 return result;
802 }
803
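/**
 * Emits per-attribute workarounds (GL_FIXED scaling, 2101010 sign recovery,
 * BGRA swizzling, and normalization/scaling) that the vertex fetch hardware
 * can't apply on its own for this VS key.
 */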
804 void
805 vec4_visitor::emit_attribute_fixups()
806 {
807 dst_reg sign_recovery_shift;
808 dst_reg normalize_factor;
809 dst_reg es3_normalize_factor;
810
811 for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
812 if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
813 uint8_t wa_flags = c->key.gl_attrib_wa_flags[i];
814 dst_reg reg(ATTR, i);
815 dst_reg reg_d = reg;
816 reg_d.type = BRW_REGISTER_TYPE_D;
817 dst_reg reg_ud = reg;
818 reg_ud.type = BRW_REGISTER_TYPE_UD;
819
820 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
821 * come in as floating point conversions of the integer values.
822 */
823 if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) {
824 dst_reg dst = reg;
825 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
826 dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
827 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
828 }
829
830 /* Do sign recovery for 2101010 formats if required. */
831 if (wa_flags & BRW_ATTRIB_WA_SIGN) {
832 if (sign_recovery_shift.file == BAD_FILE) {
833 /* shift constant: <22,22,22,30> */
834 sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
835 emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
836 emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
837 }
838
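/* Shifting each packed field up to the top of the 32-bit register and then
 * arithmetic-shifting it back down sign-extends the 10-bit xyz and 2-bit w
 * components in place.
 */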
839 emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
840 emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift)));
841 }
842
843 /* Apply BGRA swizzle if required. */
844 if (wa_flags & BRW_ATTRIB_WA_BGRA) {
845 src_reg temp = src_reg(reg);
846 temp.swizzle = BRW_SWIZZLE4(2,1,0,3);
847 emit(MOV(reg, temp));
848 }
849
850 if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
851 /* ES 3.0 has different rules for converting signed normalized
852 * fixed-point numbers than desktop GL.
853 */
854 if (_mesa_is_gles3(ctx) && (wa_flags & BRW_ATTRIB_WA_SIGN)) {
855 /* According to equation 2.2 of the ES 3.0 specification,
856 * signed normalization conversion is done by:
857 *
858 * f = c / (2^(b-1)-1)
859 */
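/* With b = 10 for the xyz components and b = 2 for w, the divisors below are
 * (1<<9)-1 = 511 and (1<<1)-1 = 1.
 */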
860 if (es3_normalize_factor.file == BAD_FILE) {
861 /* mul constant: 1 / (2^(b-1) - 1) */
862 es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
863 emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_XYZ),
864 src_reg(1.0f / ((1<<9) - 1))));
865 emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_W),
866 src_reg(1.0f / ((1<<1) - 1))));
867 }
868
869 dst_reg dst = reg;
870 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
871 emit(MOV(dst, src_reg(reg_d)));
872 emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
873 emit_minmax(BRW_CONDITIONAL_G, dst, src_reg(dst), src_reg(-1.0f));
874 } else {
875 /* The following equations are from the OpenGL 3.2 specification:
876 *
877 * 2.1 unsigned normalization
878 * f = c/(2^n-1)
879 *
880 * 2.2 signed normalization
881 * f = (2c+1)/(2^n-1)
882 *
883 * Both of these share a common divisor, which is represented by
884 * "normalize_factor" in the code below.
885 */
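/* For the 2101010 formats n is <10,10,10,2>, so the shared divisor is
 * 1/1023 for xyz and 1/3 for w.
 */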
886 if (normalize_factor.file == BAD_FILE) {
887 /* 1 / (2^b - 1) for b=<10,10,10,2> */
888 normalize_factor = dst_reg(this, glsl_type::vec4_type);
889 emit(MOV(with_writemask(normalize_factor, WRITEMASK_XYZ),
890 src_reg(1.0f / ((1<<10) - 1))));
891 emit(MOV(with_writemask(normalize_factor, WRITEMASK_W),
892 src_reg(1.0f / ((1<<2) - 1))));
893 }
894
895 dst_reg dst = reg;
896 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
897 emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
898
899 /* For signed normalization, we want the numerator to be 2c+1. */
900 if (wa_flags & BRW_ATTRIB_WA_SIGN) {
901 emit(MUL(dst, src_reg(dst), src_reg(2.0f)));
902 emit(ADD(dst, src_reg(dst), src_reg(1.0f)));
903 }
904
905 emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
906 }
907 }
908
909 if (wa_flags & BRW_ATTRIB_WA_SCALE) {
910 dst_reg dst = reg;
911 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
912 emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
913 }
914 }
915 }
916 }
917
918 void
919 vec4_visitor::visit(ir_variable *ir)
920 {
921 dst_reg *reg = NULL;
922
923 if (variable_storage(ir))
924 return;
925
926 switch (ir->mode) {
927 case ir_var_in:
928 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
929 break;
930
931 case ir_var_out:
932 reg = new(mem_ctx) dst_reg(this, ir->type);
933
934 for (int i = 0; i < type_size(ir->type); i++) {
935 output_reg[ir->location + i] = *reg;
936 output_reg[ir->location + i].reg_offset = i;
937 output_reg[ir->location + i].type =
938 brw_type_for_base_type(ir->type->get_scalar_type());
939 output_reg_annotation[ir->location + i] = ir->name;
940 }
941 break;
942
943 case ir_var_auto:
944 case ir_var_temporary:
945 reg = new(mem_ctx) dst_reg(this, ir->type);
946 break;
947
948 case ir_var_uniform:
949 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
950
951 /* Thanks to the lower_ubo_reference pass, we will see only
952 * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
953 * variables, so no need for them to be in variable_ht.
954 */
955 if (ir->uniform_block != -1)
956 return;
957
958 /* Track how big the whole uniform variable is, in case we need to put a
959 * copy of its data into pull constants for array access.
960 */
961 this->uniform_size[this->uniforms] = type_size(ir->type);
962
963 if (!strncmp(ir->name, "gl_", 3)) {
964 setup_builtin_uniform_values(ir);
965 } else {
966 setup_uniform_values(ir->location, ir->type);
967 }
968 break;
969
970 case ir_var_system_value:
971 /* VertexID is stored by the VF as the last vertex element, but
972 * we don't represent it with a flag in inputs_read, so we call
973 * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
974 */
975 reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
976 prog_data->uses_vertexid = true;
977
978 switch (ir->location) {
979 case SYSTEM_VALUE_VERTEX_ID:
980 reg->writemask = WRITEMASK_X;
981 break;
982 case SYSTEM_VALUE_INSTANCE_ID:
983 reg->writemask = WRITEMASK_Y;
984 break;
985 default:
986 assert(!"not reached");
987 break;
988 }
989 break;
990
991 default:
992 assert(!"not reached");
993 }
994
995 reg->type = brw_type_for_base_type(ir->type);
996 hash_table_insert(this->variable_ht, reg, ir);
997 }
998
999 void
1000 vec4_visitor::visit(ir_loop *ir)
1001 {
1002 dst_reg counter;
1003
1004 /* We don't want debugging output to print the whole body of the
1005 * loop as the annotation.
1006 */
1007 this->base_ir = NULL;
1008
1009 if (ir->counter != NULL) {
1010 this->base_ir = ir->counter;
1011 ir->counter->accept(this);
1012 counter = *(variable_storage(ir->counter));
1013
1014 if (ir->from != NULL) {
1015 this->base_ir = ir->from;
1016 ir->from->accept(this);
1017
1018 emit(MOV(counter, this->result));
1019 }
1020 }
1021
1022 emit(BRW_OPCODE_DO);
1023
1024 if (ir->to) {
1025 this->base_ir = ir->to;
1026 ir->to->accept(this);
1027
1028 emit(CMP(dst_null_d(), src_reg(counter), this->result,
1029 brw_conditional_for_comparison(ir->cmp)));
1030
1031 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
1032 inst->predicate = BRW_PREDICATE_NORMAL;
1033 }
1034
1035 visit_instructions(&ir->body_instructions);
1036
1037
1038 if (ir->increment) {
1039 this->base_ir = ir->increment;
1040 ir->increment->accept(this);
1041 emit(ADD(counter, src_reg(counter), this->result));
1042 }
1043
1044 emit(BRW_OPCODE_WHILE);
1045 }
1046
1047 void
1048 vec4_visitor::visit(ir_loop_jump *ir)
1049 {
1050 switch (ir->mode) {
1051 case ir_loop_jump::jump_break:
1052 emit(BRW_OPCODE_BREAK);
1053 break;
1054 case ir_loop_jump::jump_continue:
1055 emit(BRW_OPCODE_CONTINUE);
1056 break;
1057 }
1058 }
1059
1060
1061 void
1062 vec4_visitor::visit(ir_function_signature *ir)
1063 {
1064 assert(0);
1065 (void)ir;
1066 }
1067
1068 void
1069 vec4_visitor::visit(ir_function *ir)
1070 {
1071 /* Ignore function bodies other than main() -- we shouldn't see calls to
1072 * them since they should all be inlined.
1073 */
1074 if (strcmp(ir->name, "main") == 0) {
1075 const ir_function_signature *sig;
1076 exec_list empty;
1077
1078 sig = ir->matching_signature(&empty);
1079
1080 assert(sig);
1081
1082 visit_instructions(&sig->body);
1083 }
1084 }
1085
1086 bool
1087 vec4_visitor::try_emit_sat(ir_expression *ir)
1088 {
1089 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1090 if (!sat_src)
1091 return false;
1092
1093 sat_src->accept(this);
1094 src_reg src = this->result;
1095
1096 this->result = src_reg(this, ir->type);
1097 vec4_instruction *inst;
1098 inst = emit(MOV(dst_reg(this->result), src));
1099 inst->saturate = true;
1100
1101 return true;
1102 }
1103
1104 void
1105 vec4_visitor::emit_bool_comparison(unsigned int op,
1106 dst_reg dst, src_reg src0, src_reg src1)
1107 {
1108 /* original gen4 does destination conversion before comparison. */
1109 if (intel->gen < 5)
1110 dst.type = src0.type;
1111
1112 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
1113
1114 dst.type = BRW_REGISTER_TYPE_D;
1115 emit(AND(dst, src_reg(dst), src_reg(0x1)));
1116 }
1117
1118 void
1119 vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
1120 src_reg src0, src_reg src1)
1121 {
1122 vec4_instruction *inst;
1123
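/* Gen6+ can apply the conditional mod directly to SEL, which then picks src0
 * whenever the comparison passes. Earlier gens need an explicit CMP to set
 * the flag register, followed by a predicated SEL.
 */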
1124 if (intel->gen >= 6) {
1125 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1126 inst->conditional_mod = conditionalmod;
1127 } else {
1128 emit(CMP(dst, src0, src1, conditionalmod));
1129
1130 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1131 inst->predicate = BRW_PREDICATE_NORMAL;
1132 }
1133 }
1134
1135 void
1136 vec4_visitor::visit(ir_expression *ir)
1137 {
1138 unsigned int operand;
1139 src_reg op[Elements(ir->operands)];
1140 src_reg result_src;
1141 dst_reg result_dst;
1142 vec4_instruction *inst;
1143
1144 if (try_emit_sat(ir))
1145 return;
1146
1147 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1148 this->result.file = BAD_FILE;
1149 ir->operands[operand]->accept(this);
1150 if (this->result.file == BAD_FILE) {
1151 printf("Failed to get tree for expression operand:\n");
1152 ir->operands[operand]->print();
1153 exit(1);
1154 }
1155 op[operand] = this->result;
1156
1157 /* Matrix expression operands should have been broken down to vector
1158 * operations already.
1159 */
1160 assert(!ir->operands[operand]->type->is_matrix());
1161 }
1162
1163 int vector_elements = ir->operands[0]->type->vector_elements;
1164 if (ir->operands[1]) {
1165 vector_elements = MAX2(vector_elements,
1166 ir->operands[1]->type->vector_elements);
1167 }
1168
1169 this->result.file = BAD_FILE;
1170
1171 /* Storage for our result. Ideally for an assignment we'd be using
1172 * the actual storage for the result here, instead.
1173 */
1174 result_src = src_reg(this, ir->type);
1175 /* convenience for the emit functions below. */
1176 result_dst = dst_reg(result_src);
1177 /* If nothing special happens, this is the result. */
1178 this->result = result_src;
1179 /* Limit writes to the channels that will be used by result_src later.
1180 * This does limit this temp's use as a temporary for multi-instruction
1181 * sequences.
1182 */
1183 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1184
1185 switch (ir->operation) {
1186 case ir_unop_logic_not:
1187 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
1188 * ones complement of the whole register, not just bit 0.
1189 */
1190 emit(XOR(result_dst, op[0], src_reg(1)));
1191 break;
1192 case ir_unop_neg:
1193 op[0].negate = !op[0].negate;
1194 this->result = op[0];
1195 break;
1196 case ir_unop_abs:
1197 op[0].abs = true;
1198 op[0].negate = false;
1199 this->result = op[0];
1200 break;
1201
1202 case ir_unop_sign:
1203 emit(MOV(result_dst, src_reg(0.0f)));
1204
1205 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1206 inst = emit(MOV(result_dst, src_reg(1.0f)));
1207 inst->predicate = BRW_PREDICATE_NORMAL;
1208
1209 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1210 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1211 inst->predicate = BRW_PREDICATE_NORMAL;
1212
1213 break;
1214
1215 case ir_unop_rcp:
1216 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1217 break;
1218
1219 case ir_unop_exp2:
1220 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1221 break;
1222 case ir_unop_log2:
1223 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1224 break;
1225 case ir_unop_exp:
1226 case ir_unop_log:
1227 assert(!"not reached: should be handled by ir_explog_to_explog2");
1228 break;
1229 case ir_unop_sin:
1230 case ir_unop_sin_reduced:
1231 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1232 break;
1233 case ir_unop_cos:
1234 case ir_unop_cos_reduced:
1235 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1236 break;
1237
1238 case ir_unop_dFdx:
1239 case ir_unop_dFdy:
1240 assert(!"derivatives not valid in vertex shader");
1241 break;
1242
1243 case ir_unop_noise:
1244 assert(!"not reached: should be handled by lower_noise");
1245 break;
1246
1247 case ir_binop_add:
1248 emit(ADD(result_dst, op[0], op[1]));
1249 break;
1250 case ir_binop_sub:
1251 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1252 break;
1253
1254 case ir_binop_mul:
1255 if (ir->type->is_integer()) {
1256 /* For integer multiplication, the MUL uses the low 16 bits
1257 * of one of the operands (src0 on gen6, src1 on gen7). The
1258 * MACH accumulates in the contribution of the upper 16 bits
1259 * of that operand.
1260 *
1261 * FINISHME: Emit just the MUL if we know an operand is small
1262 * enough.
1263 */
1264 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1265
1266 emit(MUL(acc, op[0], op[1]));
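/* After the MUL/MACH pair the accumulator holds the low 32 bits of the full
 * product (MACH's own destination, the high half, is discarded here), which
 * is why the result is read back from the accumulator.
 */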
1267 emit(MACH(dst_null_d(), op[0], op[1]));
1268 emit(MOV(result_dst, src_reg(acc)));
1269 } else {
1270 emit(MUL(result_dst, op[0], op[1]));
1271 }
1272 break;
1273 case ir_binop_div:
1274 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1275 assert(ir->type->is_integer());
1276 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1277 break;
1278 case ir_binop_mod:
1279 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1280 assert(ir->type->is_integer());
1281 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1282 break;
1283
1284 case ir_binop_less:
1285 case ir_binop_greater:
1286 case ir_binop_lequal:
1287 case ir_binop_gequal:
1288 case ir_binop_equal:
1289 case ir_binop_nequal: {
1290 emit(CMP(result_dst, op[0], op[1],
1291 brw_conditional_for_comparison(ir->operation)));
1292 emit(AND(result_dst, result_src, src_reg(0x1)));
1293 break;
1294 }
1295
1296 case ir_binop_all_equal:
1297 /* "==" operator producing a scalar boolean. */
1298 if (ir->operands[0]->type->is_vector() ||
1299 ir->operands[1]->type->is_vector()) {
1300 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1301 emit(MOV(result_dst, src_reg(0)));
1302 inst = emit(MOV(result_dst, src_reg(1)));
1303 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1304 } else {
1305 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1306 emit(AND(result_dst, result_src, src_reg(0x1)));
1307 }
1308 break;
1309 case ir_binop_any_nequal:
1310 /* "!=" operator producing a scalar boolean. */
1311 if (ir->operands[0]->type->is_vector() ||
1312 ir->operands[1]->type->is_vector()) {
1313 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1314
1315 emit(MOV(result_dst, src_reg(0)));
1316 inst = emit(MOV(result_dst, src_reg(1)));
1317 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1318 } else {
1319 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1320 emit(AND(result_dst, result_src, src_reg(0x1)));
1321 }
1322 break;
1323
1324 case ir_unop_any:
1325 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1326 emit(MOV(result_dst, src_reg(0)));
1327
1328 inst = emit(MOV(result_dst, src_reg(1)));
1329 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1330 break;
1331
1332 case ir_binop_logic_xor:
1333 emit(XOR(result_dst, op[0], op[1]));
1334 break;
1335
1336 case ir_binop_logic_or:
1337 emit(OR(result_dst, op[0], op[1]));
1338 break;
1339
1340 case ir_binop_logic_and:
1341 emit(AND(result_dst, op[0], op[1]));
1342 break;
1343
1344 case ir_binop_dot:
1345 assert(ir->operands[0]->type->is_vector());
1346 assert(ir->operands[0]->type == ir->operands[1]->type);
1347 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1348 break;
1349
1350 case ir_unop_sqrt:
1351 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1352 break;
1353 case ir_unop_rsq:
1354 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1355 break;
1356
1357 case ir_unop_bitcast_i2f:
1358 case ir_unop_bitcast_u2f:
1359 this->result = op[0];
1360 this->result.type = BRW_REGISTER_TYPE_F;
1361 break;
1362
1363 case ir_unop_bitcast_f2i:
1364 this->result = op[0];
1365 this->result.type = BRW_REGISTER_TYPE_D;
1366 break;
1367
1368 case ir_unop_bitcast_f2u:
1369 this->result = op[0];
1370 this->result.type = BRW_REGISTER_TYPE_UD;
1371 break;
1372
1373 case ir_unop_i2f:
1374 case ir_unop_i2u:
1375 case ir_unop_u2i:
1376 case ir_unop_u2f:
1377 case ir_unop_b2f:
1378 case ir_unop_b2i:
1379 case ir_unop_f2i:
1380 case ir_unop_f2u:
1381 emit(MOV(result_dst, op[0]));
1382 break;
1383 case ir_unop_f2b:
1384 case ir_unop_i2b: {
1385 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1386 emit(AND(result_dst, result_src, src_reg(1)));
1387 break;
1388 }
1389
1390 case ir_unop_trunc:
1391 emit(RNDZ(result_dst, op[0]));
1392 break;
1393 case ir_unop_ceil:
1394 op[0].negate = !op[0].negate;
1395 inst = emit(RNDD(result_dst, op[0]));
1396 this->result.negate = true;
1397 break;
1398 case ir_unop_floor:
1399 inst = emit(RNDD(result_dst, op[0]));
1400 break;
1401 case ir_unop_fract:
1402 inst = emit(FRC(result_dst, op[0]));
1403 break;
1404 case ir_unop_round_even:
1405 emit(RNDE(result_dst, op[0]));
1406 break;
1407
1408 case ir_binop_min:
1409 emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
1410 break;
1411 case ir_binop_max:
1412 emit_minmax(BRW_CONDITIONAL_G, result_dst, op[0], op[1]);
1413 break;
1414
1415 case ir_binop_pow:
1416 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1417 break;
1418
1419 case ir_unop_bit_not:
1420 inst = emit(NOT(result_dst, op[0]));
1421 break;
1422 case ir_binop_bit_and:
1423 inst = emit(AND(result_dst, op[0], op[1]));
1424 break;
1425 case ir_binop_bit_xor:
1426 inst = emit(XOR(result_dst, op[0], op[1]));
1427 break;
1428 case ir_binop_bit_or:
1429 inst = emit(OR(result_dst, op[0], op[1]));
1430 break;
1431
1432 case ir_binop_lshift:
1433 inst = emit(SHL(result_dst, op[0], op[1]));
1434 break;
1435
1436 case ir_binop_rshift:
1437 if (ir->type->base_type == GLSL_TYPE_INT)
1438 inst = emit(ASR(result_dst, op[0], op[1]));
1439 else
1440 inst = emit(SHR(result_dst, op[0], op[1]));
1441 break;
1442
1443 case ir_binop_ubo_load: {
1444 ir_constant *uniform_block = ir->operands[0]->as_constant();
1445 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1446 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
1447 src_reg offset = op[1];
1448
1449 /* Now, load the vector from that offset. */
1450 assert(ir->type->is_vector() || ir->type->is_scalar());
1451
1452 src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
1453 packed_consts.type = result.type;
1454 src_reg surf_index =
1455 src_reg(SURF_INDEX_VS_UBO(uniform_block->value.u[0]));
1456 if (const_offset_ir) {
1457 offset = src_reg(const_offset / 16);
1458 } else {
1459 emit(SHR(dst_reg(offset), offset, src_reg(4)));
1460 }
1461
1462 vec4_instruction *pull =
1463 emit(new(mem_ctx) vec4_instruction(this,
1464 VS_OPCODE_PULL_CONSTANT_LOAD,
1465 dst_reg(packed_consts),
1466 surf_index,
1467 offset));
1468 pull->base_mrf = 14;
1469 pull->mlen = 1;
1470
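/* The pull constant load returns an aligned 16-byte vec4, so
 * (const_offset % 16) / 4 is the component where the requested value starts;
 * bias every channel of the swizzle by that amount to rotate it into place.
 */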
1471 packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements);
1472 packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
1473 const_offset % 16 / 4,
1474 const_offset % 16 / 4,
1475 const_offset % 16 / 4);
1476
1477 /* UBO bools are any nonzero int. We store bools as either 0 or 1. */
1478 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1479 emit(CMP(result_dst, packed_consts, src_reg(0u),
1480 BRW_CONDITIONAL_NZ));
1481 emit(AND(result_dst, result, src_reg(0x1)));
1482 } else {
1483 emit(MOV(result_dst, packed_consts));
1484 }
1485 break;
1486 }
1487
1488 case ir_quadop_vector:
1489 assert(!"not reached: should be handled by lower_quadop_vector");
1490 break;
1491 }
1492 }
1493
1494
1495 void
1496 vec4_visitor::visit(ir_swizzle *ir)
1497 {
1498 src_reg src;
1499 int i = 0;
1500 int swizzle[4];
1501
1502 /* Note that this is only swizzles in expressions, not those on the left
1503 * hand side of an assignment, which do write masking. See ir_assignment
1504 * for that.
1505 */
1506
1507 ir->val->accept(this);
1508 src = this->result;
1509 assert(src.file != BAD_FILE);
1510
1511 for (i = 0; i < ir->type->vector_elements; i++) {
1512 switch (i) {
1513 case 0:
1514 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1515 break;
1516 case 1:
1517 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1518 break;
1519 case 2:
1520 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1521 break;
1522 case 3:
1523 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1524 break;
1525 }
1526 }
1527 for (; i < 4; i++) {
1528 /* Replicate the last channel out. */
1529 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1530 }
1531
1532 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1533
1534 this->result = src;
1535 }
1536
1537 void
1538 vec4_visitor::visit(ir_dereference_variable *ir)
1539 {
1540 const struct glsl_type *type = ir->type;
1541 dst_reg *reg = variable_storage(ir->var);
1542
1543 if (!reg) {
1544 fail("Failed to find variable storage for %s\n", ir->var->name);
1545 this->result = src_reg(brw_null_reg());
1546 return;
1547 }
1548
1549 this->result = src_reg(*reg);
1550
1551 /* System values get their swizzle from the dst_reg writemask */
1552 if (ir->var->mode == ir_var_system_value)
1553 return;
1554
1555 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1556 this->result.swizzle = swizzle_for_size(type->vector_elements);
1557 }
1558
1559 void
1560 vec4_visitor::visit(ir_dereference_array *ir)
1561 {
1562 ir_constant *constant_index;
1563 src_reg src;
1564 int element_size = type_size(ir->type);
1565
1566 constant_index = ir->array_index->constant_expression_value();
1567
1568 ir->array->accept(this);
1569 src = this->result;
1570
1571 if (constant_index) {
1572 src.reg_offset += constant_index->value.i[0] * element_size;
1573 } else {
1574 /* Variable index array dereference. It eats the "vec4" of the
1575 * base of the array and an index that offsets the Mesa register
1576 * index.
1577 */
1578 ir->array_index->accept(this);
1579
1580 src_reg index_reg;
1581
1582 if (element_size == 1) {
1583 index_reg = this->result;
1584 } else {
1585 index_reg = src_reg(this, glsl_type::int_type);
1586
1587 emit(MUL(dst_reg(index_reg), this->result, src_reg(element_size)));
1588 }
1589
1590 if (src.reladdr) {
1591 src_reg temp = src_reg(this, glsl_type::int_type);
1592
1593 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1594
1595 index_reg = temp;
1596 }
1597
1598 src.reladdr = ralloc(mem_ctx, src_reg);
1599 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1600 }
1601
1602 /* If the type is smaller than a vec4, replicate the last channel out. */
1603 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1604 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1605 else
1606 src.swizzle = BRW_SWIZZLE_NOOP;
1607 src.type = brw_type_for_base_type(ir->type);
1608
1609 this->result = src;
1610 }
1611
1612 void
1613 vec4_visitor::visit(ir_dereference_record *ir)
1614 {
1615 unsigned int i;
1616 const glsl_type *struct_type = ir->record->type;
1617 int offset = 0;
1618
1619 ir->record->accept(this);
1620
1621 for (i = 0; i < struct_type->length; i++) {
1622 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1623 break;
1624 offset += type_size(struct_type->fields.structure[i].type);
1625 }
1626
1627 /* If the type is smaller than a vec4, replicate the last channel out. */
1628 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1629 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1630 else
1631 this->result.swizzle = BRW_SWIZZLE_NOOP;
1632 this->result.type = brw_type_for_base_type(ir->type);
1633
1634 this->result.reg_offset += offset;
1635 }
1636
1637 /**
1638 * We want to be careful in assignment setup to hit the actual storage
1639 * instead of potentially using a temporary like we might with the
1640 * ir_dereference handler.
1641 */
1642 static dst_reg
1643 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1644 {
1645 /* The LHS must be a dereference. If the LHS is a variable indexed array
1646 * access of a vector, it must be separated into a series of conditional moves
1647 * before reaching this point (see ir_vec_index_to_cond_assign).
1648 */
1649 assert(ir->as_dereference());
1650 ir_dereference_array *deref_array = ir->as_dereference_array();
1651 if (deref_array) {
1652 assert(!deref_array->array->type->is_vector());
1653 }
1654
1655 /* Use the rvalue deref handler for the most part. We'll ignore
1656 * swizzles in it and write swizzles using writemask, though.
1657 */
1658 ir->accept(v);
1659 return dst_reg(v->result);
1660 }
1661
1662 void
1663 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1664 const struct glsl_type *type, uint32_t predicate)
1665 {
1666 if (type->base_type == GLSL_TYPE_STRUCT) {
1667 for (unsigned int i = 0; i < type->length; i++) {
1668 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1669 }
1670 return;
1671 }
1672
1673 if (type->is_array()) {
1674 for (unsigned int i = 0; i < type->length; i++) {
1675 emit_block_move(dst, src, type->fields.array, predicate);
1676 }
1677 return;
1678 }
1679
1680 if (type->is_matrix()) {
1681 const struct glsl_type *vec_type;
1682
1683 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1684 type->vector_elements, 1);
1685
1686 for (int i = 0; i < type->matrix_columns; i++) {
1687 emit_block_move(dst, src, vec_type, predicate);
1688 }
1689 return;
1690 }
1691
1692 assert(type->is_scalar() || type->is_vector());
1693
1694 dst->type = brw_type_for_base_type(type);
1695 src->type = dst->type;
1696
1697 dst->writemask = (1 << type->vector_elements) - 1;
1698
1699 src->swizzle = swizzle_for_size(type->vector_elements);
1700
1701 vec4_instruction *inst = emit(MOV(*dst, *src));
1702 inst->predicate = predicate;
1703
1704 dst->reg_offset++;
1705 src->reg_offset++;
1706 }
1707
1708
1709 /* If the RHS processing resulted in an instruction generating a
1710 * temporary value, and it would be easy to rewrite the instruction to
1711 * generate its result right into the LHS instead, do so. This ends
1712 * up reliably removing instructions where it can be tricky to do so
1713 * later without real UD chain information.
1714 */
1715 bool
1716 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1717 dst_reg dst,
1718 src_reg src,
1719 vec4_instruction *pre_rhs_inst,
1720 vec4_instruction *last_rhs_inst)
1721 {
1722 /* This could be supported, but it would take more smarts. */
1723 if (ir->condition)
1724 return false;
1725
1726 if (pre_rhs_inst == last_rhs_inst)
1727 return false; /* No instructions generated to work with. */
1728
1729 /* Make sure the last instruction generated our source reg. */
1730 if (src.file != GRF ||
1731 src.file != last_rhs_inst->dst.file ||
1732 src.reg != last_rhs_inst->dst.reg ||
1733 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1734 src.reladdr ||
1735 src.abs ||
1736 src.negate ||
1737 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1738 return false;
1739
1740 /* Check that that last instruction fully initialized the channels
1741 * we want to use, in the order we want to use them. We could
1742 * potentially reswizzle the operands of many instructions so that
1743 * we could handle out of order channels, but don't yet.
1744 */
1745
1746 for (unsigned i = 0; i < 4; i++) {
1747 if (dst.writemask & (1 << i)) {
1748 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1749 return false;
1750
1751 if (BRW_GET_SWZ(src.swizzle, i) != i)
1752 return false;
1753 }
1754 }
1755
1756 /* Success! Rewrite the instruction. */
1757 last_rhs_inst->dst.file = dst.file;
1758 last_rhs_inst->dst.reg = dst.reg;
1759 last_rhs_inst->dst.reg_offset = dst.reg_offset;
1760 last_rhs_inst->dst.reladdr = dst.reladdr;
1761 last_rhs_inst->dst.writemask &= dst.writemask;
1762
1763 return true;
1764 }
1765
1766 void
1767 vec4_visitor::visit(ir_assignment *ir)
1768 {
1769 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1770 uint32_t predicate = BRW_PREDICATE_NONE;
1771
1772 if (!ir->lhs->type->is_scalar() &&
1773 !ir->lhs->type->is_vector()) {
1774 ir->rhs->accept(this);
1775 src_reg src = this->result;
1776
1777 if (ir->condition) {
1778 emit_bool_to_cond_code(ir->condition, &predicate);
1779 }
1780
1781 /* emit_block_move doesn't account for swizzles in the source register.
1782 * This should be ok, since the source register is a structure or an
1783 * array, and those can't be swizzled. But double-check to be sure.
1784 */
1785 assert(src.swizzle ==
1786 (ir->rhs->type->is_matrix()
1787 ? swizzle_for_size(ir->rhs->type->vector_elements)
1788 : BRW_SWIZZLE_NOOP));
1789
1790 emit_block_move(&dst, &src, ir->rhs->type, predicate);
1791 return;
1792 }
1793
1794 /* Now we're down to just a scalar/vector with writemasks. */
1795 int i;
1796
1797 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1798 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1799
1800 ir->rhs->accept(this);
1801
1802 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1803
1804 src_reg src = this->result;
1805
1806 int swizzles[4];
1807 int first_enabled_chan = 0;
1808 int src_chan = 0;
1809
1810 assert(ir->lhs->type->is_vector() ||
1811 ir->lhs->type->is_scalar());
1812 dst.writemask = ir->write_mask;
1813
1814 for (int i = 0; i < 4; i++) {
1815 if (dst.writemask & (1 << i)) {
1816 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1817 break;
1818 }
1819 }
1820
1821 /* Swizzle a small RHS vector into the channels being written.
1822 *
1823 * glsl ir treats write_mask as dictating how many channels are
1824 * present on the RHS while in our instructions we need to make
1825 * those channels appear in the slots of the vec4 they're written to.
1826 */
1827 for (int i = 0; i < 4; i++) {
1828 if (dst.writemask & (1 << i))
1829 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1830 else
1831 swizzles[i] = first_enabled_chan;
1832 }
1833 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1834 swizzles[2], swizzles[3]);
1835
1836 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1837 return;
1838 }
1839
1840 if (ir->condition) {
1841 emit_bool_to_cond_code(ir->condition, &predicate);
1842 }
1843
1844 for (i = 0; i < type_size(ir->lhs->type); i++) {
1845 vec4_instruction *inst = emit(MOV(dst, src));
1846 inst->predicate = predicate;
1847
1848 dst.reg_offset++;
1849 src.reg_offset++;
1850 }
1851 }
1852
1853 void
1854 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1855 {
1856 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1857 foreach_list(node, &ir->components) {
1858 ir_constant *field_value = (ir_constant *)node;
1859
1860 emit_constant_values(dst, field_value);
1861 }
1862 return;
1863 }
1864
1865 if (ir->type->is_array()) {
1866 for (unsigned int i = 0; i < ir->type->length; i++) {
1867 emit_constant_values(dst, ir->array_elements[i]);
1868 }
1869 return;
1870 }
1871
1872 if (ir->type->is_matrix()) {
1873 for (int i = 0; i < ir->type->matrix_columns; i++) {
1874 float *vec = &ir->value.f[i * ir->type->vector_elements];
1875
1876 for (int j = 0; j < ir->type->vector_elements; j++) {
1877 dst->writemask = 1 << j;
1878 dst->type = BRW_REGISTER_TYPE_F;
1879
1880 emit(MOV(*dst, src_reg(vec[j])));
1881 }
1882 dst->reg_offset++;
1883 }
1884 return;
1885 }
1886
1887 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
1888
1889 for (int i = 0; i < ir->type->vector_elements; i++) {
1890 if (!(remaining_writemask & (1 << i)))
1891 continue;
1892
1893 dst->writemask = 1 << i;
1894 dst->type = brw_type_for_base_type(ir->type);
1895
1896 /* Find other components that match the one we're about to
1897 * write. Emits fewer instructions for things like vec4(0.5,
1898 * 1.5, 1.5, 1.5).
1899 */
1900 for (int j = i + 1; j < ir->type->vector_elements; j++) {
1901 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1902 if (ir->value.b[i] == ir->value.b[j])
1903 dst->writemask |= (1 << j);
1904 } else {
1905 /* u, i, and f storage all line up, so no need for a
1906 * switch case for comparing each type.
1907 */
1908 if (ir->value.u[i] == ir->value.u[j])
1909 dst->writemask |= (1 << j);
1910 }
1911 }
1912
1913 switch (ir->type->base_type) {
1914 case GLSL_TYPE_FLOAT:
1915 emit(MOV(*dst, src_reg(ir->value.f[i])));
1916 break;
1917 case GLSL_TYPE_INT:
1918 emit(MOV(*dst, src_reg(ir->value.i[i])));
1919 break;
1920 case GLSL_TYPE_UINT:
1921 emit(MOV(*dst, src_reg(ir->value.u[i])));
1922 break;
1923 case GLSL_TYPE_BOOL:
1924 emit(MOV(*dst, src_reg(ir->value.b[i])));
1925 break;
1926 default:
1927 assert(!"Non-float/uint/int/bool constant");
1928 break;
1929 }
1930
1931 remaining_writemask &= ~dst->writemask;
1932 }
1933 dst->reg_offset++;
1934 }
1935
1936 void
1937 vec4_visitor::visit(ir_constant *ir)
1938 {
1939 dst_reg dst = dst_reg(this, ir->type);
1940 this->result = src_reg(dst);
1941
1942 emit_constant_values(&dst, ir);
1943 }
1944
1945 void
1946 vec4_visitor::visit(ir_call *ir)
1947 {
1948 assert(!"not reached");
1949 }
1950
1951 void
1952 vec4_visitor::visit(ir_texture *ir)
1953 {
1954 int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &vp->Base);
1955
1956 /* Should be lowered by do_lower_texture_projection */
1957 assert(!ir->projector);
1958
1959 /* Generate code to compute all the subexpression trees. This has to be
1960 * done before loading any values into MRFs for the sampler message since
1961 * generating these values may involve SEND messages that need the MRFs.
1962 */
1963 src_reg coordinate;
1964 if (ir->coordinate) {
1965 ir->coordinate->accept(this);
1966 coordinate = this->result;
1967 }
1968
1969 src_reg shadow_comparitor;
1970 if (ir->shadow_comparitor) {
1971 ir->shadow_comparitor->accept(this);
1972 shadow_comparitor = this->result;
1973 }
1974
1975 src_reg lod, dPdx, dPdy;
1976 switch (ir->op) {
1977 case ir_txf:
1978 case ir_txl:
1979 case ir_txs:
1980 ir->lod_info.lod->accept(this);
1981 lod = this->result;
1982 break;
1983 case ir_txd:
1984 ir->lod_info.grad.dPdx->accept(this);
1985 dPdx = this->result;
1986
1987 ir->lod_info.grad.dPdy->accept(this);
1988 dPdy = this->result;
1989 break;
1990 case ir_tex:
1991 case ir_txb:
1992 break;
1993 }
1994
1995 vec4_instruction *inst = NULL;
1996 switch (ir->op) {
1997 case ir_tex:
1998 case ir_txl:
1999 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
2000 break;
2001 case ir_txd:
2002 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
2003 break;
2004 case ir_txf:
2005 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
2006 break;
2007 case ir_txs:
2008 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
2009 break;
2010 case ir_txb:
2011 assert(!"TXB is not valid for vertex shaders.");
2012 }
2013
2014 /* Texel offsets go in the message header; Gen4 also requires headers. */
2015 inst->header_present = ir->offset || intel->gen < 5;
2016 inst->base_mrf = 2;
2017 inst->mlen = inst->header_present + 1; /* always at least one */
2018 inst->sampler = sampler;
2019 inst->dst = dst_reg(this, ir->type);
2020 inst->shadow_compare = ir->shadow_comparitor != NULL;
2021
2022 if (ir->offset != NULL && ir->op != ir_txf)
2023 inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
2024
2025 /* MRF for the first parameter */
2026 int param_base = inst->base_mrf + inst->header_present;
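/* Worked example (editor's addition, assuming a gen6 textureLod() call on a
 * sampler2D with no offset and no shadow comparison): header_present is
 * false, so param_base == base_mrf == 2, the coordinate lands in MRF 2
 * below, the LOD lands in MRF 3.x, and mlen ends up as 2.
 */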
2027
2028 if (ir->op == ir_txs) {
2029 int writemask = intel->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
2030 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, writemask),
2031 lod));
2032 } else {
2033 int i, coord_mask = 0, zero_mask = 0;
2034 /* Load the coordinate */
2035 /* FINISHME: gl_clamp_mask and saturate */
2036 for (i = 0; i < ir->coordinate->type->vector_elements; i++)
2037 coord_mask |= (1 << i);
2038 for (; i < 4; i++)
2039 zero_mask |= (1 << i);
2040
2041 if (ir->offset && ir->op == ir_txf) {
2042 /* It appears that the ld instruction used for txf does its
2043 * address bounds check before adding in the offset. To work
2044 * around this, just add the integer offset to the integer
2045 * texel coordinate, and don't put the offset in the header.
2046 */
2047 ir_constant *offset = ir->offset->as_constant();
2048 assert(offset);
2049
2050 for (int j = 0; j < ir->coordinate->type->vector_elements; j++) {
2051 src_reg src = coordinate;
2052 src.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(src.swizzle, j),
2053 BRW_GET_SWZ(src.swizzle, j),
2054 BRW_GET_SWZ(src.swizzle, j),
2055 BRW_GET_SWZ(src.swizzle, j));
2056 emit(ADD(dst_reg(MRF, param_base, ir->coordinate->type, 1 << j),
2057 src, offset->value.i[j]));
2058 }
2059 } else {
2060 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
2061 coordinate));
2062 }
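/* Illustrative example (editor's addition, hypothetical values): for a
 * texelFetch() with a constant offset of ivec2(1, 2), the loop above folds
 * the offset into the integer coordinate rather than the header, emitting
 * one ADD per component: the broadcast .x channel plus 1 into the
 * coordinate MRF's .x, and the broadcast .y channel plus 2 into its .y.
 */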
2063 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
2064 src_reg(0)));
2065 /* Load the shadow comparitor */
2066 if (ir->shadow_comparitor) {
2067 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
2068 WRITEMASK_X),
2069 shadow_comparitor));
2070 inst->mlen++;
2071 }
2072
2073 /* Load the LOD info */
2074 if (ir->op == ir_txl) {
2075 int mrf, writemask;
2076 if (intel->gen >= 5) {
2077 mrf = param_base + 1;
2078 if (ir->shadow_comparitor) {
2079 writemask = WRITEMASK_Y;
2080 /* mlen already incremented */
2081 } else {
2082 writemask = WRITEMASK_X;
2083 inst->mlen++;
2084 }
2085 } else /* intel->gen == 4 */ {
2086 mrf = param_base;
2087 writemask = WRITEMASK_Z;
2088 }
2089 emit(MOV(dst_reg(MRF, mrf, ir->lod_info.lod->type, writemask), lod));
2090 } else if (ir->op == ir_txf) {
2091 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, WRITEMASK_W),
2092 lod));
2093 } else if (ir->op == ir_txd) {
2094 const glsl_type *type = ir->lod_info.grad.dPdx->type;
2095
2096 if (intel->gen >= 5) {
2097 dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2098 dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2099 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
2100 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
2101 inst->mlen++;
2102
2103 if (ir->type->vector_elements == 3) {
2104 dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
2105 dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
2106 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
2107 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
2108 inst->mlen++;
2109 }
2110 } else /* intel->gen == 4 */ {
2111 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
2112 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
2113 inst->mlen += 2;
2114 }
2115 }
2116 }
2117
2118 emit(inst);
2119
2120 swizzle_result(ir, src_reg(inst->dst), sampler);
2121 }
2122
2123 void
2124 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
2125 {
2126 this->result = orig_val;
2127
2128 int s = c->key.tex.swizzles[sampler];
2129
2130 if (ir->op == ir_txs || ir->type == glsl_type::float_type
2131 || s == SWIZZLE_NOOP)
2132 return;
2133
2134 int zero_mask = 0, one_mask = 0, copy_mask = 0;
2135 int swizzle[4];
2136
2137 for (int i = 0; i < 4; i++) {
2138 switch (GET_SWZ(s, i)) {
2139 case SWIZZLE_ZERO:
2140 zero_mask |= (1 << i);
2141 break;
2142 case SWIZZLE_ONE:
2143 one_mask |= (1 << i);
2144 break;
2145 default:
2146 copy_mask |= (1 << i);
2147 swizzle[i] = GET_SWZ(s, i);
2148 break;
2149 }
2150 }
2151
2152 this->result = src_reg(this, ir->type);
2153 dst_reg swizzled_result(this->result);
2154
2155 if (copy_mask) {
2156 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2157 swizzled_result.writemask = copy_mask;
2158 emit(MOV(swizzled_result, orig_val));
2159 }
2160
2161 if (zero_mask) {
2162 swizzled_result.writemask = zero_mask;
2163 emit(MOV(swizzled_result, src_reg(0.0f)));
2164 }
2165
2166 if (one_mask) {
2167 swizzled_result.writemask = one_mask;
2168 emit(MOV(swizzled_result, src_reg(1.0f)));
2169 }
2170 }
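/* Illustrative example (editor's addition): for a texture swizzle key of
 * (BLUE, GREEN, RED, ONE), the loop above computes copy_mask = XYZ with a
 * source swizzle whose first three channels are Z, Y, X (the fourth is
 * ignored thanks to the writemask) and one_mask = W, so the result is
 * assembled from one swizzled MOV plus one MOV of 1.0f into .w.
 */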
2171
2172 void
2173 vec4_visitor::visit(ir_return *ir)
2174 {
2175 assert(!"not reached");
2176 }
2177
2178 void
2179 vec4_visitor::visit(ir_discard *ir)
2180 {
2181 assert(!"not reached");
2182 }
2183
2184 void
2185 vec4_visitor::visit(ir_if *ir)
2186 {
2187 /* Don't point the annotation at the if statement, because then it plus
2188 * the then and else blocks get printed.
2189 */
2190 this->base_ir = ir->condition;
2191
2192 if (intel->gen == 6) {
2193 emit_if_gen6(ir);
2194 } else {
2195 uint32_t predicate;
2196 emit_bool_to_cond_code(ir->condition, &predicate);
2197 emit(IF(predicate));
2198 }
2199
2200 visit_instructions(&ir->then_instructions);
2201
2202 if (!ir->else_instructions.is_empty()) {
2203 this->base_ir = ir->condition;
2204 emit(BRW_OPCODE_ELSE);
2205
2206 visit_instructions(&ir->else_instructions);
2207 }
2208
2209 this->base_ir = ir->condition;
2210 emit(BRW_OPCODE_ENDIF);
2211 }
2212
2213 void
2214 vec4_visitor::emit_ndc_computation()
2215 {
2216 /* Get the position */
2217 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
2218
2219 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
2220 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2221 output_reg[BRW_VERT_RESULT_NDC] = ndc;
2222
2223 current_annotation = "NDC";
2224 dst_reg ndc_w = ndc;
2225 ndc_w.writemask = WRITEMASK_W;
2226 src_reg pos_w = pos;
2227 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2228 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2229
2230 dst_reg ndc_xyz = ndc;
2231 ndc_xyz.writemask = WRITEMASK_XYZ;
2232
2233 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
2234 }
2235
2236 void
2237 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2238 {
2239 if (intel->gen < 6 &&
2240 ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
2241 c->key.userclip_active || brw->has_negative_rhw_bug)) {
2242 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2243 dst_reg header1_w = header1;
2244 header1_w.writemask = WRITEMASK_W;
2245 GLuint i;
2246
2247 emit(MOV(header1, 0u));
2248
2249 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2250 src_reg psiz = src_reg(output_reg[VERT_RESULT_PSIZ]);
2251
2252 current_annotation = "Point size";
2253 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2254 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
2255 }
2256
2257 current_annotation = "Clipping flags";
2258 for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
2259 vec4_instruction *inst;
2260
2261 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
2262 src_reg(this->userplane[i])));
2263 inst->conditional_mod = BRW_CONDITIONAL_L;
2264
2265 inst = emit(OR(header1_w, src_reg(header1_w), 1u << i));
2266 inst->predicate = BRW_PREDICATE_NORMAL;
2267 }
2268
2269 /* i965 clipping workaround:
2270 * 1) Test for -ve rhw
2271 * 2) If set,
2272 * set ndc = (0,0,0,0)
2273 * set ucp[6] = 1
2274 *
2275 * Later, clipping will detect ucp[6] and ensure the primitive is
2276 * clipped against all fixed planes.
2277 */
2278 if (brw->has_negative_rhw_bug) {
2279 #if 0
2280 /* FINISHME */
2281 brw_CMP(p,
2282 vec8(brw_null_reg()),
2283 BRW_CONDITIONAL_L,
2284 brw_swizzle1(output_reg[BRW_VERT_RESULT_NDC], 3),
2285 brw_imm_f(0));
2286
2287 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
2288 brw_MOV(p, output_reg[BRW_VERT_RESULT_NDC], brw_imm_f(0));
2289 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2290 #endif
2291 }
2292
2293 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2294 } else if (intel->gen < 6) {
2295 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2296 } else {
2297 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2298 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2299 emit(MOV(brw_writemask(reg, WRITEMASK_W),
2300 src_reg(output_reg[VERT_RESULT_PSIZ])));
2301 }
2302 }
2303 }
2304
2305 void
2306 vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
2307 {
2308 if (intel->gen < 6) {
2309 /* Clip distance slots are set aside in gen5, but they are not used. It
2310 * is not clear whether we actually need to set aside space for them,
2311 * but the performance cost is negligible.
2312 */
2313 return;
2314 }
2315
2316 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2317 *
2318 * "If a linked set of shaders forming the vertex stage contains no
2319 * static write to gl_ClipVertex or gl_ClipDistance, but the
2320 * application has requested clipping against user clip planes through
2321 * the API, then the coordinate written to gl_Position is used for
2322 * comparison against the user clip planes."
2323 *
2324 * This function is only called if the shader didn't write to
2325 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
2326 * if the user wrote to it; otherwise we use gl_Position.
2327 */
2328 gl_vert_result clip_vertex = VERT_RESULT_CLIP_VERTEX;
2329 if (!(c->prog_data.outputs_written
2330 & BITFIELD64_BIT(VERT_RESULT_CLIP_VERTEX))) {
2331 clip_vertex = VERT_RESULT_HPOS;
2332 }
2333
2334 for (int i = 0; i + offset < c->key.nr_userclip_plane_consts && i < 4;
2335 ++i) {
2336 emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
2337 src_reg(output_reg[clip_vertex]),
2338 src_reg(this->userplane[i + offset])));
2339 }
2340 }
2341
2342 void
2343 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int vert_result)
2344 {
2345 assert(vert_result < VERT_RESULT_MAX);
2346 reg.type = output_reg[vert_result].type;
2347 current_annotation = output_reg_annotation[vert_result];
2348 /* Copy the register, saturating if necessary */
2349 vec4_instruction *inst = emit(MOV(reg,
2350 src_reg(output_reg[vert_result])));
2351 if ((vert_result == VERT_RESULT_COL0 ||
2352 vert_result == VERT_RESULT_COL1 ||
2353 vert_result == VERT_RESULT_BFC0 ||
2354 vert_result == VERT_RESULT_BFC1) &&
2355 c->key.clamp_vertex_color) {
2356 inst->saturate = true;
2357 }
2358 }
2359
2360 void
2361 vec4_visitor::emit_urb_slot(int mrf, int vert_result)
2362 {
2363 struct brw_reg hw_reg = brw_message_reg(mrf);
2364 dst_reg reg = dst_reg(MRF, mrf);
2365 reg.type = BRW_REGISTER_TYPE_F;
2366
2367 switch (vert_result) {
2368 case VERT_RESULT_PSIZ:
2369 /* PSIZ is always in slot 0, and is coupled with other flags. */
2370 current_annotation = "indices, point width, clip flags";
2371 emit_psiz_and_flags(hw_reg);
2372 break;
2373 case BRW_VERT_RESULT_NDC:
2374 current_annotation = "NDC";
2375 emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
2376 break;
2377 case BRW_VERT_RESULT_HPOS_DUPLICATE:
2378 case VERT_RESULT_HPOS:
2379 current_annotation = "gl_Position";
2380 emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
2381 break;
2382 case VERT_RESULT_CLIP_DIST0:
2383 case VERT_RESULT_CLIP_DIST1:
2384 if (this->c->key.uses_clip_distance) {
2385 emit_generic_urb_slot(reg, vert_result);
2386 } else {
2387 current_annotation = "user clip distances";
2388 emit_clip_distances(hw_reg, (vert_result - VERT_RESULT_CLIP_DIST0) * 4);
2389 }
2390 break;
2391 case VERT_RESULT_EDGE:
2392 /* This is present when doing unfilled polygons. We're supposed to copy
2393 * the edge flag from the user-provided vertex array
2394 * (glEdgeFlagPointer), or otherwise from the current value of that
2395 * attribute (which starts as 1.0f). Clipping then uses it to
2396 * determine which edges should be drawn as wireframe.
2397 */
2398 current_annotation = "edge flag";
2399 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
2400 glsl_type::float_type, WRITEMASK_XYZW))));
2401 break;
2402 case BRW_VERT_RESULT_PAD:
2403 /* No need to write to this slot */
2404 break;
2405 default:
2406 emit_generic_urb_slot(reg, vert_result);
2407 break;
2408 }
2409 }
2410
2411 static int
2412 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2413 {
2414 struct intel_context *intel = &brw->intel;
2415
2416 if (intel->gen >= 6) {
2417 /* URB data written (does not include the message header reg) must
2418 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
2419 * section 5.4.3.2.2: URB_INTERLEAVED.
2420 *
2421 * URB entries are allocated on a multiple of 1024 bits, so an
2422 * extra 128 bits written here to make the end align to 256 is
2423 * no problem.
2424 */
2425 if ((mlen % 2) != 1)
2426 mlen++;
2427 }
2428
2429 return mlen;
2430 }
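/* Illustrative values (editor's addition): an mlen of 7 (header plus six
 * data MRFs) is already odd and is returned unchanged, while an mlen of 8
 * (header plus seven data MRFs) is padded to 9 so the data portion stays
 * an even number of registers.
 */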
2431
2432 /**
2433 * Generates the VUE payload plus the 1 or 2 URB write instructions to
2434 * complete the VS thread.
2435 *
2436 * The VUE layout is documented in Volume 2a.
2437 */
2438 void
2439 vec4_visitor::emit_urb_writes()
2440 {
2441 /* MRF 0 is reserved for the debugger, so start with message header
2442 * in MRF 1.
2443 */
2444 int base_mrf = 1;
2445 int mrf = base_mrf;
2446 /* In the process of generating our URB write message contents, we
2447 * may need to unspill a register or load from an array. Those
2448 * reads would use MRFs 14-15.
2449 */
2450 int max_usable_mrf = 13;
2451
2452 /* The following assertion verifies that max_usable_mrf results in an
2453 * even number of URB write data registers, which meets gen6's
2454 * length alignment requirement.
2455 */
2456 assert((max_usable_mrf - base_mrf) % 2 == 0);
2457
2458 /* First mrf is the g0-based message header containing URB handles and such,
2459 * which is implied in VS_OPCODE_URB_WRITE.
2460 */
2461 mrf++;
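/* MRF budget sketch (editor's addition, derived from the comments above):
 * MRF 0 is left for the debugger, MRF 1 carries the implied URB write
 * header, MRFs 2-13 hold the VUE slot data, and MRFs 14-15 remain free
 * for any scratch or pull constant reads generated while building the
 * payload.
 */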
2462
2463 if (intel->gen < 6) {
2464 emit_ndc_computation();
2465 }
2466
2467 /* Set up the VUE data for the first URB write */
2468 int slot;
2469 for (slot = 0; slot < c->prog_data.vue_map.num_slots; ++slot) {
2470 emit_urb_slot(mrf++, c->prog_data.vue_map.slot_to_vert_result[slot]);
2471
2472 /* If this was max_usable_mrf, we can't fit anything more into this URB
2473 * WRITE.
2474 */
2475 if (mrf > max_usable_mrf) {
2476 slot++;
2477 break;
2478 }
2479 }
2480
2481 current_annotation = "URB write";
2482 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
2483 inst->base_mrf = base_mrf;
2484 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2485 inst->eot = (slot >= c->prog_data.vue_map.num_slots);
2486
2487 /* Optional second URB write */
2488 if (!inst->eot) {
2489 mrf = base_mrf + 1;
2490
2491 for (; slot < c->prog_data.vue_map.num_slots; ++slot) {
2492 assert(mrf < max_usable_mrf);
2493
2494 emit_urb_slot(mrf++, c->prog_data.vue_map.slot_to_vert_result[slot]);
2495 }
2496
2497 current_annotation = "URB write";
2498 inst = emit(VS_OPCODE_URB_WRITE);
2499 inst->base_mrf = base_mrf;
2500 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2501 inst->eot = true;
2502 /* URB destination offset. The previous write used MRFs 1-13;
2503 * minus the one header MRF, that's 12 data regs. The URB offset
2504 * is in URB row increments, and each of our MRFs is half of one
2505 * of those rows, since we're doing interleaved writes.
2506 */
2507 inst->offset = (max_usable_mrf - base_mrf) / 2;
2508 }
2509 }
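/* Worked example (editor's addition): when a second write is needed, the
 * first message covered MRFs 1-13 (header plus 12 data regs), so the
 * follow-up write starts at URB row offset (13 - 1) / 2 == 6, i.e. just
 * past the 12 interleaved VUE slots already written.
 */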
2510
2511 src_reg
2512 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2513 src_reg *reladdr, int reg_offset)
2514 {
2515 /* Because we store the values to scratch interleaved like our
2516 * vertex data, we need to scale the vec4 index by 2.
2517 */
2518 int message_header_scale = 2;
2519
2520 /* Pre-gen6, the message header uses byte offsets instead of vec4
2521 * (16-byte) offset units.
2522 */
2523 if (intel->gen < 6)
2524 message_header_scale *= 16;
2525
2526 if (reladdr) {
2527 src_reg index = src_reg(this, glsl_type::int_type);
2528
2529 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2530 emit_before(inst, MUL(dst_reg(index),
2531 index, src_reg(message_header_scale)));
2532
2533 return index;
2534 } else {
2535 return src_reg(reg_offset * message_header_scale);
2536 }
2537 }
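/* Illustrative values (editor's addition): for a constant reg_offset of 3
 * this returns src_reg(6) on gen6+ (vec4 units, doubled for interleaving)
 * and src_reg(96) on gen4/5 (byte units: 3 * 2 * 16). With a reladdr, the
 * same scaling is applied by the ADD/MUL pair emitted above.
 */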
2538
2539 src_reg
2540 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2541 src_reg *reladdr, int reg_offset)
2542 {
2543 if (reladdr) {
2544 src_reg index = src_reg(this, glsl_type::int_type);
2545
2546 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2547
2548 /* Pre-gen6, the message header uses byte offsets instead of vec4
2549 * (16-byte) offset units.
2550 */
2551 if (intel->gen < 6) {
2552 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2553 }
2554
2555 return index;
2556 } else {
2557 int message_header_scale = intel->gen < 6 ? 16 : 1;
2558 return src_reg(reg_offset * message_header_scale);
2559 }
2560 }
2561
2562 /**
2563 * Emits an instruction before @inst to load the value named by @orig_src
2564 * from scratch space at @base_offset to @temp.
2565 *
2566 * @base_offset is measured in 32-byte units (the size of a register).
2567 */
2568 void
2569 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2570 dst_reg temp, src_reg orig_src,
2571 int base_offset)
2572 {
2573 int reg_offset = base_offset + orig_src.reg_offset;
2574 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2575
2576 emit_before(inst, SCRATCH_READ(temp, index));
2577 }
2578
2579 /**
2580 * Emits an instruction after @inst to store the value to be written
2581 * to @orig_dst to scratch space at @base_offset, from @temp.
2582 *
2583 * @base_offset is measured in 32-byte units (the size of a register).
2584 */
2585 void
2586 vec4_visitor::emit_scratch_write(vec4_instruction *inst, int base_offset)
2587 {
2588 int reg_offset = base_offset + inst->dst.reg_offset;
2589 src_reg index = get_scratch_offset(inst, inst->dst.reladdr, reg_offset);
2590
2591 /* Create a temporary register to store *inst's result in.
2592 *
2593 * We have to be careful in MOVing from our temporary result register in
2594 * the scratch write. If we swizzle from channels of the temporary that
2595 * weren't initialized, it will confuse live interval analysis, which will
2596 * make spilling fail to make progress.
2597 */
2598 src_reg temp = src_reg(this, glsl_type::vec4_type);
2599 temp.type = inst->dst.type;
2600 int first_writemask_chan = ffs(inst->dst.writemask) - 1;
2601 int swizzles[4];
2602 for (int i = 0; i < 4; i++)
2603 if (inst->dst.writemask & (1 << i))
2604 swizzles[i] = i;
2605 else
2606 swizzles[i] = first_writemask_chan;
2607 temp.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2608 swizzles[2], swizzles[3]);
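/* Example (editor's addition): for a destination writemask of .yz, the
 * first written channel is Y, so the temporary is read with swizzle
 * (Y, Y, Z, Y) and the uninitialized X and W channels are never
 * referenced.
 */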
2609
2610 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2611 inst->dst.writemask));
2612 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
2613 write->predicate = inst->predicate;
2614 write->ir = inst->ir;
2615 write->annotation = inst->annotation;
2616 inst->insert_after(write);
2617
2618 inst->dst.file = temp.file;
2619 inst->dst.reg = temp.reg;
2620 inst->dst.reg_offset = temp.reg_offset;
2621 inst->dst.reladdr = NULL;
2622 }
2623
2624 /**
2625 * We can't generally support array access in GRF space, because a
2626 * single instruction's destination can only span 2 contiguous
2627 * registers. So, we send all GRF arrays that get variable index
2628 * access to scratch space.
2629 */
2630 void
2631 vec4_visitor::move_grf_array_access_to_scratch()
2632 {
2633 int scratch_loc[this->virtual_grf_count];
2634
2635 for (int i = 0; i < this->virtual_grf_count; i++) {
2636 scratch_loc[i] = -1;
2637 }
2638
2639 /* First, calculate the set of virtual GRFs that need to be punted
2640 * to scratch due to having any array access on them, and where in
2641 * scratch.
2642 */
2643 foreach_list(node, &this->instructions) {
2644 vec4_instruction *inst = (vec4_instruction *)node;
2645
2646 if (inst->dst.file == GRF && inst->dst.reladdr &&
2647 scratch_loc[inst->dst.reg] == -1) {
2648 scratch_loc[inst->dst.reg] = c->last_scratch;
2649 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
2650 }
2651
2652 for (int i = 0 ; i < 3; i++) {
2653 src_reg *src = &inst->src[i];
2654
2655 if (src->file == GRF && src->reladdr &&
2656 scratch_loc[src->reg] == -1) {
2657 scratch_loc[src->reg] = c->last_scratch;
2658 c->last_scratch += this->virtual_grf_sizes[src->reg];
2659 }
2660 }
2661 }
2662
2663 /* Now, for anything that will be accessed through scratch, rewrite
2664 * it to load/store. Note that this is a _safe list walk, because
2665 * we may generate a new scratch_write instruction after the one
2666 * we're processing.
2667 */
2668 foreach_list_safe(node, &this->instructions) {
2669 vec4_instruction *inst = (vec4_instruction *)node;
2670
2671 /* Set up the annotation tracking for newly generated instructions. */
2672 base_ir = inst->ir;
2673 current_annotation = inst->annotation;
2674
2675 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2676 emit_scratch_write(inst, scratch_loc[inst->dst.reg]);
2677 }
2678
2679 for (int i = 0 ; i < 3; i++) {
2680 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2681 continue;
2682
2683 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2684
2685 emit_scratch_read(inst, temp, inst->src[i],
2686 scratch_loc[inst->src[i].reg]);
2687
2688 inst->src[i].file = temp.file;
2689 inst->src[i].reg = temp.reg;
2690 inst->src[i].reg_offset = temp.reg_offset;
2691 inst->src[i].reladdr = NULL;
2692 }
2693 }
2694 }
2695
2696 /**
2697 * Emits an instruction before @inst to load the value named by @orig_src
2698 * from the pull constant buffer (surface) at @base_offset to @temp.
2699 */
2700 void
2701 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
2702 dst_reg temp, src_reg orig_src,
2703 int base_offset)
2704 {
2705 int reg_offset = base_offset + orig_src.reg_offset;
2706 src_reg index = src_reg((unsigned)SURF_INDEX_VERT_CONST_BUFFER);
2707 src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
2708 vec4_instruction *load;
2709
2710 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
2711 temp, index, offset);
2712 load->base_mrf = 14;
2713 load->mlen = 1;
2714 emit_before(inst, load);
2715 }
2716
2717 /**
2718 * Implements array access of uniforms by inserting a
2719 * PULL_CONSTANT_LOAD instruction.
2720 *
2721 * Unlike temporary GRF array access, which we don't support due to
2722 * the difficulty of doing relative addressing on instruction
2723 * destinations, we could potentially do array access of uniforms
2724 * that were loaded in GRF space as push constants. In the real-world
2725 * usage we've seen, though, the arrays in question are always larger
2726 * than we could load as push constants, so we always move uniform
2727 * array access out to a pull constant buffer.
2728 */
2729 void
2730 vec4_visitor::move_uniform_array_access_to_pull_constants()
2731 {
2732 int pull_constant_loc[this->uniforms];
2733
2734 for (int i = 0; i < this->uniforms; i++) {
2735 pull_constant_loc[i] = -1;
2736 }
2737
2738 /* Walk through and find array access of uniforms. Put a copy of that
2739 * uniform in the pull constant buffer.
2740 *
2741 * Note that we don't move constant-indexed accesses to arrays. No
2742 * testing has been done of the performance impact of this choice.
2743 */
2744 foreach_list_safe(node, &this->instructions) {
2745 vec4_instruction *inst = (vec4_instruction *)node;
2746
2747 for (int i = 0 ; i < 3; i++) {
2748 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
2749 continue;
2750
2751 int uniform = inst->src[i].reg;
2752
2753 /* If this array isn't already present in the pull constant buffer,
2754 * add it.
2755 */
2756 if (pull_constant_loc[uniform] == -1) {
2757 const float **values = &prog_data->param[uniform * 4];
2758
2759 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
2760
2761 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
2762 prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
2763 }
2764 }
2765
2766 /* Set up the annotation tracking for newly generated instructions. */
2767 base_ir = inst->ir;
2768 current_annotation = inst->annotation;
2769
2770 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2771
2772 emit_pull_constant_load(inst, temp, inst->src[i],
2773 pull_constant_loc[uniform]);
2774
2775 inst->src[i].file = temp.file;
2776 inst->src[i].reg = temp.reg;
2777 inst->src[i].reg_offset = temp.reg_offset;
2778 inst->src[i].reladdr = NULL;
2779 }
2780 }
2781
2782 /* Now there are no accesses of the UNIFORM file with a reladdr, so
2783 * no need to track them as larger-than-vec4 objects. This will be
2784 * relied on in cutting out unused uniform vectors from push
2785 * constants.
2786 */
2787 split_uniform_registers();
2788 }
2789
2790 void
2791 vec4_visitor::resolve_ud_negate(src_reg *reg)
2792 {
2793 if (reg->type != BRW_REGISTER_TYPE_UD ||
2794 !reg->negate)
2795 return;
2796
2797 src_reg temp = src_reg(this, glsl_type::uvec4_type);
2798 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
2799 *reg = temp;
2800 }
2801
2802 vec4_visitor::vec4_visitor(struct brw_context *brw,
2803 struct brw_vs_compile *c,
2804 struct gl_shader_program *prog,
2805 struct brw_shader *shader,
2806 void *mem_ctx)
2807 {
2808 this->c = c;
2809 this->brw = brw;
2810 this->intel = &brw->intel;
2811 this->ctx = &intel->ctx;
2812 this->prog = prog;
2813 this->shader = shader;
2814
2815 this->mem_ctx = mem_ctx;
2816 this->failed = false;
2817
2818 this->base_ir = NULL;
2819 this->current_annotation = NULL;
2820 memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
2821
2822 this->c = c;
2823 this->vp = &c->vp->program;
2824 this->prog_data = &c->prog_data;
2825
2826 this->variable_ht = hash_table_ctor(0,
2827 hash_table_pointer_hash,
2828 hash_table_pointer_compare);
2829
2830 this->virtual_grf_def = NULL;
2831 this->virtual_grf_use = NULL;
2832 this->virtual_grf_sizes = NULL;
2833 this->virtual_grf_count = 0;
2834 this->virtual_grf_reg_map = NULL;
2835 this->virtual_grf_reg_count = 0;
2836 this->virtual_grf_array_size = 0;
2837 this->live_intervals_valid = false;
2838
2839 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
2840
2841 this->uniforms = 0;
2842 }
2843
2844 vec4_visitor::~vec4_visitor()
2845 {
2846 hash_table_dtor(this->variable_ht);
2847 }
2848
2849
2850 void
2851 vec4_visitor::fail(const char *format, ...)
2852 {
2853 va_list va;
2854 char *msg;
2855
2856 if (failed)
2857 return;
2858
2859 failed = true;
2860
2861 va_start(va, format);
2862 msg = ralloc_vasprintf(mem_ctx, format, va);
2863 va_end(va);
2864 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2865
2866 this->fail_msg = msg;
2867
2868 if (INTEL_DEBUG & DEBUG_VS) {
2869 fprintf(stderr, "%s", msg);
2870 }
2871 }
2872
2873 } /* namespace brw */