i965/vs: add support for emitting SHL, SHR, ASR
[mesa.git] / src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
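/**
 * @file brw_vec4_visitor.cpp
 *
 * Walks the GLSL IR for a vertex shader and translates it into the vec4
 * instructions that the rest of the i965 VS backend (optimization passes,
 * register allocation and code generation) consumes.
 */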
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 #include "program/sampler.h"
29 }
30
31 namespace brw {
32
33 vec4_instruction::vec4_instruction(vec4_visitor *v,
34 enum opcode opcode, dst_reg dst,
35 src_reg src0, src_reg src1, src_reg src2)
36 {
37 this->opcode = opcode;
38 this->dst = dst;
39 this->src[0] = src0;
40 this->src[1] = src1;
41 this->src[2] = src2;
42 this->ir = v->base_ir;
43 this->annotation = v->current_annotation;
44 }
45
46 vec4_instruction *
47 vec4_visitor::emit(vec4_instruction *inst)
48 {
49 this->instructions.push_tail(inst);
50
51 return inst;
52 }
53
54 vec4_instruction *
55 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
56 {
57 new_inst->ir = inst->ir;
58 new_inst->annotation = inst->annotation;
59
60 inst->insert_before(new_inst);
61
62 return inst;
63 }
64
65 vec4_instruction *
66 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
67 src_reg src0, src_reg src1, src_reg src2)
68 {
69 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
70 src0, src1, src2));
71 }
72
73
74 vec4_instruction *
75 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
76 {
77 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
78 }
79
80 vec4_instruction *
81 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
82 {
83 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
84 }
85
86 vec4_instruction *
87 vec4_visitor::emit(enum opcode opcode)
88 {
89 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
90 }
91
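/* Convenience constructors for one- and two-source ALU instructions.  Note
 * that these only allocate the vec4_instruction out of mem_ctx; the caller
 * still has to pass the result to emit().  SHL, SHR and ASR are the shift
 * opcodes added by this change, used for GLSL's << and >> operators below.
 */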
92 #define ALU1(op) \
93 vec4_instruction * \
94 vec4_visitor::op(dst_reg dst, src_reg src0) \
95 { \
96 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
97 src0); \
98 }
99
100 #define ALU2(op) \
101 vec4_instruction * \
102 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
103 { \
104 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
105 src0, src1); \
106 }
107
108 ALU1(NOT)
109 ALU1(MOV)
110 ALU1(FRC)
111 ALU1(RNDD)
112 ALU1(RNDE)
113 ALU1(RNDZ)
114 ALU2(ADD)
115 ALU2(MUL)
116 ALU2(MACH)
117 ALU2(AND)
118 ALU2(OR)
119 ALU2(XOR)
120 ALU2(DP3)
121 ALU2(DP4)
122 ALU2(DPH)
123 ALU2(SHL)
124 ALU2(SHR)
125 ALU2(ASR)
126
127 /** Gen4 predicated IF. */
128 vec4_instruction *
129 vec4_visitor::IF(uint32_t predicate)
130 {
131 vec4_instruction *inst;
132
133 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
134 inst->predicate = predicate;
135
136 return inst;
137 }
138
139 /** Gen6+ IF with embedded comparison. */
140 vec4_instruction *
141 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
142 {
143 assert(intel->gen >= 6);
144
145 vec4_instruction *inst;
146
147 resolve_ud_negate(&src0);
148 resolve_ud_negate(&src1);
149
150 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
151 src0, src1);
152 inst->conditional_mod = condition;
153
154 return inst;
155 }
156
157 /**
158 * CMP: Sets the low bit of the destination channels with the result
159 * of the comparison, while the upper bits are undefined, and updates
160 * the flag register with the packed 16 bits of the result.
161 */
162 vec4_instruction *
163 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
164 {
165 vec4_instruction *inst;
166
167 /* original gen4 does type conversion to the destination type
168 * before comparison, producing garbage results for floating
169 * point comparisons.
170 */
171 if (intel->gen == 4) {
172 dst.type = src0.type;
173 if (dst.file == HW_REG)
174 dst.fixed_hw_reg.type = dst.type;
175 }
176
177 resolve_ud_negate(&src0);
178 resolve_ud_negate(&src1);
179
180 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
181 inst->conditional_mod = condition;
182
183 return inst;
184 }
185
186 vec4_instruction *
187 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
188 {
189 vec4_instruction *inst;
190
191 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
192 dst, index);
193 inst->base_mrf = 14;
194 inst->mlen = 2;
195
196 return inst;
197 }
198
199 vec4_instruction *
200 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
201 {
202 vec4_instruction *inst;
203
204 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
205 dst, src, index);
206 inst->base_mrf = 13;
207 inst->mlen = 3;
208
209 return inst;
210 }
211
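/* Emit a dot product of the requested width: elements may be 2, 3 or 4,
 * selecting DP2, DP3 or DP4 respectively.
 */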
212 void
213 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
214 {
215 static enum opcode dot_opcodes[] = {
216 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
217 };
218
219 emit(dot_opcodes[elements - 2], dst, src0, src1);
220 }
221
222 void
223 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
224 {
225 /* The gen6 math instruction ignores the source modifiers --
226 * swizzle, abs, negate, and at least some parts of the register
227 * region description.
228 *
229 * While it would seem that this MOV could be avoided at this point
230 * in the case that the swizzle is matched up with the destination
231 * writemask, note that uniform packing and register allocation
232 * could rearrange our swizzle, so let's leave this matter up to
233 * copy propagation later.
234 */
235 src_reg temp_src = src_reg(this, glsl_type::vec4_type);
236 emit(MOV(dst_reg(temp_src), src));
237
238 if (dst.writemask != WRITEMASK_XYZW) {
239 /* The gen6 math instruction must be align1, so we can't do
240 * writemasks.
241 */
242 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
243
244 emit(opcode, temp_dst, temp_src);
245
246 emit(MOV(dst, src_reg(temp_dst)));
247 } else {
248 emit(opcode, dst, temp_src);
249 }
250 }
251
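/* On gen4/5, math is a send to the shared math unit rather than a regular
 * ALU operation, so the instruction needs a message register (base_mrf)
 * and message length (mlen).
 */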
252 void
253 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
254 {
255 vec4_instruction *inst = emit(opcode, dst, src);
256 inst->base_mrf = 1;
257 inst->mlen = 1;
258 }
259
260 void
261 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
262 {
263 switch (opcode) {
264 case SHADER_OPCODE_RCP:
265 case SHADER_OPCODE_RSQ:
266 case SHADER_OPCODE_SQRT:
267 case SHADER_OPCODE_EXP2:
268 case SHADER_OPCODE_LOG2:
269 case SHADER_OPCODE_SIN:
270 case SHADER_OPCODE_COS:
271 break;
272 default:
273 assert(!"not reached: bad math opcode");
274 return;
275 }
276
277 if (intel->gen >= 7) {
278 emit(opcode, dst, src);
279 } else if (intel->gen == 6) {
280 return emit_math1_gen6(opcode, dst, src);
281 } else {
282 return emit_math1_gen4(opcode, dst, src);
283 }
284 }
285
286 void
287 vec4_visitor::emit_math2_gen6(enum opcode opcode,
288 dst_reg dst, src_reg src0, src_reg src1)
289 {
290 src_reg expanded;
291
292 /* The gen6 math instruction ignores the source modifiers --
293 * swizzle, abs, negate, and at least some parts of the register
294 * region description. Move the sources to temporaries to make it
295 * generally work.
296 */
297
298 expanded = src_reg(this, glsl_type::vec4_type);
299 expanded.type = src0.type;
300 emit(MOV(dst_reg(expanded), src0));
301 src0 = expanded;
302
303 expanded = src_reg(this, glsl_type::vec4_type);
304 expanded.type = src1.type;
305 emit(MOV(dst_reg(expanded), src1));
306 src1 = expanded;
307
308 if (dst.writemask != WRITEMASK_XYZW) {
309 /* The gen6 math instruction must be align1, so we can't do
310 * writemasks.
311 */
312 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
313 temp_dst.type = dst.type;
314
315 emit(opcode, temp_dst, src0, src1);
316
317 emit(MOV(dst, src_reg(temp_dst)));
318 } else {
319 emit(opcode, dst, src0, src1);
320 }
321 }
322
323 void
324 vec4_visitor::emit_math2_gen4(enum opcode opcode,
325 dst_reg dst, src_reg src0, src_reg src1)
326 {
327 vec4_instruction *inst = emit(opcode, dst, src0, src1);
328 inst->base_mrf = 1;
329 inst->mlen = 2;
330 }
331
332 void
333 vec4_visitor::emit_math(enum opcode opcode,
334 dst_reg dst, src_reg src0, src_reg src1)
335 {
336 switch (opcode) {
337 case SHADER_OPCODE_POW:
338 case SHADER_OPCODE_INT_QUOTIENT:
339 case SHADER_OPCODE_INT_REMAINDER:
340 break;
341 default:
342 assert(!"not reached: unsupported binary math opcode");
343 return;
344 }
345
346 if (intel->gen >= 7) {
347 emit(opcode, dst, src0, src1);
348 } else if (intel->gen == 6) {
349 return emit_math2_gen6(opcode, dst, src0, src1);
350 } else {
351 return emit_math2_gen4(opcode, dst, src0, src1);
352 }
353 }
354
355 void
356 vec4_visitor::visit_instructions(const exec_list *list)
357 {
358 foreach_list(node, list) {
359 ir_instruction *ir = (ir_instruction *)node;
360
361 base_ir = ir;
362 ir->accept(this);
363 }
364 }
365
366
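/* Size of a variable of the given type, measured in vec4 slots: scalars and
 * vectors take one slot, matrices one slot per column, arrays their element
 * size times the length, and structs the sum of their fields.
 */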
367 static int
368 type_size(const struct glsl_type *type)
369 {
370 unsigned int i;
371 int size;
372
373 switch (type->base_type) {
374 case GLSL_TYPE_UINT:
375 case GLSL_TYPE_INT:
376 case GLSL_TYPE_FLOAT:
377 case GLSL_TYPE_BOOL:
378 if (type->is_matrix()) {
379 return type->matrix_columns;
380 } else {
381 /* Regardless of size of vector, it gets a vec4. This is bad
382 * packing for things like floats, but otherwise arrays become a
383 * mess. Hopefully a later pass over the code can pack scalars
384 * down if appropriate.
385 */
386 return 1;
387 }
388 case GLSL_TYPE_ARRAY:
389 assert(type->length > 0);
390 return type_size(type->fields.array) * type->length;
391 case GLSL_TYPE_STRUCT:
392 size = 0;
393 for (i = 0; i < type->length; i++) {
394 size += type_size(type->fields.structure[i].type);
395 }
396 return size;
397 case GLSL_TYPE_SAMPLER:
398 /* Samplers take up one slot in UNIFORMS[], but they're baked in
399 * at link time.
400 */
401 return 1;
402 default:
403 assert(0);
404 return 0;
405 }
406 }
407
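/* Allocate a new virtual GRF of the given size (in vec4 registers), growing
 * the bookkeeping arrays as needed, and return its index.
 */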
408 int
409 vec4_visitor::virtual_grf_alloc(int size)
410 {
411 if (virtual_grf_array_size <= virtual_grf_count) {
412 if (virtual_grf_array_size == 0)
413 virtual_grf_array_size = 16;
414 else
415 virtual_grf_array_size *= 2;
416 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
417 virtual_grf_array_size);
418 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
419 virtual_grf_array_size);
420 }
421 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
422 virtual_grf_reg_count += size;
423 virtual_grf_sizes[virtual_grf_count] = size;
424 return virtual_grf_count++;
425 }
426
427 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
428 {
429 init();
430
431 this->file = GRF;
432 this->reg = v->virtual_grf_alloc(type_size(type));
433
434 if (type->is_array() || type->is_record()) {
435 this->swizzle = BRW_SWIZZLE_NOOP;
436 } else {
437 this->swizzle = swizzle_for_size(type->vector_elements);
438 }
439
440 this->type = brw_type_for_base_type(type);
441 }
442
443 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
444 {
445 init();
446
447 this->file = GRF;
448 this->reg = v->virtual_grf_alloc(type_size(type));
449
450 if (type->is_array() || type->is_record()) {
451 this->writemask = WRITEMASK_XYZW;
452 } else {
453 this->writemask = (1 << type->vector_elements) - 1;
454 }
455
456 this->type = brw_type_for_base_type(type);
457 }
458
459 /* Our support for uniforms is piggy-backed on the struct
460 * gl_vertex_program, because that's where the values actually
461 * get stored, rather than in some global gl_shader_program uniform
462 * store.
463 */
464 int
465 vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
466 {
467 unsigned int offset = 0;
468 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
469
470 if (type->is_matrix()) {
471 const glsl_type *column = type->column_type();
472
473 for (unsigned int i = 0; i < type->matrix_columns; i++) {
474 offset += setup_uniform_values(loc + offset, column);
475 }
476
477 return offset;
478 }
479
480 switch (type->base_type) {
481 case GLSL_TYPE_FLOAT:
482 case GLSL_TYPE_UINT:
483 case GLSL_TYPE_INT:
484 case GLSL_TYPE_BOOL:
485 for (unsigned int i = 0; i < type->vector_elements; i++) {
486 c->prog_data.param[this->uniforms * 4 + i] = &values[i];
487 }
488
489 /* Set up pad elements to get things aligned to a vec4 boundary. */
490 for (unsigned int i = type->vector_elements; i < 4; i++) {
491 static float zero = 0;
492
493 c->prog_data.param[this->uniforms * 4 + i] = &zero;
494 }
495
496 /* Track the size of this uniform vector, for future packing of
497 * uniforms.
498 */
499 this->uniform_vector_size[this->uniforms] = type->vector_elements;
500 this->uniforms++;
501
502 return 1;
503
504 case GLSL_TYPE_STRUCT:
505 for (unsigned int i = 0; i < type->length; i++) {
506 offset += setup_uniform_values(loc + offset,
507 type->fields.structure[i].type);
508 }
509 return offset;
510
511 case GLSL_TYPE_ARRAY:
512 for (unsigned int i = 0; i < type->length; i++) {
513 offset += setup_uniform_values(loc + offset, type->fields.array);
514 }
515 return offset;
516
517 case GLSL_TYPE_SAMPLER:
518 /* The sampler takes up a slot, but we don't use any values from it. */
519 return 1;
520
521 default:
522 assert(!"not reached");
523 return 0;
524 }
525 }
526
527 void
528 vec4_visitor::setup_uniform_clipplane_values()
529 {
530 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
531
532 if (intel->gen < 6) {
533 /* Pre-Gen6, we compact clip planes. For example, if the user
534 * enables just clip planes 0, 1, and 3, we will enable clip planes
535 * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
536 * plane 2. This simplifies the implementation of the Gen6 clip
537 * thread.
538 */
539 int compacted_clipplane_index = 0;
540 for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
541 if (!(c->key.userclip_planes_enabled_gen_4_5 & (1 << i)))
542 continue;
543
544 this->uniform_vector_size[this->uniforms] = 4;
545 this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
546 this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
547 for (int j = 0; j < 4; ++j) {
548 c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
549 }
550 ++compacted_clipplane_index;
551 ++this->uniforms;
552 }
553 } else {
554 /* In Gen6 and later, we don't compact clip planes, because this
555 * simplifies the implementation of gl_ClipDistance.
556 */
557 for (int i = 0; i < c->key.nr_userclip_plane_consts; ++i) {
558 this->uniform_vector_size[this->uniforms] = 4;
559 this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
560 this->userplane[i].type = BRW_REGISTER_TYPE_F;
561 for (int j = 0; j < 4; ++j) {
562 c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
563 }
564 ++this->uniforms;
565 }
566 }
567 }
568
569 /* Our support for builtin uniforms is even scarier than non-builtin.
570 * It sits on top of the PROG_STATE_VAR parameters that are
571 * automatically updated from GL context state.
572 */
573 void
574 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
575 {
576 const ir_state_slot *const slots = ir->state_slots;
577 assert(ir->state_slots != NULL);
578
579 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
580 /* This state reference has already been setup by ir_to_mesa,
581 * but we'll get the same index back here. We can reference
582 * ParameterValues directly, since unlike brw_fs.cpp, we never
583 * add new state references during compile.
584 */
585 int index = _mesa_add_state_reference(this->vp->Base.Parameters,
586 (gl_state_index *)slots[i].tokens);
587 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
588
589 this->uniform_vector_size[this->uniforms] = 0;
590 /* Add each of the unique swizzled channels of the element.
591 * This will end up matching the size of the glsl_type of this field.
592 */
593 int last_swiz = -1;
594 for (unsigned int j = 0; j < 4; j++) {
595 int swiz = GET_SWZ(slots[i].swizzle, j);
596 last_swiz = swiz;
597
598 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
599 if (swiz <= last_swiz)
600 this->uniform_vector_size[this->uniforms]++;
601 }
602 this->uniforms++;
603 }
604 }
605
606 dst_reg *
607 vec4_visitor::variable_storage(ir_variable *var)
608 {
609 return (dst_reg *)hash_table_find(this->variable_ht, var);
610 }
611
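/* Evaluate a boolean rvalue and set the flag register so that a following
 * instruction can be predicated on it.  *predicate is set to the predicate
 * mode the caller should use: BRW_PREDICATE_NORMAL, or one of the
 * ALL4H/ANY4H modes for the vector comparisons.
 */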
612 void
613 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
614 {
615 ir_expression *expr = ir->as_expression();
616
617 *predicate = BRW_PREDICATE_NORMAL;
618
619 if (expr) {
620 src_reg op[2];
621 vec4_instruction *inst;
622
623 assert(expr->get_num_operands() <= 2);
624 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
625 expr->operands[i]->accept(this);
626 op[i] = this->result;
627
628 resolve_ud_negate(&op[i]);
629 }
630
631 switch (expr->operation) {
632 case ir_unop_logic_not:
633 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
634 inst->conditional_mod = BRW_CONDITIONAL_Z;
635 break;
636
637 case ir_binop_logic_xor:
638 inst = emit(XOR(dst_null_d(), op[0], op[1]));
639 inst->conditional_mod = BRW_CONDITIONAL_NZ;
640 break;
641
642 case ir_binop_logic_or:
643 inst = emit(OR(dst_null_d(), op[0], op[1]));
644 inst->conditional_mod = BRW_CONDITIONAL_NZ;
645 break;
646
647 case ir_binop_logic_and:
648 inst = emit(AND(dst_null_d(), op[0], op[1]));
649 inst->conditional_mod = BRW_CONDITIONAL_NZ;
650 break;
651
652 case ir_unop_f2b:
653 if (intel->gen >= 6) {
654 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
655 } else {
656 inst = emit(MOV(dst_null_f(), op[0]));
657 inst->conditional_mod = BRW_CONDITIONAL_NZ;
658 }
659 break;
660
661 case ir_unop_i2b:
662 if (intel->gen >= 6) {
663 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
664 } else {
665 inst = emit(MOV(dst_null_d(), op[0]));
666 inst->conditional_mod = BRW_CONDITIONAL_NZ;
667 }
668 break;
669
670 case ir_binop_all_equal:
671 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
672 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
673 break;
674
675 case ir_binop_any_nequal:
676 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
677 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
678 break;
679
680 case ir_unop_any:
681 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
682 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
683 break;
684
685 case ir_binop_greater:
686 case ir_binop_gequal:
687 case ir_binop_less:
688 case ir_binop_lequal:
689 case ir_binop_equal:
690 case ir_binop_nequal:
691 emit(CMP(dst_null_d(), op[0], op[1],
692 brw_conditional_for_comparison(expr->operation)));
693 break;
694
695 default:
696 assert(!"not reached");
697 break;
698 }
699 return;
700 }
701
702 ir->accept(this);
703
704 resolve_ud_negate(&this->result);
705
706 if (intel->gen >= 6) {
707 vec4_instruction *inst = emit(AND(dst_null_d(),
708 this->result, src_reg(1)));
709 inst->conditional_mod = BRW_CONDITIONAL_NZ;
710 } else {
711 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
712 inst->conditional_mod = BRW_CONDITIONAL_NZ;
713 }
714 }
715
716 /**
717 * Emit a gen6 IF statement with the comparison folded into the IF
718 * instruction.
719 */
720 void
721 vec4_visitor::emit_if_gen6(ir_if *ir)
722 {
723 ir_expression *expr = ir->condition->as_expression();
724
725 if (expr) {
726 src_reg op[2];
727 dst_reg temp;
728
729 assert(expr->get_num_operands() <= 2);
730 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
731 expr->operands[i]->accept(this);
732 op[i] = this->result;
733 }
734
735 switch (expr->operation) {
736 case ir_unop_logic_not:
737 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
738 return;
739
740 case ir_binop_logic_xor:
741 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
742 return;
743
744 case ir_binop_logic_or:
745 temp = dst_reg(this, glsl_type::bool_type);
746 emit(OR(temp, op[0], op[1]));
747 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
748 return;
749
750 case ir_binop_logic_and:
751 temp = dst_reg(this, glsl_type::bool_type);
752 emit(AND(temp, op[0], op[1]));
753 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
754 return;
755
756 case ir_unop_f2b:
757 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
758 return;
759
760 case ir_unop_i2b:
761 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
762 return;
763
764 case ir_binop_greater:
765 case ir_binop_gequal:
766 case ir_binop_less:
767 case ir_binop_lequal:
768 case ir_binop_equal:
769 case ir_binop_nequal:
770 emit(IF(op[0], op[1],
771 brw_conditional_for_comparison(expr->operation)));
772 return;
773
774 case ir_binop_all_equal:
775 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
776 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
777 return;
778
779 case ir_binop_any_nequal:
780 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
781 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
782 return;
783
784 case ir_unop_any:
785 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
786 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
787 return;
788
789 default:
790 assert(!"not reached");
791 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
792 return;
793 }
794 return;
795 }
796
797 ir->condition->accept(this);
798
799 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
800 }
801
802 void
803 vec4_visitor::visit(ir_variable *ir)
804 {
805 dst_reg *reg = NULL;
806
807 if (variable_storage(ir))
808 return;
809
810 switch (ir->mode) {
811 case ir_var_in:
812 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
813
814 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
815 * come in as floating point conversions of the integer values.
816 */
817 for (int i = ir->location; i < ir->location + type_size(ir->type); i++) {
818 if (!c->key.gl_fixed_input_size[i])
819 continue;
820
821 dst_reg dst = *reg;
822 dst.type = brw_type_for_base_type(ir->type);
823 dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1;
824 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
825 }
826 break;
827
828 case ir_var_out:
829 reg = new(mem_ctx) dst_reg(this, ir->type);
830
831 for (int i = 0; i < type_size(ir->type); i++) {
832 output_reg[ir->location + i] = *reg;
833 output_reg[ir->location + i].reg_offset = i;
834 output_reg[ir->location + i].type =
835 brw_type_for_base_type(ir->type->get_scalar_type());
836 output_reg_annotation[ir->location + i] = ir->name;
837 }
838 break;
839
840 case ir_var_auto:
841 case ir_var_temporary:
842 reg = new(mem_ctx) dst_reg(this, ir->type);
843 break;
844
845 case ir_var_uniform:
846 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
847
848 /* Thanks to the lower_ubo_reference pass, we will see only
849 * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
850 * variables, so no need for them to be in variable_ht.
851 */
852 if (ir->uniform_block != -1)
853 return;
854
855 /* Track how big the whole uniform variable is, in case we need to put a
856 * copy of its data into pull constants for array access.
857 */
858 this->uniform_size[this->uniforms] = type_size(ir->type);
859
860 if (!strncmp(ir->name, "gl_", 3)) {
861 setup_builtin_uniform_values(ir);
862 } else {
863 setup_uniform_values(ir->location, ir->type);
864 }
865 break;
866
867 case ir_var_system_value:
868 /* VertexID is stored by the VF as the last vertex element, but
869 * we don't represent it with a flag in inputs_read, so we call
870 * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
871 */
872 reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
873 prog_data->uses_vertexid = true;
874
875 switch (ir->location) {
876 case SYSTEM_VALUE_VERTEX_ID:
877 reg->writemask = WRITEMASK_X;
878 break;
879 case SYSTEM_VALUE_INSTANCE_ID:
880 reg->writemask = WRITEMASK_Y;
881 break;
882 default:
883 assert(!"not reached");
884 break;
885 }
886 break;
887
888 default:
889 assert(!"not reached");
890 }
891
892 reg->type = brw_type_for_base_type(ir->type);
893 hash_table_insert(this->variable_ht, reg, ir);
894 }
895
896 void
897 vec4_visitor::visit(ir_loop *ir)
898 {
899 dst_reg counter;
900
901 /* We don't want debugging output to print the whole body of the
902 * loop as the annotation.
903 */
904 this->base_ir = NULL;
905
906 if (ir->counter != NULL) {
907 this->base_ir = ir->counter;
908 ir->counter->accept(this);
909 counter = *(variable_storage(ir->counter));
910
911 if (ir->from != NULL) {
912 this->base_ir = ir->from;
913 ir->from->accept(this);
914
915 emit(MOV(counter, this->result));
916 }
917 }
918
919 emit(BRW_OPCODE_DO);
920
921 if (ir->to) {
922 this->base_ir = ir->to;
923 ir->to->accept(this);
924
925 emit(CMP(dst_null_d(), src_reg(counter), this->result,
926 brw_conditional_for_comparison(ir->cmp)));
927
928 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
929 inst->predicate = BRW_PREDICATE_NORMAL;
930 }
931
932 visit_instructions(&ir->body_instructions);
933
934
935 if (ir->increment) {
936 this->base_ir = ir->increment;
937 ir->increment->accept(this);
938 emit(ADD(counter, src_reg(counter), this->result));
939 }
940
941 emit(BRW_OPCODE_WHILE);
942 }
943
944 void
945 vec4_visitor::visit(ir_loop_jump *ir)
946 {
947 switch (ir->mode) {
948 case ir_loop_jump::jump_break:
949 emit(BRW_OPCODE_BREAK);
950 break;
951 case ir_loop_jump::jump_continue:
952 emit(BRW_OPCODE_CONTINUE);
953 break;
954 }
955 }
956
957
958 void
959 vec4_visitor::visit(ir_function_signature *ir)
960 {
961 assert(0);
962 (void)ir;
963 }
964
965 void
966 vec4_visitor::visit(ir_function *ir)
967 {
968 /* Ignore function bodies other than main() -- we shouldn't see calls to
969 * them since they should all be inlined.
970 */
971 if (strcmp(ir->name, "main") == 0) {
972 const ir_function_signature *sig;
973 exec_list empty;
974
975 sig = ir->matching_signature(&empty);
976
977 assert(sig);
978
979 visit_instructions(&sig->body);
980 }
981 }
982
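/* If this expression is just a saturate wrapped around another rvalue
 * (a clamp to [0, 1]), emit it as a single saturating MOV and return true.
 */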
983 bool
984 vec4_visitor::try_emit_sat(ir_expression *ir)
985 {
986 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
987 if (!sat_src)
988 return false;
989
990 sat_src->accept(this);
991 src_reg src = this->result;
992
993 this->result = src_reg(this, ir->type);
994 vec4_instruction *inst;
995 inst = emit(MOV(dst_reg(this->result), src));
996 inst->saturate = true;
997
998 return true;
999 }
1000
1001 void
1002 vec4_visitor::emit_bool_comparison(unsigned int op,
1003 dst_reg dst, src_reg src0, src_reg src1)
1004 {
1005 /* original gen4 does destination conversion before comparison. */
1006 if (intel->gen < 5)
1007 dst.type = src0.type;
1008
1009 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
1010
1011 dst.type = BRW_REGISTER_TYPE_D;
1012 emit(AND(dst, src_reg(dst), src_reg(0x1)));
1013 }
1014
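/* Emit a min or max, selected by the conditional mod: gen6+ can use SEL
 * with an embedded comparison, while earlier parts need an explicit CMP
 * followed by a predicated SEL.
 */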
1015 void
1016 vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
1017 src_reg src0, src_reg src1)
1018 {
1019 vec4_instruction *inst;
1020
1021 if (intel->gen >= 6) {
1022 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1023 inst->conditional_mod = conditionalmod;
1024 } else {
1025 emit(CMP(dst, src0, src1, conditionalmod));
1026
1027 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1028 inst->predicate = BRW_PREDICATE_NORMAL;
1029 }
1030 }
1031
1032 void
1033 vec4_visitor::visit(ir_expression *ir)
1034 {
1035 unsigned int operand;
1036 src_reg op[Elements(ir->operands)];
1037 src_reg result_src;
1038 dst_reg result_dst;
1039 vec4_instruction *inst;
1040
1041 if (try_emit_sat(ir))
1042 return;
1043
1044 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1045 this->result.file = BAD_FILE;
1046 ir->operands[operand]->accept(this);
1047 if (this->result.file == BAD_FILE) {
1048 printf("Failed to get tree for expression operand:\n");
1049 ir->operands[operand]->print();
1050 exit(1);
1051 }
1052 op[operand] = this->result;
1053
1054 /* Matrix expression operands should have been broken down to vector
1055 * operations already.
1056 */
1057 assert(!ir->operands[operand]->type->is_matrix());
1058 }
1059
1060 int vector_elements = ir->operands[0]->type->vector_elements;
1061 if (ir->operands[1]) {
1062 vector_elements = MAX2(vector_elements,
1063 ir->operands[1]->type->vector_elements);
1064 }
1065
1066 this->result.file = BAD_FILE;
1067
1068 /* Storage for our result. Ideally for an assignment we'd be using
1069 * the actual storage for the result here, instead.
1070 */
1071 result_src = src_reg(this, ir->type);
1072 /* convenience for the emit functions below. */
1073 result_dst = dst_reg(result_src);
1074 /* If nothing special happens, this is the result. */
1075 this->result = result_src;
1076 /* Limit writes to the channels that will be used by result_src later.
1077 * This does limit this temp's use as a temporary for multi-instruction
1078 * sequences.
1079 */
1080 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1081
1082 switch (ir->operation) {
1083 case ir_unop_logic_not:
1084 * Note that BRW_OPCODE_NOT is not appropriate here, since it takes the
1085 * one's complement of the whole register, not just bit 0.
1086 */
1087 emit(XOR(result_dst, op[0], src_reg(1)));
1088 break;
1089 case ir_unop_neg:
1090 op[0].negate = !op[0].negate;
1091 this->result = op[0];
1092 break;
1093 case ir_unop_abs:
1094 op[0].abs = true;
1095 op[0].negate = false;
1096 this->result = op[0];
1097 break;
1098
1099 case ir_unop_sign:
1100 emit(MOV(result_dst, src_reg(0.0f)));
1101
1102 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1103 inst = emit(MOV(result_dst, src_reg(1.0f)));
1104 inst->predicate = BRW_PREDICATE_NORMAL;
1105
1106 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1107 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1108 inst->predicate = BRW_PREDICATE_NORMAL;
1109
1110 break;
1111
1112 case ir_unop_rcp:
1113 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1114 break;
1115
1116 case ir_unop_exp2:
1117 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1118 break;
1119 case ir_unop_log2:
1120 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1121 break;
1122 case ir_unop_exp:
1123 case ir_unop_log:
1124 assert(!"not reached: should be handled by ir_explog_to_explog2");
1125 break;
1126 case ir_unop_sin:
1127 case ir_unop_sin_reduced:
1128 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1129 break;
1130 case ir_unop_cos:
1131 case ir_unop_cos_reduced:
1132 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1133 break;
1134
1135 case ir_unop_dFdx:
1136 case ir_unop_dFdy:
1137 assert(!"derivatives not valid in vertex shader");
1138 break;
1139
1140 case ir_unop_noise:
1141 assert(!"not reached: should be handled by lower_noise");
1142 break;
1143
1144 case ir_binop_add:
1145 emit(ADD(result_dst, op[0], op[1]));
1146 break;
1147 case ir_binop_sub:
1148 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1149 break;
1150
1151 case ir_binop_mul:
1152 if (ir->type->is_integer()) {
1153 /* For integer multiplication, the MUL uses the low 16 bits
1154 * of one of the operands (src0 on gen6, src1 on gen7). The
1155 * MACH accumulates in the contribution of the upper 16 bits
1156 * of that operand.
1157 *
1158 * FINISHME: Emit just the MUL if we know an operand is small
1159 * enough.
1160 */
1161 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1162
1163 emit(MUL(acc, op[0], op[1]));
1164 emit(MACH(dst_null_d(), op[0], op[1]));
1165 emit(MOV(result_dst, src_reg(acc)));
1166 } else {
1167 emit(MUL(result_dst, op[0], op[1]));
1168 }
1169 break;
1170 case ir_binop_div:
1171 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1172 assert(ir->type->is_integer());
1173 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1174 break;
1175 case ir_binop_mod:
1176 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1177 assert(ir->type->is_integer());
1178 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1179 break;
1180
1181 case ir_binop_less:
1182 case ir_binop_greater:
1183 case ir_binop_lequal:
1184 case ir_binop_gequal:
1185 case ir_binop_equal:
1186 case ir_binop_nequal: {
1187 emit(CMP(result_dst, op[0], op[1],
1188 brw_conditional_for_comparison(ir->operation)));
1189 emit(AND(result_dst, result_src, src_reg(0x1)));
1190 break;
1191 }
1192
1193 case ir_binop_all_equal:
1194 /* "==" operator producing a scalar boolean. */
1195 if (ir->operands[0]->type->is_vector() ||
1196 ir->operands[1]->type->is_vector()) {
1197 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1198 emit(MOV(result_dst, src_reg(0)));
1199 inst = emit(MOV(result_dst, src_reg(1)));
1200 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1201 } else {
1202 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1203 emit(AND(result_dst, result_src, src_reg(0x1)));
1204 }
1205 break;
1206 case ir_binop_any_nequal:
1207 /* "!=" operator producing a scalar boolean. */
1208 if (ir->operands[0]->type->is_vector() ||
1209 ir->operands[1]->type->is_vector()) {
1210 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1211
1212 emit(MOV(result_dst, src_reg(0)));
1213 inst = emit(MOV(result_dst, src_reg(1)));
1214 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1215 } else {
1216 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1217 emit(AND(result_dst, result_src, src_reg(0x1)));
1218 }
1219 break;
1220
1221 case ir_unop_any:
1222 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1223 emit(MOV(result_dst, src_reg(0)));
1224
1225 inst = emit(MOV(result_dst, src_reg(1)));
1226 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1227 break;
1228
1229 case ir_binop_logic_xor:
1230 emit(XOR(result_dst, op[0], op[1]));
1231 break;
1232
1233 case ir_binop_logic_or:
1234 emit(OR(result_dst, op[0], op[1]));
1235 break;
1236
1237 case ir_binop_logic_and:
1238 emit(AND(result_dst, op[0], op[1]));
1239 break;
1240
1241 case ir_binop_dot:
1242 assert(ir->operands[0]->type->is_vector());
1243 assert(ir->operands[0]->type == ir->operands[1]->type);
1244 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1245 break;
1246
1247 case ir_unop_sqrt:
1248 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1249 break;
1250 case ir_unop_rsq:
1251 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1252 break;
1253
1254 case ir_unop_bitcast_i2f:
1255 case ir_unop_bitcast_u2f:
1256 this->result = op[0];
1257 this->result.type = BRW_REGISTER_TYPE_F;
1258 break;
1259
1260 case ir_unop_bitcast_f2i:
1261 this->result = op[0];
1262 this->result.type = BRW_REGISTER_TYPE_D;
1263 break;
1264
1265 case ir_unop_bitcast_f2u:
1266 this->result = op[0];
1267 this->result.type = BRW_REGISTER_TYPE_UD;
1268 break;
1269
1270 case ir_unop_i2f:
1271 case ir_unop_i2u:
1272 case ir_unop_u2i:
1273 case ir_unop_u2f:
1274 case ir_unop_b2f:
1275 case ir_unop_b2i:
1276 case ir_unop_f2i:
1277 case ir_unop_f2u:
1278 emit(MOV(result_dst, op[0]));
1279 break;
1280 case ir_unop_f2b:
1281 case ir_unop_i2b: {
1282 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1283 emit(AND(result_dst, result_src, src_reg(1)));
1284 break;
1285 }
1286
1287 case ir_unop_trunc:
1288 emit(RNDZ(result_dst, op[0]));
1289 break;
1290 case ir_unop_ceil:
1291 op[0].negate = !op[0].negate;
1292 inst = emit(RNDD(result_dst, op[0]));
1293 this->result.negate = true;
1294 break;
1295 case ir_unop_floor:
1296 inst = emit(RNDD(result_dst, op[0]));
1297 break;
1298 case ir_unop_fract:
1299 inst = emit(FRC(result_dst, op[0]));
1300 break;
1301 case ir_unop_round_even:
1302 emit(RNDE(result_dst, op[0]));
1303 break;
1304
1305 case ir_binop_min:
1306 emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
1307 break;
1308 case ir_binop_max:
1309 emit_minmax(BRW_CONDITIONAL_G, result_dst, op[0], op[1]);
1310 break;
1311
1312 case ir_binop_pow:
1313 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1314 break;
1315
1316 case ir_unop_bit_not:
1317 inst = emit(NOT(result_dst, op[0]));
1318 break;
1319 case ir_binop_bit_and:
1320 inst = emit(AND(result_dst, op[0], op[1]));
1321 break;
1322 case ir_binop_bit_xor:
1323 inst = emit(XOR(result_dst, op[0], op[1]));
1324 break;
1325 case ir_binop_bit_or:
1326 inst = emit(OR(result_dst, op[0], op[1]));
1327 break;
1328
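/* GLSL's shift operators use the SHL/SHR/ASR emitters added above: << is
 * always a logical shift left, while >> is arithmetic (ASR) for signed
 * operands and logical (SHR) for unsigned ones.
 */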
1329 case ir_binop_lshift:
1330 inst = emit(SHL(result_dst, op[0], op[1]));
1331 break;
1332
1333 case ir_binop_rshift:
1334 if (ir->type->base_type == GLSL_TYPE_INT)
1335 inst = emit(ASR(result_dst, op[0], op[1]));
1336 else
1337 inst = emit(SHR(result_dst, op[0], op[1]));
1338 break;
1339
1340 case ir_binop_ubo_load: {
1341 ir_constant *uniform_block = ir->operands[0]->as_constant();
1342 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1343 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
1344 src_reg offset = op[1];
1345
1346 /* Now, load the vector from that offset. */
1347 assert(ir->type->is_vector() || ir->type->is_scalar());
1348
1349 src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
1350 packed_consts.type = result.type;
1351 src_reg surf_index =
1352 src_reg(SURF_INDEX_VS_UBO(uniform_block->value.u[0]));
1353 if (const_offset_ir) {
1354 offset = src_reg(const_offset / 16);
1355 } else {
1356 emit(SHR(dst_reg(offset), offset, src_reg(4)));
1357 }
1358
1359 vec4_instruction *pull =
1360 emit(new(mem_ctx) vec4_instruction(this,
1361 VS_OPCODE_PULL_CONSTANT_LOAD,
1362 dst_reg(packed_consts),
1363 surf_index,
1364 offset));
1365 pull->base_mrf = 14;
1366 pull->mlen = 1;
1367
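/* const_offset is in bytes; "% 16 / 4" picks the component of the fetched
 * vec4 that holds the constant, e.g. a byte offset of 20 selects
 * component 1 (.y) of the second vec4.
 */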
1368 packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements);
1369 packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
1370 const_offset % 16 / 4,
1371 const_offset % 16 / 4,
1372 const_offset % 16 / 4);
1373
1374 /* UBO bools are any nonzero int. We store bools as either 0 or 1. */
1375 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1376 emit(CMP(result_dst, packed_consts, src_reg(0u),
1377 BRW_CONDITIONAL_NZ));
1378 emit(AND(result_dst, result, src_reg(0x1)));
1379 } else {
1380 emit(MOV(result_dst, packed_consts));
1381 }
1382 break;
1383 }
1384
1385 case ir_quadop_vector:
1386 assert(!"not reached: should be handled by lower_quadop_vector");
1387 break;
1388 }
1389 }
1390
1391
1392 void
1393 vec4_visitor::visit(ir_swizzle *ir)
1394 {
1395 src_reg src;
1396 int i = 0;
1397 int swizzle[4];
1398
1399 /* Note that this is only swizzles in expressions, not those on the left
1400 * hand side of an assignment, which do write masking. See ir_assignment
1401 * for that.
1402 */
1403
1404 ir->val->accept(this);
1405 src = this->result;
1406 assert(src.file != BAD_FILE);
1407
1408 for (i = 0; i < ir->type->vector_elements; i++) {
1409 switch (i) {
1410 case 0:
1411 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1412 break;
1413 case 1:
1414 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1415 break;
1416 case 2:
1417 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1418 break;
1419 case 3:
1420 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1421 break;
1422 }
1423 }
1424 for (; i < 4; i++) {
1425 /* Replicate the last channel out. */
1426 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1427 }
1428
1429 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1430
1431 this->result = src;
1432 }
1433
1434 void
1435 vec4_visitor::visit(ir_dereference_variable *ir)
1436 {
1437 const struct glsl_type *type = ir->type;
1438 dst_reg *reg = variable_storage(ir->var);
1439
1440 if (!reg) {
1441 fail("Failed to find variable storage for %s\n", ir->var->name);
1442 this->result = src_reg(brw_null_reg());
1443 return;
1444 }
1445
1446 this->result = src_reg(*reg);
1447
1448 /* System values get their swizzle from the dst_reg writemask */
1449 if (ir->var->mode == ir_var_system_value)
1450 return;
1451
1452 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1453 this->result.swizzle = swizzle_for_size(type->vector_elements);
1454 }
1455
1456 void
1457 vec4_visitor::visit(ir_dereference_array *ir)
1458 {
1459 ir_constant *constant_index;
1460 src_reg src;
1461 int element_size = type_size(ir->type);
1462
1463 constant_index = ir->array_index->constant_expression_value();
1464
1465 ir->array->accept(this);
1466 src = this->result;
1467
1468 if (constant_index) {
1469 src.reg_offset += constant_index->value.i[0] * element_size;
1470 } else {
1471 /* Variable index array dereference. It eats the "vec4" of the
1472 * base of the array and an index that offsets the Mesa register
1473 * index.
1474 */
1475 ir->array_index->accept(this);
1476
1477 src_reg index_reg;
1478
1479 if (element_size == 1) {
1480 index_reg = this->result;
1481 } else {
1482 index_reg = src_reg(this, glsl_type::int_type);
1483
1484 emit(MUL(dst_reg(index_reg), this->result, src_reg(element_size)));
1485 }
1486
1487 if (src.reladdr) {
1488 src_reg temp = src_reg(this, glsl_type::int_type);
1489
1490 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1491
1492 index_reg = temp;
1493 }
1494
1495 src.reladdr = ralloc(mem_ctx, src_reg);
1496 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1497 }
1498
1499 /* If the type is smaller than a vec4, replicate the last channel out. */
1500 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1501 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1502 else
1503 src.swizzle = BRW_SWIZZLE_NOOP;
1504 src.type = brw_type_for_base_type(ir->type);
1505
1506 this->result = src;
1507 }
1508
1509 void
1510 vec4_visitor::visit(ir_dereference_record *ir)
1511 {
1512 unsigned int i;
1513 const glsl_type *struct_type = ir->record->type;
1514 int offset = 0;
1515
1516 ir->record->accept(this);
1517
1518 for (i = 0; i < struct_type->length; i++) {
1519 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1520 break;
1521 offset += type_size(struct_type->fields.structure[i].type);
1522 }
1523
1524 /* If the type is smaller than a vec4, replicate the last channel out. */
1525 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1526 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1527 else
1528 this->result.swizzle = BRW_SWIZZLE_NOOP;
1529 this->result.type = brw_type_for_base_type(ir->type);
1530
1531 this->result.reg_offset += offset;
1532 }
1533
1534 /**
1535 * We want to be careful in assignment setup to hit the actual storage
1536 * instead of potentially using a temporary like we might with the
1537 * ir_dereference handler.
1538 */
1539 static dst_reg
1540 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1541 {
1542 /* The LHS must be a dereference. If the LHS is a variable indexed array
1543 * access of a vector, it must be separated into a series conditional moves
1544 * before reaching this point (see ir_vec_index_to_cond_assign).
1545 */
1546 assert(ir->as_dereference());
1547 ir_dereference_array *deref_array = ir->as_dereference_array();
1548 if (deref_array) {
1549 assert(!deref_array->array->type->is_vector());
1550 }
1551
1552 /* Use the rvalue deref handler for the most part. We'll ignore
1553 * swizzles in it and write swizzles using writemask, though.
1554 */
1555 ir->accept(v);
1556 return dst_reg(v->result);
1557 }
1558
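/* Copy a whole variable (possibly an aggregate) from *src to *dst, one vec4
 * at a time, recursing through structs, arrays and matrices and advancing
 * reg_offset as it goes.
 */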
1559 void
1560 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1561 const struct glsl_type *type, uint32_t predicate)
1562 {
1563 if (type->base_type == GLSL_TYPE_STRUCT) {
1564 for (unsigned int i = 0; i < type->length; i++) {
1565 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1566 }
1567 return;
1568 }
1569
1570 if (type->is_array()) {
1571 for (unsigned int i = 0; i < type->length; i++) {
1572 emit_block_move(dst, src, type->fields.array, predicate);
1573 }
1574 return;
1575 }
1576
1577 if (type->is_matrix()) {
1578 const struct glsl_type *vec_type;
1579
1580 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1581 type->vector_elements, 1);
1582
1583 for (int i = 0; i < type->matrix_columns; i++) {
1584 emit_block_move(dst, src, vec_type, predicate);
1585 }
1586 return;
1587 }
1588
1589 assert(type->is_scalar() || type->is_vector());
1590
1591 dst->type = brw_type_for_base_type(type);
1592 src->type = dst->type;
1593
1594 dst->writemask = (1 << type->vector_elements) - 1;
1595
1596 src->swizzle = swizzle_for_size(type->vector_elements);
1597
1598 vec4_instruction *inst = emit(MOV(*dst, *src));
1599 inst->predicate = predicate;
1600
1601 dst->reg_offset++;
1602 src->reg_offset++;
1603 }
1604
1605
1606 /* If the RHS processing resulted in an instruction generating a
1607 * temporary value, and it would be easy to rewrite the instruction to
1608 * generate its result right into the LHS instead, do so. This ends
1609 * up reliably removing instructions where it can be tricky to do so
1610 * later without real UD chain information.
1611 */
1612 bool
1613 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1614 dst_reg dst,
1615 src_reg src,
1616 vec4_instruction *pre_rhs_inst,
1617 vec4_instruction *last_rhs_inst)
1618 {
1619 /* This could be supported, but it would take more smarts. */
1620 if (ir->condition)
1621 return false;
1622
1623 if (pre_rhs_inst == last_rhs_inst)
1624 return false; /* No instructions generated to work with. */
1625
1626 /* Make sure the last instruction generated our source reg. */
1627 if (src.file != GRF ||
1628 src.file != last_rhs_inst->dst.file ||
1629 src.reg != last_rhs_inst->dst.reg ||
1630 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1631 src.reladdr ||
1632 src.abs ||
1633 src.negate ||
1634 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1635 return false;
1636
1637 /* Check that the last instruction fully initialized the channels
1638 * we want to use, in the order we want to use them. We could
1639 * potentially reswizzle the operands of many instructions so that
1640 * we could handle out of order channels, but don't yet.
1641 */
1642
1643 for (unsigned i = 0; i < 4; i++) {
1644 if (dst.writemask & (1 << i)) {
1645 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1646 return false;
1647
1648 if (BRW_GET_SWZ(src.swizzle, i) != i)
1649 return false;
1650 }
1651 }
1652
1653 /* Success! Rewrite the instruction. */
1654 last_rhs_inst->dst.file = dst.file;
1655 last_rhs_inst->dst.reg = dst.reg;
1656 last_rhs_inst->dst.reg_offset = dst.reg_offset;
1657 last_rhs_inst->dst.reladdr = dst.reladdr;
1658 last_rhs_inst->dst.writemask &= dst.writemask;
1659
1660 return true;
1661 }
1662
1663 void
1664 vec4_visitor::visit(ir_assignment *ir)
1665 {
1666 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1667 uint32_t predicate = BRW_PREDICATE_NONE;
1668
1669 if (!ir->lhs->type->is_scalar() &&
1670 !ir->lhs->type->is_vector()) {
1671 ir->rhs->accept(this);
1672 src_reg src = this->result;
1673
1674 if (ir->condition) {
1675 emit_bool_to_cond_code(ir->condition, &predicate);
1676 }
1677
1678 /* emit_block_move doesn't account for swizzles in the source register.
1679 * This should be ok, since the source register is a structure or an
1680 * array, and those can't be swizzled. But double-check to be sure.
1681 */
1682 assert(src.swizzle ==
1683 (ir->rhs->type->is_matrix()
1684 ? swizzle_for_size(ir->rhs->type->vector_elements)
1685 : BRW_SWIZZLE_NOOP));
1686
1687 emit_block_move(&dst, &src, ir->rhs->type, predicate);
1688 return;
1689 }
1690
1691 /* Now we're down to just a scalar/vector with writemasks. */
1692 int i;
1693
1694 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
1695 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1696
1697 ir->rhs->accept(this);
1698
1699 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
1700
1701 src_reg src = this->result;
1702
1703 int swizzles[4];
1704 int first_enabled_chan = 0;
1705 int src_chan = 0;
1706
1707 assert(ir->lhs->type->is_vector() ||
1708 ir->lhs->type->is_scalar());
1709 dst.writemask = ir->write_mask;
1710
1711 for (int i = 0; i < 4; i++) {
1712 if (dst.writemask & (1 << i)) {
1713 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1714 break;
1715 }
1716 }
1717
1718 /* Swizzle a small RHS vector into the channels being written.
1719 *
1720 * glsl ir treats write_mask as dictating how many channels are
1721 * present on the RHS while in our instructions we need to make
1722 * those channels appear in the slots of the vec4 they're written to.
1723 */
1724 for (int i = 0; i < 4; i++) {
1725 if (dst.writemask & (1 << i))
1726 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1727 else
1728 swizzles[i] = first_enabled_chan;
1729 }
1730 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1731 swizzles[2], swizzles[3]);
1732
1733 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
1734 return;
1735 }
1736
1737 if (ir->condition) {
1738 emit_bool_to_cond_code(ir->condition, &predicate);
1739 }
1740
1741 for (i = 0; i < type_size(ir->lhs->type); i++) {
1742 vec4_instruction *inst = emit(MOV(dst, src));
1743 inst->predicate = predicate;
1744
1745 dst.reg_offset++;
1746 src.reg_offset++;
1747 }
1748 }
1749
1750 void
1751 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1752 {
1753 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1754 foreach_list(node, &ir->components) {
1755 ir_constant *field_value = (ir_constant *)node;
1756
1757 emit_constant_values(dst, field_value);
1758 }
1759 return;
1760 }
1761
1762 if (ir->type->is_array()) {
1763 for (unsigned int i = 0; i < ir->type->length; i++) {
1764 emit_constant_values(dst, ir->array_elements[i]);
1765 }
1766 return;
1767 }
1768
1769 if (ir->type->is_matrix()) {
1770 for (int i = 0; i < ir->type->matrix_columns; i++) {
1771 float *vec = &ir->value.f[i * ir->type->vector_elements];
1772
1773 for (int j = 0; j < ir->type->vector_elements; j++) {
1774 dst->writemask = 1 << j;
1775 dst->type = BRW_REGISTER_TYPE_F;
1776
1777 emit(MOV(*dst, src_reg(vec[j])));
1778 }
1779 dst->reg_offset++;
1780 }
1781 return;
1782 }
1783
1784 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
1785
1786 for (int i = 0; i < ir->type->vector_elements; i++) {
1787 if (!(remaining_writemask & (1 << i)))
1788 continue;
1789
1790 dst->writemask = 1 << i;
1791 dst->type = brw_type_for_base_type(ir->type);
1792
1793 /* Find other components that match the one we're about to
1794 * write. Emits fewer instructions for things like vec4(0.5,
1795 * 1.5, 1.5, 1.5).
1796 */
1797 for (int j = i + 1; j < ir->type->vector_elements; j++) {
1798 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1799 if (ir->value.b[i] == ir->value.b[j])
1800 dst->writemask |= (1 << j);
1801 } else {
1802 /* u, i, and f storage all line up, so no need for a
1803 * switch case for comparing each type.
1804 */
1805 if (ir->value.u[i] == ir->value.u[j])
1806 dst->writemask |= (1 << j);
1807 }
1808 }
1809
1810 switch (ir->type->base_type) {
1811 case GLSL_TYPE_FLOAT:
1812 emit(MOV(*dst, src_reg(ir->value.f[i])));
1813 break;
1814 case GLSL_TYPE_INT:
1815 emit(MOV(*dst, src_reg(ir->value.i[i])));
1816 break;
1817 case GLSL_TYPE_UINT:
1818 emit(MOV(*dst, src_reg(ir->value.u[i])));
1819 break;
1820 case GLSL_TYPE_BOOL:
1821 emit(MOV(*dst, src_reg(ir->value.b[i])));
1822 break;
1823 default:
1824 assert(!"Non-float/uint/int/bool constant");
1825 break;
1826 }
1827
1828 remaining_writemask &= ~dst->writemask;
1829 }
1830 dst->reg_offset++;
1831 }
1832
1833 void
1834 vec4_visitor::visit(ir_constant *ir)
1835 {
1836 dst_reg dst = dst_reg(this, ir->type);
1837 this->result = src_reg(dst);
1838
1839 emit_constant_values(&dst, ir);
1840 }
1841
1842 void
1843 vec4_visitor::visit(ir_call *ir)
1844 {
1845 assert(!"not reached");
1846 }
1847
1848 void
1849 vec4_visitor::visit(ir_texture *ir)
1850 {
1851 int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &vp->Base);
1852
1853 /* Should be lowered by do_lower_texture_projection */
1854 assert(!ir->projector);
1855
1856 /* Generate code to compute all the subexpression trees. This has to be
1857 * done before loading any values into MRFs for the sampler message since
1858 * generating these values may involve SEND messages that need the MRFs.
1859 */
1860 src_reg coordinate;
1861 if (ir->coordinate) {
1862 ir->coordinate->accept(this);
1863 coordinate = this->result;
1864 }
1865
1866 src_reg shadow_comparitor;
1867 if (ir->shadow_comparitor) {
1868 ir->shadow_comparitor->accept(this);
1869 shadow_comparitor = this->result;
1870 }
1871
1872 src_reg lod, dPdx, dPdy;
1873 switch (ir->op) {
1874 case ir_txf:
1875 case ir_txl:
1876 case ir_txs:
1877 ir->lod_info.lod->accept(this);
1878 lod = this->result;
1879 break;
1880 case ir_txd:
1881 ir->lod_info.grad.dPdx->accept(this);
1882 dPdx = this->result;
1883
1884 ir->lod_info.grad.dPdy->accept(this);
1885 dPdy = this->result;
1886 break;
1887 case ir_tex:
1888 case ir_txb:
1889 break;
1890 }
1891
1892 vec4_instruction *inst = NULL;
1893 switch (ir->op) {
1894 case ir_tex:
1895 case ir_txl:
1896 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
1897 break;
1898 case ir_txd:
1899 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
1900 break;
1901 case ir_txf:
1902 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
1903 break;
1904 case ir_txs:
1905 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
1906 break;
1907 case ir_txb:
1908 assert(!"TXB is not valid for vertex shaders.");
1909 }
1910
1911 /* Texel offsets go in the message header; Gen4 also requires headers. */
1912 inst->header_present = ir->offset || intel->gen < 5;
1913 inst->base_mrf = 2;
1914 inst->mlen = inst->header_present + 1; /* always at least one */
1915 inst->sampler = sampler;
1916 inst->dst = dst_reg(this, ir->type);
1917 inst->shadow_compare = ir->shadow_comparitor != NULL;
1918
1919 if (ir->offset != NULL && ir->op != ir_txf)
1920 inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
1921
1922 /* MRF for the first parameter */
1923 int param_base = inst->base_mrf + inst->header_present;
1924
1925 if (ir->op == ir_txs) {
1926 int writemask = intel->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
1927 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, writemask),
1928 lod));
1929 } else {
1930 int i, coord_mask = 0, zero_mask = 0;
1931 /* Load the coordinate */
1932 /* FINISHME: gl_clamp_mask and saturate */
1933 for (i = 0; i < ir->coordinate->type->vector_elements; i++)
1934 coord_mask |= (1 << i);
1935 for (; i < 4; i++)
1936 zero_mask |= (1 << i);
1937
1938 if (ir->offset && ir->op == ir_txf) {
1939 /* It appears that the ld instruction used for txf does its
1940 * address bounds check before adding in the offset. To work
1941 * around this, just add the integer offset to the integer
1942 * texel coordinate, and don't put the offset in the header.
1943 */
1944 ir_constant *offset = ir->offset->as_constant();
1945 assert(offset);
1946
1947 for (int j = 0; j < ir->coordinate->type->vector_elements; j++) {
1948 src_reg src = coordinate;
1949 src.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(src.swizzle, j),
1950 BRW_GET_SWZ(src.swizzle, j),
1951 BRW_GET_SWZ(src.swizzle, j),
1952 BRW_GET_SWZ(src.swizzle, j));
1953 emit(ADD(dst_reg(MRF, param_base, ir->coordinate->type, 1 << j),
1954 src, offset->value.i[j]));
1955 }
1956 } else {
1957 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
1958 coordinate));
1959 }
1960 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
1961 src_reg(0)));
1962 /* Load the shadow comparitor */
1963 if (ir->shadow_comparitor) {
1964 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
1965 WRITEMASK_X),
1966 shadow_comparitor));
1967 inst->mlen++;
1968 }
1969
1970 /* Load the LOD info */
1971 if (ir->op == ir_txl) {
1972 int mrf, writemask;
1973 if (intel->gen >= 5) {
1974 mrf = param_base + 1;
1975 if (ir->shadow_comparitor) {
1976 writemask = WRITEMASK_Y;
1977 /* mlen already incremented */
1978 } else {
1979 writemask = WRITEMASK_X;
1980 inst->mlen++;
1981 }
1982 } else /* intel->gen == 4 */ {
1983 mrf = param_base;
1984 writemask = WRITEMASK_Z;
1985 }
1986 emit(MOV(dst_reg(MRF, mrf, ir->lod_info.lod->type, writemask), lod));
1987 } else if (ir->op == ir_txf) {
1988 emit(MOV(dst_reg(MRF, param_base, ir->lod_info.lod->type, WRITEMASK_W),
1989 lod));
1990 } else if (ir->op == ir_txd) {
1991 const glsl_type *type = ir->lod_info.grad.dPdx->type;
1992
1993 if (intel->gen >= 5) {
1994 dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1995 dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
1996 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
1997 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
1998 inst->mlen++;
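/* The two masked MOVs above interleave the gradients into one MRF as
 * (dPdx.x, dPdy.x, dPdx.y, dPdy.y).
 */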
1999
2000 if (ir->type->vector_elements == 3) {
2001 dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
2002 dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
2003 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
2004 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
2005 inst->mlen++;
2006 }
2007 } else /* intel->gen == 4 */ {
2008 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
2009 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
2010 inst->mlen += 2;
2011 }
2012 }
2013 }
2014
2015 emit(inst);
2016
2017 swizzle_result(ir, src_reg(inst->dst), sampler);
2018 }
2019
2020 void
2021 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
2022 {
2023 this->result = orig_val;
2024
2025 int s = c->key.tex.swizzles[sampler];
2026
2027 if (ir->op == ir_txs || ir->type == glsl_type::float_type
2028 || s == SWIZZLE_NOOP)
2029 return;
2030
2031 int zero_mask = 0, one_mask = 0, copy_mask = 0;
2032 int swizzle[4];
2033
2034 for (int i = 0; i < 4; i++) {
2035 switch (GET_SWZ(s, i)) {
2036 case SWIZZLE_ZERO:
2037 zero_mask |= (1 << i);
2038 break;
2039 case SWIZZLE_ONE:
2040 one_mask |= (1 << i);
2041 break;
2042 default:
2043 copy_mask |= (1 << i);
2044 swizzle[i] = GET_SWZ(s, i);
2045 break;
2046 }
2047 }
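/* Illustrative example: a texture swizzle of (Z, Y, X, ONE) yields
 * copy_mask = XYZ with swizzle[0..2] = {Z, Y, X}, one_mask = W and
 * zero_mask = 0, so one swizzled MOV plus one immediate MOV are emitted
 * below.
 */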
2048
2049 this->result = src_reg(this, ir->type);
2050 dst_reg swizzled_result(this->result);
2051
2052 if (copy_mask) {
2053 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2054 swizzled_result.writemask = copy_mask;
2055 emit(MOV(swizzled_result, orig_val));
2056 }
2057
2058 if (zero_mask) {
2059 swizzled_result.writemask = zero_mask;
2060 emit(MOV(swizzled_result, src_reg(0.0f)));
2061 }
2062
2063 if (one_mask) {
2064 swizzled_result.writemask = one_mask;
2065 emit(MOV(swizzled_result, src_reg(1.0f)));
2066 }
2067 }
2068
2069 void
2070 vec4_visitor::visit(ir_return *ir)
2071 {
2072 assert(!"not reached");
2073 }
2074
2075 void
2076 vec4_visitor::visit(ir_discard *ir)
2077 {
2078 assert(!"not reached");
2079 }
2080
2081 void
2082 vec4_visitor::visit(ir_if *ir)
2083 {
2084 /* Don't point the annotation at the if statement, because then the
2085 * annotation would be printed for it plus the then and else blocks.
2086 */
2087 this->base_ir = ir->condition;
2088
2089 if (intel->gen == 6) {
2090 emit_if_gen6(ir);
2091 } else {
2092 uint32_t predicate;
2093 emit_bool_to_cond_code(ir->condition, &predicate);
2094 emit(IF(predicate));
2095 }
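/* On gen6 the IF instruction can carry the comparison itself (handled in
 * emit_if_gen6); other generations first evaluate the condition into a
 * predicate and emit a predicated IF, as above.
 */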
2096
2097 visit_instructions(&ir->then_instructions);
2098
2099 if (!ir->else_instructions.is_empty()) {
2100 this->base_ir = ir->condition;
2101 emit(BRW_OPCODE_ELSE);
2102
2103 visit_instructions(&ir->else_instructions);
2104 }
2105
2106 this->base_ir = ir->condition;
2107 emit(BRW_OPCODE_ENDIF);
2108 }
2109
2110 void
2111 vec4_visitor::emit_ndc_computation()
2112 {
2113 /* Get the position */
2114 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
2115
2116 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
2117 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2118 output_reg[BRW_VERT_RESULT_NDC] = ndc;
2119
2120 current_annotation = "NDC";
2121 dst_reg ndc_w = ndc;
2122 ndc_w.writemask = WRITEMASK_W;
2123 src_reg pos_w = pos;
2124 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2125 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2126
2127 dst_reg ndc_xyz = ndc;
2128 ndc_xyz.writemask = WRITEMASK_XYZ;
2129
2130 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
2131 }
2132
2133 void
2134 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2135 {
2136 if (intel->gen < 6 &&
2137 ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
2138 c->key.userclip_active || brw->has_negative_rhw_bug)) {
2139 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2140 dst_reg header1_w = header1;
2141 header1_w.writemask = WRITEMASK_W;
2142 GLuint i;
2143
2144 emit(MOV(header1, 0u));
2145
2146 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2147 src_reg psiz = src_reg(output_reg[VERT_RESULT_PSIZ]);
2148
2149 current_annotation = "Point size";
2150 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2151 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
2152 }
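/* Note (a reading of the code, not the PRM): the MUL by 2^11 followed by
 * the AND with 0x7ff << 8 appears to place the point size as an 11-bit
 * fixed-point value in bits 8..18 of the header DWord consumed by the
 * fixed-function units.
 */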
2153
2154 current_annotation = "Clipping flags";
2155 for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
2156 vec4_instruction *inst;
2157
2158 inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
2159 src_reg(this->userplane[i])));
2160 inst->conditional_mod = BRW_CONDITIONAL_L;
2161
2162 inst = emit(OR(header1_w, src_reg(header1_w), 1u << i));
2163 inst->predicate = BRW_PREDICATE_NORMAL;
2164 }
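/* Each iteration above sets the flag register when dot(HPOS, userplane[i])
 * is negative (BRW_CONDITIONAL_L against zero), and the predicated OR then
 * sets clip-flag bit i, marking the vertex as outside user clip plane i.
 */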
2165
2166 /* i965 clipping workaround:
2167 * 1) Test for negative rhw
2168 * 2) If set,
2169 * set ndc = (0,0,0,0)
2170 * set ucp[6] = 1
2171 *
2172 * Later, clipping will detect ucp[6] and ensure the primitive is
2173 * clipped against all fixed planes.
2174 */
2175 if (brw->has_negative_rhw_bug) {
2176 #if 0
2177 /* FINISHME */
2178 brw_CMP(p,
2179 vec8(brw_null_reg()),
2180 BRW_CONDITIONAL_L,
2181 brw_swizzle1(output_reg[BRW_VERT_RESULT_NDC], 3),
2182 brw_imm_f(0));
2183
2184 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
2185 brw_MOV(p, output_reg[BRW_VERT_RESULT_NDC], brw_imm_f(0));
2186 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2187 #endif
2188 }
2189
2190 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2191 } else if (intel->gen < 6) {
2192 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2193 } else {
2194 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2195 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
2196 emit(MOV(brw_writemask(reg, WRITEMASK_W),
2197 src_reg(output_reg[VERT_RESULT_PSIZ])));
2198 }
2199 }
2200 }
2201
2202 void
2203 vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
2204 {
2205 if (intel->gen < 6) {
2206 /* Clip distance slots are set aside in gen5, but they are not used. It
2207 * is not clear whether we actually need to set aside space for them,
2208 * but the performance cost is negligible.
2209 */
2210 return;
2211 }
2212
2213 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2214 *
2215 * "If a linked set of shaders forming the vertex stage contains no
2216 * static write to gl_ClipVertex or gl_ClipDistance, but the
2217 * application has requested clipping against user clip planes through
2218 * the API, then the coordinate written to gl_Position is used for
2219 * comparison against the user clip planes."
2220 *
2221 * This function is only called if the shader didn't write to
2222 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
2223 * if the user wrote to it; otherwise we use gl_Position.
2224 */
2225 gl_vert_result clip_vertex = VERT_RESULT_CLIP_VERTEX;
2226 if (!(c->prog_data.outputs_written
2227 & BITFIELD64_BIT(VERT_RESULT_CLIP_VERTEX))) {
2228 clip_vertex = VERT_RESULT_HPOS;
2229 }
2230
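/* Each DP4 below writes dot(clip_vertex, userplane[i + offset]) into
 * channel i of this clip-distance slot, so a single slot covers up to
 * four user clip planes.
 */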
2231 for (int i = 0; i + offset < c->key.nr_userclip_plane_consts && i < 4;
2232 ++i) {
2233 emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
2234 src_reg(output_reg[clip_vertex]),
2235 src_reg(this->userplane[i + offset])));
2236 }
2237 }
2238
2239 void
2240 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int vert_result)
2241 {
2242 assert (vert_result < VERT_RESULT_MAX);
2243 reg.type = output_reg[vert_result].type;
2244 current_annotation = output_reg_annotation[vert_result];
2245 /* Copy the register, saturating if necessary */
2246 vec4_instruction *inst = emit(MOV(reg,
2247 src_reg(output_reg[vert_result])));
2248 if ((vert_result == VERT_RESULT_COL0 ||
2249 vert_result == VERT_RESULT_COL1 ||
2250 vert_result == VERT_RESULT_BFC0 ||
2251 vert_result == VERT_RESULT_BFC1) &&
2252 c->key.clamp_vertex_color) {
2253 inst->saturate = true;
2254 }
2255 }
2256
2257 void
2258 vec4_visitor::emit_urb_slot(int mrf, int vert_result)
2259 {
2260 struct brw_reg hw_reg = brw_message_reg(mrf);
2261 dst_reg reg = dst_reg(MRF, mrf);
2262 reg.type = BRW_REGISTER_TYPE_F;
2263
2264 switch (vert_result) {
2265 case VERT_RESULT_PSIZ:
2266 /* PSIZ is always in slot 0, and is coupled with other flags. */
2267 current_annotation = "indices, point width, clip flags";
2268 emit_psiz_and_flags(hw_reg);
2269 break;
2270 case BRW_VERT_RESULT_NDC:
2271 current_annotation = "NDC";
2272 emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
2273 break;
2274 case BRW_VERT_RESULT_HPOS_DUPLICATE:
2275 case VERT_RESULT_HPOS:
2276 current_annotation = "gl_Position";
2277 emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
2278 break;
2279 case VERT_RESULT_CLIP_DIST0:
2280 case VERT_RESULT_CLIP_DIST1:
2281 if (this->c->key.uses_clip_distance) {
2282 emit_generic_urb_slot(reg, vert_result);
2283 } else {
2284 current_annotation = "user clip distances";
2285 emit_clip_distances(hw_reg, (vert_result - VERT_RESULT_CLIP_DIST0) * 4);
2286 }
2287 break;
2288 case VERT_RESULT_EDGE:
2289 /* This is present when doing unfilled polygons. We're supposed to copy
2290 * the edge flag from the user-provided vertex array
2291 * (glEdgeFlagPointer), or otherwise we'll copy from the current value
2292 * of that attribute (starts as 1.0f). This is then used in clipping to
2293 * determine which edges should be drawn as wireframe.
2294 */
2295 current_annotation = "edge flag";
2296 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
2297 glsl_type::float_type, WRITEMASK_XYZW))));
2298 break;
2299 case BRW_VERT_RESULT_PAD:
2300 /* No need to write to this slot */
2301 break;
2302 default:
2303 emit_generic_urb_slot(reg, vert_result);
2304 break;
2305 }
2306 }
2307
2308 static int
2309 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2310 {
2311 struct intel_context *intel = &brw->intel;
2312
2313 if (intel->gen >= 6) {
2314 /* URB data written (does not include the message header reg) must
2315 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
2316 * section 5.4.3.2.2: URB_INTERLEAVED.
2317 *
2318 * URB entries are allocated on a multiple of 1024 bits, so an
2319 * extra 128 bits written here to make the end align to 256 is
2320 * no problem.
2321 */
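/* Illustrative example: an mlen of 8 (1 header + 7 data regs) becomes 9,
 * so that 8 data registers -- a multiple of 2 -- are written.
 */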
2322 if ((mlen % 2) != 1)
2323 mlen++;
2324 }
2325
2326 return mlen;
2327 }
2328
2329 /**
2330 * Generates the VUE payload plus the 1 or 2 URB write instructions to
2331 * complete the VS thread.
2332 *
2333 * The VUE layout is documented in Volume 2a.
2334 */
2335 void
2336 vec4_visitor::emit_urb_writes()
2337 {
2338 /* MRF 0 is reserved for the debugger, so start with message header
2339 * in MRF 1.
2340 */
2341 int base_mrf = 1;
2342 int mrf = base_mrf;
2343 /* In the process of generating our URB write message contents, we
2344 * may need to unspill a register or load from an array. Those
2345 * reads would use MRFs 14-15.
2346 */
2347 int max_usable_mrf = 13;
2348
2349 /* The following assertion verifies that max_usable_mrf yields an even
2350 * number of URB write data registers, which meets gen6's length
2351 * alignment requirement.
2352 */
2353 assert ((max_usable_mrf - base_mrf) % 2 == 0);
2354
2355 /* First mrf is the g0-based message header containing URB handles and such,
2356 * which is implied in VS_OPCODE_URB_WRITE.
2357 */
2358 mrf++;
2359
2360 if (intel->gen < 6) {
2361 emit_ndc_computation();
2362 }
2363
2364 /* Set up the VUE data for the first URB write */
2365 int slot;
2366 for (slot = 0; slot < c->prog_data.vue_map.num_slots; ++slot) {
2367 emit_urb_slot(mrf++, c->prog_data.vue_map.slot_to_vert_result[slot]);
2368
2369 /* If this was max_usable_mrf, we can't fit anything more into this URB
2370 * WRITE.
2371 */
2372 if (mrf > max_usable_mrf) {
2373 slot++;
2374 break;
2375 }
2376 }
2377
2378 current_annotation = "URB write";
2379 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
2380 inst->base_mrf = base_mrf;
2381 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2382 inst->eot = (slot >= c->prog_data.vue_map.num_slots);
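/* This write is the end-of-thread message only if every VUE slot fit;
 * otherwise the second URB write below finishes the VUE and terminates
 * the thread.
 */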
2383
2384 /* Optional second URB write */
2385 if (!inst->eot) {
2386 mrf = base_mrf + 1;
2387
2388 for (; slot < c->prog_data.vue_map.num_slots; ++slot) {
2389 assert(mrf < max_usable_mrf);
2390
2391 emit_urb_slot(mrf++, c->prog_data.vue_map.slot_to_vert_result[slot]);
2392 }
2393
2394 current_annotation = "URB write";
2395 inst = emit(VS_OPCODE_URB_WRITE);
2396 inst->base_mrf = base_mrf;
2397 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2398 inst->eot = true;
2399 /* URB destination offset. The previous write sent MRFs 1-13; excluding
2400 * the one header MRF, that is 12 data regs (MRFs 2-13). The URB offset
2401 * is in URB row increments, and each of our MRFs is half a row since
2402 * we're doing interleaved writes, so the offset is (13 - 1) / 2 = 6.
2403 */
2404 inst->offset = (max_usable_mrf - base_mrf) / 2;
2405 }
2406 }
2407
2408 src_reg
2409 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2410 src_reg *reladdr, int reg_offset)
2411 {
2412 /* Because we store the values to scratch interleaved like our
2413 * vertex data, we need to scale the vec4 index by 2.
2414 */
2415 int message_header_scale = 2;
2416
2417 /* Pre-gen6, the message header uses byte offsets instead of vec4
2418 * (16-byte) offset units.
2419 */
2420 if (intel->gen < 6)
2421 message_header_scale *= 16;
2422
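/* Illustrative example: a constant reg_offset of 3 becomes an immediate of
 * 6 on gen6+ (interleaved vec4 units) or 96 pre-gen6 (byte units).
 */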
2423 if (reladdr) {
2424 src_reg index = src_reg(this, glsl_type::int_type);
2425
2426 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2427 emit_before(inst, MUL(dst_reg(index),
2428 index, src_reg(message_header_scale)));
2429
2430 return index;
2431 } else {
2432 return src_reg(reg_offset * message_header_scale);
2433 }
2434 }
2435
2436 src_reg
2437 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2438 src_reg *reladdr, int reg_offset)
2439 {
2440 if (reladdr) {
2441 src_reg index = src_reg(this, glsl_type::int_type);
2442
2443 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2444
2445 /* Pre-gen6, the message header uses byte offsets instead of vec4
2446 * (16-byte) offset units.
2447 */
2448 if (intel->gen < 6) {
2449 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2450 }
2451
2452 return index;
2453 } else {
2454 int message_header_scale = intel->gen < 6 ? 16 : 1;
2455 return src_reg(reg_offset * message_header_scale);
2456 }
2457 }
2458
2459 /**
2460 * Emits an instruction before @inst to load the value named by @orig_src
2461 * from scratch space at @base_offset to @temp.
2462 *
2463 * @base_offset is measured in 32-byte units (the size of a register).
2464 */
2465 void
2466 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2467 dst_reg temp, src_reg orig_src,
2468 int base_offset)
2469 {
2470 int reg_offset = base_offset + orig_src.reg_offset;
2471 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2472
2473 emit_before(inst, SCRATCH_READ(temp, index));
2474 }
2475
2476 /**
2477 * Emits an instruction after @inst to store the value to be written
2478 * to @orig_dst to scratch space at @base_offset, from @temp.
2479 *
2480 * @base_offset is measured in 32-byte units (the size of a register).
2481 */
2482 void
2483 vec4_visitor::emit_scratch_write(vec4_instruction *inst, int base_offset)
2484 {
2485 int reg_offset = base_offset + inst->dst.reg_offset;
2486 src_reg index = get_scratch_offset(inst, inst->dst.reladdr, reg_offset);
2487
2488 /* Create a temporary register to store *inst's result in.
2489 *
2490 * We have to be careful in MOVing from our temporary result register in
2491 * the scratch write. If we swizzle from channels of the temporary that
2492 * weren't initialized, it will confuse live interval analysis, which will
2493 * make spilling fail to make progress.
2494 */
2495 src_reg temp = src_reg(this, glsl_type::vec4_type);
2496 temp.type = inst->dst.type;
2497 int first_writemask_chan = ffs(inst->dst.writemask) - 1;
2498 int swizzles[4];
2499 for (int i = 0; i < 4; i++)
2500 if (inst->dst.writemask & (1 << i))
2501 swizzles[i] = i;
2502 else
2503 swizzles[i] = first_writemask_chan;
2504 temp.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2505 swizzles[2], swizzles[3]);
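/* Illustrative example: a destination writemask of .xz gives temp.swizzle
 * = (x, x, z, x); unwritten channels replicate the first written channel,
 * so the scratch-write MOV never reads uninitialized data.
 */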
2506
2507 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
2508 inst->dst.writemask));
2509 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
2510 write->predicate = inst->predicate;
2511 write->ir = inst->ir;
2512 write->annotation = inst->annotation;
2513 inst->insert_after(write);
2514
2515 inst->dst.file = temp.file;
2516 inst->dst.reg = temp.reg;
2517 inst->dst.reg_offset = temp.reg_offset;
2518 inst->dst.reladdr = NULL;
2519 }
2520
2521 /**
2522 * We can't generally support array access in GRF space, because a
2523 * single instruction's destination can only span 2 contiguous
2524 * registers. So, we send all GRF arrays that get variable index
2525 * access to scratch space.
2526 */
2527 void
2528 vec4_visitor::move_grf_array_access_to_scratch()
2529 {
2530 int scratch_loc[this->virtual_grf_count];
2531
2532 for (int i = 0; i < this->virtual_grf_count; i++) {
2533 scratch_loc[i] = -1;
2534 }
2535
2536 /* First, calculate the set of virtual GRFs that need to be punted
2537 * to scratch due to having any array access on them, and where in
2538 * scratch.
2539 */
2540 foreach_list(node, &this->instructions) {
2541 vec4_instruction *inst = (vec4_instruction *)node;
2542
2543 if (inst->dst.file == GRF && inst->dst.reladdr &&
2544 scratch_loc[inst->dst.reg] == -1) {
2545 scratch_loc[inst->dst.reg] = c->last_scratch;
2546 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
2547 }
2548
2549 for (int i = 0 ; i < 3; i++) {
2550 src_reg *src = &inst->src[i];
2551
2552 if (src->file == GRF && src->reladdr &&
2553 scratch_loc[src->reg] == -1) {
2554 scratch_loc[src->reg] = c->last_scratch;
2555 c->last_scratch += this->virtual_grf_sizes[src->reg];
2556 }
2557 }
2558 }
2559
2560 /* Now, for anything that will be accessed through scratch, rewrite
2561 * it to load/store. Note that this is a _safe list walk, because
2562 * we may generate a new scratch_write instruction after the one
2563 * we're processing.
2564 */
2565 foreach_list_safe(node, &this->instructions) {
2566 vec4_instruction *inst = (vec4_instruction *)node;
2567
2568 /* Set up the annotation tracking for newly generated instructions. */
2569 base_ir = inst->ir;
2570 current_annotation = inst->annotation;
2571
2572 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
2573 emit_scratch_write(inst, scratch_loc[inst->dst.reg]);
2574 }
2575
2576 for (int i = 0 ; i < 3; i++) {
2577 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
2578 continue;
2579
2580 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2581
2582 emit_scratch_read(inst, temp, inst->src[i],
2583 scratch_loc[inst->src[i].reg]);
2584
2585 inst->src[i].file = temp.file;
2586 inst->src[i].reg = temp.reg;
2587 inst->src[i].reg_offset = temp.reg_offset;
2588 inst->src[i].reladdr = NULL;
2589 }
2590 }
2591 }
2592
2593 /**
2594 * Emits an instruction before @inst to load the value named by @orig_src
2595 * from the pull constant buffer (surface) at @base_offset to @temp.
2596 */
2597 void
2598 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
2599 dst_reg temp, src_reg orig_src,
2600 int base_offset)
2601 {
2602 int reg_offset = base_offset + orig_src.reg_offset;
2603 src_reg index = src_reg((unsigned)SURF_INDEX_VERT_CONST_BUFFER);
2604 src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
2605 vec4_instruction *load;
2606
2607 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
2608 temp, index, offset);
2609 load->base_mrf = 14;
2610 load->mlen = 1;
2611 emit_before(inst, load);
2612 }
2613
2614 /**
2615 * Implements array access of uniforms by inserting a
2616 * PULL_CONSTANT_LOAD instruction.
2617 *
2618 * Unlike temporary GRF array access (where we don't support it due to
2619 * the difficulty of doing relative addressing on instruction
2620 * destinations), we could potentially do array access of uniforms
2621 * that were loaded in GRF space as push constants. In real-world
2622 * usage we've seen, though, the arrays being used are always larger
2623 * than we could load as push constants, so just always move all
2624 * uniform array access out to a pull constant buffer.
2625 */
2626 void
2627 vec4_visitor::move_uniform_array_access_to_pull_constants()
2628 {
2629 int pull_constant_loc[this->uniforms];
2630
2631 for (int i = 0; i < this->uniforms; i++) {
2632 pull_constant_loc[i] = -1;
2633 }
2634
2635 /* Walk through and find array access of uniforms. Put a copy of that
2636 * uniform in the pull constant buffer.
2637 *
2638 * Note that we don't move constant-indexed accesses to arrays. No
2639 * testing has been done of the performance impact of this choice.
2640 */
2641 foreach_list_safe(node, &this->instructions) {
2642 vec4_instruction *inst = (vec4_instruction *)node;
2643
2644 for (int i = 0 ; i < 3; i++) {
2645 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
2646 continue;
2647
2648 int uniform = inst->src[i].reg;
2649
2650 /* If this array isn't already present in the pull constant buffer,
2651 * add it.
2652 */
2653 if (pull_constant_loc[uniform] == -1) {
2654 const float **values = &prog_data->param[uniform * 4];
2655
2656 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
2657
2658 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
2659 prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
2660 }
2661 }
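/* Illustrative example: if nr_pull_params was 8 when this array was
 * copied, the array's first vec4 lives at pull-constant slot 8 / 4 = 2.
 */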
2662
2663 /* Set up the annotation tracking for newly generated instructions. */
2664 base_ir = inst->ir;
2665 current_annotation = inst->annotation;
2666
2667 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
2668
2669 emit_pull_constant_load(inst, temp, inst->src[i],
2670 pull_constant_loc[uniform]);
2671
2672 inst->src[i].file = temp.file;
2673 inst->src[i].reg = temp.reg;
2674 inst->src[i].reg_offset = temp.reg_offset;
2675 inst->src[i].reladdr = NULL;
2676 }
2677 }
2678
2679 /* Now there are no accesses of the UNIFORM file with a reladdr, so
2680 * no need to track them as larger-than-vec4 objects. This will be
2681 * relied on in cutting out unused uniform vectors from push
2682 * constants.
2683 */
2684 split_uniform_registers();
2685 }
2686
2687 void
2688 vec4_visitor::resolve_ud_negate(src_reg *reg)
2689 {
2690 if (reg->type != BRW_REGISTER_TYPE_UD ||
2691 !reg->negate)
2692 return;
2693
2694 src_reg temp = src_reg(this, glsl_type::uvec4_type);
2695 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
2696 *reg = temp;
2697 }
2698
2699 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
2700 struct gl_shader_program *prog,
2701 struct brw_shader *shader)
2702 {
2703 this->c = c;
2704 this->p = &c->func;
2705 this->brw = p->brw;
2706 this->intel = &brw->intel;
2707 this->ctx = &intel->ctx;
2708 this->prog = prog;
2709 this->shader = shader;
2710
2711 this->mem_ctx = ralloc_context(NULL);
2712 this->failed = false;
2713
2714 this->base_ir = NULL;
2715 this->current_annotation = NULL;
2716
2717 this->c = c;
2718 this->vp = &c->vp->program;
2719 this->prog_data = &c->prog_data;
2720
2721 this->variable_ht = hash_table_ctor(0,
2722 hash_table_pointer_hash,
2723 hash_table_pointer_compare);
2724
2725 this->virtual_grf_def = NULL;
2726 this->virtual_grf_use = NULL;
2727 this->virtual_grf_sizes = NULL;
2728 this->virtual_grf_count = 0;
2729 this->virtual_grf_reg_map = NULL;
2730 this->virtual_grf_reg_count = 0;
2731 this->virtual_grf_array_size = 0;
2732 this->live_intervals_valid = false;
2733
2734 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
2735
2736 this->uniforms = 0;
2737 }
2738
2739 vec4_visitor::~vec4_visitor()
2740 {
2741 ralloc_free(this->mem_ctx);
2742 hash_table_dtor(this->variable_ht);
2743 }
2744
2745
2746 void
2747 vec4_visitor::fail(const char *format, ...)
2748 {
2749 va_list va;
2750 char *msg;
2751
2752 if (failed)
2753 return;
2754
2755 failed = true;
2756
2757 va_start(va, format);
2758 msg = ralloc_vasprintf(mem_ctx, format, va);
2759 va_end(va);
2760 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2761
2762 this->fail_msg = msg;
2763
2764 if (INTEL_DEBUG & DEBUG_VS) {
2765 fprintf(stderr, "%s", msg);
2766 }
2767 }
2768
2769 } /* namespace brw */