glsl: move variables in to ir_variable::data, part I
[mesa.git] / src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 #include "glsl/ir_uniform.h"
26 extern "C" {
27 #include "program/sampler.h"
28 }
29
30 namespace brw {
31
32 vec4_instruction::vec4_instruction(vec4_visitor *v,
33 enum opcode opcode, dst_reg dst,
34 src_reg src0, src_reg src1, src_reg src2)
35 {
36 this->opcode = opcode;
37 this->dst = dst;
38 this->src[0] = src0;
39 this->src[1] = src1;
40 this->src[2] = src2;
41 this->saturate = false;
42 this->force_writemask_all = false;
43 this->no_dd_clear = false;
44 this->no_dd_check = false;
45 this->conditional_mod = BRW_CONDITIONAL_NONE;
46 this->sampler = 0;
47 this->texture_offset = 0;
48 this->target = 0;
49 this->shadow_compare = false;
50 this->ir = v->base_ir;
51 this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
52 this->header_present = false;
53 this->mlen = 0;
54 this->base_mrf = 0;
55 this->offset = 0;
56 this->annotation = v->current_annotation;
57 }
58
59 vec4_instruction *
60 vec4_visitor::emit(vec4_instruction *inst)
61 {
62 this->instructions.push_tail(inst);
63
64 return inst;
65 }
66
67 vec4_instruction *
68 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
69 {
70 new_inst->ir = inst->ir;
71 new_inst->annotation = inst->annotation;
72
73 inst->insert_before(new_inst);
74
75 return inst;
76 }
77
78 vec4_instruction *
79 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
80 src_reg src0, src_reg src1, src_reg src2)
81 {
82 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
83 src0, src1, src2));
84 }
85
86
87 vec4_instruction *
88 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
89 {
90 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
91 }
92
93 vec4_instruction *
94 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
95 {
96 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
97 }
98
99 vec4_instruction *
100 vec4_visitor::emit(enum opcode opcode, dst_reg dst)
101 {
102 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst));
103 }
104
105 vec4_instruction *
106 vec4_visitor::emit(enum opcode opcode)
107 {
108 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
109 }
110
111 #define ALU1(op) \
112 vec4_instruction * \
113 vec4_visitor::op(dst_reg dst, src_reg src0) \
114 { \
115 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
116 src0); \
117 }
118
119 #define ALU2(op) \
120 vec4_instruction * \
121 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
122 { \
123 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
124 src0, src1); \
125 }
126
127 #define ALU3(op) \
128 vec4_instruction * \
129 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\
130 { \
131 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
132 src0, src1, src2); \
133 }
134
135 ALU1(NOT)
136 ALU1(MOV)
137 ALU1(FRC)
138 ALU1(RNDD)
139 ALU1(RNDE)
140 ALU1(RNDZ)
141 ALU1(F32TO16)
142 ALU1(F16TO32)
143 ALU2(ADD)
144 ALU2(MUL)
145 ALU2(MACH)
146 ALU2(AND)
147 ALU2(OR)
148 ALU2(XOR)
149 ALU2(DP3)
150 ALU2(DP4)
151 ALU2(DPH)
152 ALU2(SHL)
153 ALU2(SHR)
154 ALU2(ASR)
155 ALU3(LRP)
156 ALU1(BFREV)
157 ALU3(BFE)
158 ALU2(BFI1)
159 ALU3(BFI2)
160 ALU1(FBH)
161 ALU1(FBL)
162 ALU1(CBIT)
163 ALU3(MAD)
164 ALU2(ADDC)
165 ALU2(SUBB)
166
167 /** Gen4 predicated IF. */
168 vec4_instruction *
169 vec4_visitor::IF(uint32_t predicate)
170 {
171 vec4_instruction *inst;
172
173 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
174 inst->predicate = predicate;
175
176 return inst;
177 }
178
179 /** Gen6 IF with embedded comparison. */
180 vec4_instruction *
181 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
182 {
183 assert(brw->gen == 6);
184
185 vec4_instruction *inst;
186
187 resolve_ud_negate(&src0);
188 resolve_ud_negate(&src1);
189
190 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
191 src0, src1);
192 inst->conditional_mod = condition;
193
194 return inst;
195 }
196
197 /**
198 * CMP: Sets the low bit of the destination channels with the result
199 * of the comparison, while the upper bits are undefined, and updates
200 * the flag register with the packed 16 bits of the result.
201 */
202 vec4_instruction *
203 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
204 {
205 vec4_instruction *inst;
206
207 /* The original gen4 does type conversion to the destination type
208 * before comparison, producing garbage results for floating
209 * point comparisons.
210 */
211 if (brw->gen == 4) {
212 dst.type = src0.type;
213 if (dst.file == HW_REG)
214 dst.fixed_hw_reg.type = dst.type;
215 }
216
217 resolve_ud_negate(&src0);
218 resolve_ud_negate(&src1);
219
220 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
221 inst->conditional_mod = condition;
222
223 return inst;
224 }
225
226 vec4_instruction *
227 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
228 {
229 vec4_instruction *inst;
230
231 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_GEN4_SCRATCH_READ,
232 dst, index);
233 inst->base_mrf = 14;
234 inst->mlen = 2;
235
236 return inst;
237 }
238
239 vec4_instruction *
240 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
241 {
242 vec4_instruction *inst;
243
244 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_GEN4_SCRATCH_WRITE,
245 dst, src, index);
246 inst->base_mrf = 13;
247 inst->mlen = 3;
248
249 return inst;
250 }
251
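/* Emit a dot product: DP2, DP3 or DP4 depending on the number of
 * components (2-4) being dotted.
 */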
252 void
253 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
254 {
255 static enum opcode dot_opcodes[] = {
256 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
257 };
258
259 emit(dot_opcodes[elements - 2], dst, src0, src1);
260 }
261
262 src_reg
263 vec4_visitor::fix_3src_operand(src_reg src)
264 {
265 /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
266 * able to use vertical stride of zero to replicate the vec4 uniform, like
267 *
268 * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
269 *
270 * But you can't, since vertical stride is always four in three-source
271 * instructions. Instead, insert a MOV instruction to do the replication so
272 * that the three-source instruction can consume it.
273 */
274
275 /* The MOV is only needed if the source is a uniform or immediate. */
276 if (src.file != UNIFORM && src.file != IMM)
277 return src;
278
279 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
280 expanded.type = src.type;
281 emit(MOV(expanded, src));
282 return src_reg(expanded);
283 }
284
285 src_reg
286 vec4_visitor::fix_math_operand(src_reg src)
287 {
288 /* The gen6 math instruction ignores the source modifiers --
289 * swizzle, abs, negate, and at least some parts of the register
290 * region description.
291 *
292 * Rather than trying to enumerate all these cases, *always* expand the
293 * operand to a temp GRF for gen6.
294 *
295 * For gen7, keep the operand as-is, except if immediate, which gen7 still
296 * can't use.
297 */
298
299 if (brw->gen == 7 && src.file != IMM)
300 return src;
301
302 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
303 expanded.type = src.type;
304 emit(MOV(expanded, src));
305 return src_reg(expanded);
306 }
307
308 void
309 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
310 {
311 src = fix_math_operand(src);
312
313 if (dst.writemask != WRITEMASK_XYZW) {
314 /* The gen6 math instruction must be align1, so we can't do
315 * writemasks.
316 */
317 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
318
319 emit(opcode, temp_dst, src);
320
321 emit(MOV(dst, src_reg(temp_dst)));
322 } else {
323 emit(opcode, dst, src);
324 }
325 }
326
327 void
328 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
329 {
330 vec4_instruction *inst = emit(opcode, dst, src);
331 inst->base_mrf = 1;
332 inst->mlen = 1;
333 }
334
335 void
336 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
337 {
338 switch (opcode) {
339 case SHADER_OPCODE_RCP:
340 case SHADER_OPCODE_RSQ:
341 case SHADER_OPCODE_SQRT:
342 case SHADER_OPCODE_EXP2:
343 case SHADER_OPCODE_LOG2:
344 case SHADER_OPCODE_SIN:
345 case SHADER_OPCODE_COS:
346 break;
347 default:
348 assert(!"not reached: bad math opcode");
349 return;
350 }
351
352 if (brw->gen >= 6) {
353 return emit_math1_gen6(opcode, dst, src);
354 } else {
355 return emit_math1_gen4(opcode, dst, src);
356 }
357 }
358
359 void
360 vec4_visitor::emit_math2_gen6(enum opcode opcode,
361 dst_reg dst, src_reg src0, src_reg src1)
362 {
363 src0 = fix_math_operand(src0);
364 src1 = fix_math_operand(src1);
365
366 if (dst.writemask != WRITEMASK_XYZW) {
367 /* The gen6 math instruction must be align1, so we can't do
368 * writemasks.
369 */
370 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
371 temp_dst.type = dst.type;
372
373 emit(opcode, temp_dst, src0, src1);
374
375 emit(MOV(dst, src_reg(temp_dst)));
376 } else {
377 emit(opcode, dst, src0, src1);
378 }
379 }
380
381 void
382 vec4_visitor::emit_math2_gen4(enum opcode opcode,
383 dst_reg dst, src_reg src0, src_reg src1)
384 {
385 vec4_instruction *inst = emit(opcode, dst, src0, src1);
386 inst->base_mrf = 1;
387 inst->mlen = 2;
388 }
389
390 void
391 vec4_visitor::emit_math(enum opcode opcode,
392 dst_reg dst, src_reg src0, src_reg src1)
393 {
394 switch (opcode) {
395 case SHADER_OPCODE_POW:
396 case SHADER_OPCODE_INT_QUOTIENT:
397 case SHADER_OPCODE_INT_REMAINDER:
398 break;
399 default:
400 assert(!"not reached: unsupported binary math opcode");
401 return;
402 }
403
404 if (brw->gen >= 6) {
405 return emit_math2_gen6(opcode, dst, src0, src1);
406 } else {
407 return emit_math2_gen4(opcode, dst, src0, src1);
408 }
409 }
410
411 void
412 vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
413 {
414 if (brw->gen < 7)
415 assert(!"ir_unop_pack_half_2x16 should be lowered");
416
417 assert(dst.type == BRW_REGISTER_TYPE_UD);
418 assert(src0.type == BRW_REGISTER_TYPE_F);
419
420 /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
421 *
422 * Because this instruction does not have a 16-bit floating-point type,
423 * the destination data type must be Word (W).
424 *
425 * The destination must be DWord-aligned and specify a horizontal stride
426 * (HorzStride) of 2. The 16-bit result is stored in the lower word of
427 * each destination channel and the upper word is not modified.
428 *
429 * The above restriction implies that the f32to16 instruction must use
430 * align1 mode, because only in align1 mode is it possible to specify
431 * horizontal stride. We choose here to defy the hardware docs and emit
432 * align16 instructions.
433 *
434 * (I [chadv] did attempt to emit align1 instructions for VS f32to16
435 * instructions. I was partially successful in that the code passed all
436 * tests. However, the code was dubiously correct and fragile, and the
437 * tests were not harsh enough to probe that frailty. Not trusting the
438 * code, I chose instead to remain in align16 mode in defiance of the hw
439 * docs).
440 *
441 * I've [chadv] experimentally confirmed that, on gen7 hardware and the
442 * simulator, emitting a f32to16 in align16 mode with UD as destination
443 * data type is safe. The behavior differs from that specified in the PRM
444 * in that the upper word of each destination channel is cleared to 0.
445 */
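/* As an illustrative example (not from the PRM): packHalf2x16(vec2(1.0, -2.0))
 * yields 0xC0003C00, since 1.0 and -2.0 are 0x3C00 and 0xC000 in half-float
 * and the y component is packed into the upper word.
 */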
446
447 dst_reg tmp_dst(this, glsl_type::uvec2_type);
448 src_reg tmp_src(tmp_dst);
449
450 #if 0
451 /* Verify the undocumented behavior on which the following instructions
452 * rely. If f32to16 fails to clear the upper word of the X and Y channels,
453 * then the result of the bit-or instruction below will be incorrect.
454 *
455 * You should inspect the disasm output in order to verify that the MOV is
456 * not optimized away.
457 */
458 emit(MOV(tmp_dst, src_reg(0x12345678u)));
459 #endif
460
461 /* Give tmp the form below, where "." means untouched.
462 *
463 * w z y x w z y x
464 * |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
465 *
466 * That the upper word of each write-channel be 0 is required for the
467 * following bit-shift and bit-or instructions to work. Note that this
468 * relies on the undocumented hardware behavior mentioned above.
469 */
470 tmp_dst.writemask = WRITEMASK_XY;
471 emit(F32TO16(tmp_dst, src0));
472
473 /* Give the write-channels of dst the form:
474 * 0xhhhh0000
475 */
476 tmp_src.swizzle = SWIZZLE_Y;
477 emit(SHL(dst, tmp_src, src_reg(16u)));
478
479 /* Finally, give the write-channels of dst the form of packHalf2x16's
480 * output:
481 * 0xhhhhllll
482 */
483 tmp_src.swizzle = SWIZZLE_X;
484 emit(OR(dst, src_reg(dst), tmp_src));
485 }
486
487 void
488 vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
489 {
490 if (brw->gen < 7)
491 assert(!"ir_unop_unpack_half_2x16 should be lowered");
492
493 assert(dst.type == BRW_REGISTER_TYPE_F);
494 assert(src0.type == BRW_REGISTER_TYPE_UD);
495
496 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
497 *
498 * Because this instruction does not have a 16-bit floating-point type,
499 * the source data type must be Word (W). The destination type must be
500 * F (Float).
501 *
502 * To use W as the source data type, we must adjust horizontal strides,
503 * which is only possible in align1 mode. All my [chadv] attempts at
504 * emitting align1 instructions for unpackHalf2x16 failed to pass the
505 * Piglit tests, so I gave up.
506 *
507 * I've verified that, on gen7 hardware and the simulator, it is safe to
508 * emit f16to32 in align16 mode with UD as source data type.
509 */
510
511 dst_reg tmp_dst(this, glsl_type::uvec2_type);
512 src_reg tmp_src(tmp_dst);
513
514 tmp_dst.writemask = WRITEMASK_X;
515 emit(AND(tmp_dst, src0, src_reg(0xffffu)));
516
517 tmp_dst.writemask = WRITEMASK_Y;
518 emit(SHR(tmp_dst, src0, src_reg(16u)));
519
520 dst.writemask = WRITEMASK_XY;
521 emit(F16TO32(dst, tmp_src));
522 }
523
524 void
525 vec4_visitor::visit_instructions(const exec_list *list)
526 {
527 foreach_list(node, list) {
528 ir_instruction *ir = (ir_instruction *)node;
529
530 base_ir = ir;
531 ir->accept(this);
532 }
533 }
534
535
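/* Return the size of the given GLSL type, measured in vec4 slots (one
 * register per slot in the vec4 backend).
 */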
536 static int
537 type_size(const struct glsl_type *type)
538 {
539 unsigned int i;
540 int size;
541
542 switch (type->base_type) {
543 case GLSL_TYPE_UINT:
544 case GLSL_TYPE_INT:
545 case GLSL_TYPE_FLOAT:
546 case GLSL_TYPE_BOOL:
547 if (type->is_matrix()) {
548 return type->matrix_columns;
549 } else {
550 /* Regardless of the size of the vector, it gets a vec4. This is bad
551 * packing for things like floats, but otherwise arrays become a
552 * mess. Hopefully a later pass over the code can pack scalars
553 * down if appropriate.
554 */
555 return 1;
556 }
557 case GLSL_TYPE_ARRAY:
558 assert(type->length > 0);
559 return type_size(type->fields.array) * type->length;
560 case GLSL_TYPE_STRUCT:
561 size = 0;
562 for (i = 0; i < type->length; i++) {
563 size += type_size(type->fields.structure[i].type);
564 }
565 return size;
566 case GLSL_TYPE_SAMPLER:
567 /* Samplers take up one slot in UNIFORMS[], but they're baked in
568 * at link time.
569 */
570 return 1;
571 case GLSL_TYPE_ATOMIC_UINT:
572 return 0;
573 case GLSL_TYPE_VOID:
574 case GLSL_TYPE_ERROR:
575 case GLSL_TYPE_INTERFACE:
576 assert(0);
577 break;
578 }
579
580 return 0;
581 }
582
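/* Allocate a virtual GRF that is 'size' vec4 registers long, growing the
 * bookkeeping arrays if necessary, and return its index.
 */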
583 int
584 vec4_visitor::virtual_grf_alloc(int size)
585 {
586 if (virtual_grf_array_size <= virtual_grf_count) {
587 if (virtual_grf_array_size == 0)
588 virtual_grf_array_size = 16;
589 else
590 virtual_grf_array_size *= 2;
591 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
592 virtual_grf_array_size);
593 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
594 virtual_grf_array_size);
595 }
596 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
597 virtual_grf_reg_count += size;
598 virtual_grf_sizes[virtual_grf_count] = size;
599 return virtual_grf_count++;
600 }
601
602 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
603 {
604 init();
605
606 this->file = GRF;
607 this->reg = v->virtual_grf_alloc(type_size(type));
608
609 if (type->is_array() || type->is_record()) {
610 this->swizzle = BRW_SWIZZLE_NOOP;
611 } else {
612 this->swizzle = swizzle_for_size(type->vector_elements);
613 }
614
615 this->type = brw_type_for_base_type(type);
616 }
617
618 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
619 {
620 init();
621
622 this->file = GRF;
623 this->reg = v->virtual_grf_alloc(type_size(type));
624
625 if (type->is_array() || type->is_record()) {
626 this->writemask = WRITEMASK_XYZW;
627 } else {
628 this->writemask = (1 << type->vector_elements) - 1;
629 }
630
631 this->type = brw_type_for_base_type(type);
632 }
633
634 /* Our support for uniforms is piggy-backed on the struct
635 * gl_program, because that's where the values actually
636 * get stored, rather than in some global gl_shader_program uniform
637 * store.
638 */
639 void
640 vec4_visitor::setup_uniform_values(ir_variable *ir)
641 {
642 int namelen = strlen(ir->name);
643
644 /* The data for our (non-builtin) uniforms is stored in a series of
645 * gl_uniform_driver_storage structs for each subcomponent that
646 * glGetUniformLocation() could name. We know it's been set up in the same
647 * order we'd walk the type, so walk the list of storage and find anything
648 * with our name, or the prefix of a component that starts with our name.
649 */
650 for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
651 struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
652
653 if (strncmp(ir->name, storage->name, namelen) != 0 ||
654 (storage->name[namelen] != 0 &&
655 storage->name[namelen] != '.' &&
656 storage->name[namelen] != '[')) {
657 continue;
658 }
659
660 gl_constant_value *components = storage->storage;
661 unsigned vector_count = (MAX2(storage->array_elements, 1) *
662 storage->type->matrix_columns);
663
664 for (unsigned s = 0; s < vector_count; s++) {
665 uniform_vector_size[uniforms] = storage->type->vector_elements;
666
667 int i;
668 for (i = 0; i < uniform_vector_size[uniforms]; i++) {
669 prog_data->param[uniforms * 4 + i] = &components->f;
670 components++;
671 }
672 for (; i < 4; i++) {
673 static float zero = 0;
674 prog_data->param[uniforms * 4 + i] = &zero;
675 }
676
677 uniforms++;
678 }
679 }
680 }
681
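/* Allocate one uniform vec4 per enabled user clip plane and point its
 * prog_data->param entries at the corresponding clip-plane components.
 */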
682 void
683 vec4_visitor::setup_uniform_clipplane_values()
684 {
685 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
686
687 for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
688 this->uniform_vector_size[this->uniforms] = 4;
689 this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
690 this->userplane[i].type = BRW_REGISTER_TYPE_F;
691 for (int j = 0; j < 4; ++j) {
692 prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j];
693 }
694 ++this->uniforms;
695 }
696 }
697
698 /* Our support for builtin uniforms is even scarier than non-builtin.
699 * It sits on top of the PROG_STATE_VAR parameters that are
700 * automatically updated from GL context state.
701 */
702 void
703 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
704 {
705 const ir_state_slot *const slots = ir->state_slots;
706 assert(ir->state_slots != NULL);
707
708 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
709 /* This state reference has already been setup by ir_to_mesa,
710 * but we'll get the same index back here. We can reference
711 * ParameterValues directly, since unlike brw_fs.cpp, we never
712 * add new state references during compile.
713 */
714 int index = _mesa_add_state_reference(this->prog->Parameters,
715 (gl_state_index *)slots[i].tokens);
716 float *values = &this->prog->Parameters->ParameterValues[index][0].f;
717
718 this->uniform_vector_size[this->uniforms] = 0;
719 /* Add each of the unique swizzled channels of the element.
720 * This will end up matching the size of the glsl_type of this field.
721 */
722 int last_swiz = -1;
723 for (unsigned int j = 0; j < 4; j++) {
724 int swiz = GET_SWZ(slots[i].swizzle, j);
725 last_swiz = swiz;
726
727 prog_data->param[this->uniforms * 4 + j] = &values[swiz];
728 if (swiz <= last_swiz)
729 this->uniform_vector_size[this->uniforms]++;
730 }
731 this->uniforms++;
732 }
733 }
734
735 dst_reg *
736 vec4_visitor::variable_storage(ir_variable *var)
737 {
738 return (dst_reg *)hash_table_find(this->variable_ht, var);
739 }
740
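/* Emit instructions that leave the flag register set according to the given
 * boolean rvalue, and report the predicate a following instruction should use
 * to test it.
 */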
741 void
742 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
743 {
744 ir_expression *expr = ir->as_expression();
745
746 *predicate = BRW_PREDICATE_NORMAL;
747
748 if (expr) {
749 src_reg op[2];
750 vec4_instruction *inst;
751
752 assert(expr->get_num_operands() <= 2);
753 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
754 expr->operands[i]->accept(this);
755 op[i] = this->result;
756
757 resolve_ud_negate(&op[i]);
758 }
759
760 switch (expr->operation) {
761 case ir_unop_logic_not:
762 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
763 inst->conditional_mod = BRW_CONDITIONAL_Z;
764 break;
765
766 case ir_binop_logic_xor:
767 inst = emit(XOR(dst_null_d(), op[0], op[1]));
768 inst->conditional_mod = BRW_CONDITIONAL_NZ;
769 break;
770
771 case ir_binop_logic_or:
772 inst = emit(OR(dst_null_d(), op[0], op[1]));
773 inst->conditional_mod = BRW_CONDITIONAL_NZ;
774 break;
775
776 case ir_binop_logic_and:
777 inst = emit(AND(dst_null_d(), op[0], op[1]));
778 inst->conditional_mod = BRW_CONDITIONAL_NZ;
779 break;
780
781 case ir_unop_f2b:
782 if (brw->gen >= 6) {
783 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
784 } else {
785 inst = emit(MOV(dst_null_f(), op[0]));
786 inst->conditional_mod = BRW_CONDITIONAL_NZ;
787 }
788 break;
789
790 case ir_unop_i2b:
791 if (brw->gen >= 6) {
792 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
793 } else {
794 inst = emit(MOV(dst_null_d(), op[0]));
795 inst->conditional_mod = BRW_CONDITIONAL_NZ;
796 }
797 break;
798
799 case ir_binop_all_equal:
800 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
801 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
802 break;
803
804 case ir_binop_any_nequal:
805 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
806 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
807 break;
808
809 case ir_unop_any:
810 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
811 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
812 break;
813
814 case ir_binop_greater:
815 case ir_binop_gequal:
816 case ir_binop_less:
817 case ir_binop_lequal:
818 case ir_binop_equal:
819 case ir_binop_nequal:
820 emit(CMP(dst_null_d(), op[0], op[1],
821 brw_conditional_for_comparison(expr->operation)));
822 break;
823
824 default:
825 assert(!"not reached");
826 break;
827 }
828 return;
829 }
830
831 ir->accept(this);
832
833 resolve_ud_negate(&this->result);
834
835 if (brw->gen >= 6) {
836 vec4_instruction *inst = emit(AND(dst_null_d(),
837 this->result, src_reg(1)));
838 inst->conditional_mod = BRW_CONDITIONAL_NZ;
839 } else {
840 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
841 inst->conditional_mod = BRW_CONDITIONAL_NZ;
842 }
843 }
844
845 /**
846 * Emit a gen6 IF statement with the comparison folded into the IF
847 * instruction.
848 */
849 void
850 vec4_visitor::emit_if_gen6(ir_if *ir)
851 {
852 ir_expression *expr = ir->condition->as_expression();
853
854 if (expr) {
855 src_reg op[2];
856 dst_reg temp;
857
858 assert(expr->get_num_operands() <= 2);
859 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
860 expr->operands[i]->accept(this);
861 op[i] = this->result;
862 }
863
864 switch (expr->operation) {
865 case ir_unop_logic_not:
866 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
867 return;
868
869 case ir_binop_logic_xor:
870 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
871 return;
872
873 case ir_binop_logic_or:
874 temp = dst_reg(this, glsl_type::bool_type);
875 emit(OR(temp, op[0], op[1]));
876 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
877 return;
878
879 case ir_binop_logic_and:
880 temp = dst_reg(this, glsl_type::bool_type);
881 emit(AND(temp, op[0], op[1]));
882 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
883 return;
884
885 case ir_unop_f2b:
886 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
887 return;
888
889 case ir_unop_i2b:
890 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
891 return;
892
893 case ir_binop_greater:
894 case ir_binop_gequal:
895 case ir_binop_less:
896 case ir_binop_lequal:
897 case ir_binop_equal:
898 case ir_binop_nequal:
899 emit(IF(op[0], op[1],
900 brw_conditional_for_comparison(expr->operation)));
901 return;
902
903 case ir_binop_all_equal:
904 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
905 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
906 return;
907
908 case ir_binop_any_nequal:
909 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
910 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
911 return;
912
913 case ir_unop_any:
914 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
915 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
916 return;
917
918 default:
919 assert(!"not reached");
920 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
921 return;
922 }
923 return;
924 }
925
926 ir->condition->accept(this);
927
928 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
929 }
930
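/* Return a copy of the given dst_reg with its writemask replaced. */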
931 dst_reg
932 with_writemask(dst_reg const & r, int mask)
933 {
934 dst_reg result = r;
935 result.writemask = mask;
936 return result;
937 }
938
939
940 void
941 vec4_visitor::visit(ir_variable *ir)
942 {
943 dst_reg *reg = NULL;
944
945 if (variable_storage(ir))
946 return;
947
948 switch (ir->data.mode) {
949 case ir_var_shader_in:
950 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
951 break;
952
953 case ir_var_shader_out:
954 reg = new(mem_ctx) dst_reg(this, ir->type);
955
956 for (int i = 0; i < type_size(ir->type); i++) {
957 output_reg[ir->location + i] = *reg;
958 output_reg[ir->location + i].reg_offset = i;
959 output_reg[ir->location + i].type =
960 brw_type_for_base_type(ir->type->get_scalar_type());
961 output_reg_annotation[ir->location + i] = ir->name;
962 }
963 break;
964
965 case ir_var_auto:
966 case ir_var_temporary:
967 reg = new(mem_ctx) dst_reg(this, ir->type);
968 break;
969
970 case ir_var_uniform:
971 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
972
973 /* Thanks to the lower_ubo_reference pass, we will see only
974 * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
975 * variables, so no need for them to be in variable_ht.
976 *
977 * Atomic counters take no uniform storage, no need to do
978 * anything here.
979 */
980 if (ir->is_in_uniform_block() || ir->type->contains_atomic())
981 return;
982
983 /* Track how big the whole uniform variable is, in case we need to put a
984 * copy of its data into pull constants for array access.
985 */
986 this->uniform_size[this->uniforms] = type_size(ir->type);
987
988 if (!strncmp(ir->name, "gl_", 3)) {
989 setup_builtin_uniform_values(ir);
990 } else {
991 setup_uniform_values(ir);
992 }
993 break;
994
995 case ir_var_system_value:
996 reg = make_reg_for_system_value(ir);
997 break;
998
999 default:
1000 assert(!"not reached");
1001 }
1002
1003 reg->type = brw_type_for_base_type(ir->type);
1004 hash_table_insert(this->variable_ht, reg, ir);
1005 }
1006
1007 void
1008 vec4_visitor::visit(ir_loop *ir)
1009 {
1010 /* We don't want debugging output to print the whole body of the
1011 * loop as the annotation.
1012 */
1013 this->base_ir = NULL;
1014
1015 emit(BRW_OPCODE_DO);
1016
1017 visit_instructions(&ir->body_instructions);
1018
1019 emit(BRW_OPCODE_WHILE);
1020 }
1021
1022 void
1023 vec4_visitor::visit(ir_loop_jump *ir)
1024 {
1025 switch (ir->mode) {
1026 case ir_loop_jump::jump_break:
1027 emit(BRW_OPCODE_BREAK);
1028 break;
1029 case ir_loop_jump::jump_continue:
1030 emit(BRW_OPCODE_CONTINUE);
1031 break;
1032 }
1033 }
1034
1035
1036 void
1037 vec4_visitor::visit(ir_function_signature *ir)
1038 {
1039 assert(0);
1040 (void)ir;
1041 }
1042
1043 void
1044 vec4_visitor::visit(ir_function *ir)
1045 {
1046 /* Ignore function bodies other than main() -- we shouldn't see calls to
1047 * them since they should all be inlined.
1048 */
1049 if (strcmp(ir->name, "main") == 0) {
1050 const ir_function_signature *sig;
1051 exec_list empty;
1052
1053 sig = ir->matching_signature(NULL, &empty);
1054
1055 assert(sig);
1056
1057 visit_instructions(&sig->body);
1058 }
1059 }
1060
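/* If the expression has been marked as a candidate for saturation, emit it
 * through a saturating MOV and return true.
 */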
1061 bool
1062 vec4_visitor::try_emit_sat(ir_expression *ir)
1063 {
1064 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1065 if (!sat_src)
1066 return false;
1067
1068 sat_src->accept(this);
1069 src_reg src = this->result;
1070
1071 this->result = src_reg(this, ir->type);
1072 vec4_instruction *inst;
1073 inst = emit(MOV(dst_reg(this->result), src));
1074 inst->saturate = true;
1075
1076 return true;
1077 }
1078
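/* Try to fuse a pattern like 'a * b + c' into a single MAD instruction
 * (gen6+, float-only); return false if the operands don't match.
 */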
1079 bool
1080 vec4_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
1081 {
1082 /* 3-src instructions were introduced in gen6. */
1083 if (brw->gen < 6)
1084 return false;
1085
1086 /* MAD can only handle floating-point data. */
1087 if (ir->type->base_type != GLSL_TYPE_FLOAT)
1088 return false;
1089
1090 ir_rvalue *nonmul = ir->operands[1 - mul_arg];
1091 ir_expression *mul = ir->operands[mul_arg]->as_expression();
1092
1093 if (!mul || mul->operation != ir_binop_mul)
1094 return false;
1095
1096 nonmul->accept(this);
1097 src_reg src0 = fix_3src_operand(this->result);
1098
1099 mul->operands[0]->accept(this);
1100 src_reg src1 = fix_3src_operand(this->result);
1101
1102 mul->operands[1]->accept(this);
1103 src_reg src2 = fix_3src_operand(this->result);
1104
1105 this->result = src_reg(this, ir->type);
1106 emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2);
1107
1108 return true;
1109 }
1110
1111 void
1112 vec4_visitor::emit_bool_comparison(unsigned int op,
1113 dst_reg dst, src_reg src0, src_reg src1)
1114 {
1115 /* original gen4 does destination conversion before comparison. */
1116 if (brw->gen < 5)
1117 dst.type = src0.type;
1118
1119 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
1120
1121 dst.type = BRW_REGISTER_TYPE_D;
1122 emit(AND(dst, src_reg(dst), src_reg(0x1)));
1123 }
1124
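/* Emit a min/max: a SEL with a conditional mod on gen6+, or a CMP followed
 * by a predicated SEL on earlier generations.
 */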
1125 void
1126 vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
1127 src_reg src0, src_reg src1)
1128 {
1129 vec4_instruction *inst;
1130
1131 if (brw->gen >= 6) {
1132 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1133 inst->conditional_mod = conditionalmod;
1134 } else {
1135 emit(CMP(dst, src0, src1, conditionalmod));
1136
1137 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1138 inst->predicate = BRW_PREDICATE_NORMAL;
1139 }
1140 }
1141
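/* Return true if the rvalue is an integer constant whose value fits in the
 * low 16 bits, so that a single MUL can handle it.
 */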
1142 static bool
1143 is_16bit_constant(ir_rvalue *rvalue)
1144 {
1145 ir_constant *constant = rvalue->as_constant();
1146 if (!constant)
1147 return false;
1148
1149 if (constant->type != glsl_type::int_type &&
1150 constant->type != glsl_type::uint_type)
1151 return false;
1152
1153 return constant->value.u[0] < (1 << 16);
1154 }
1155
1156 void
1157 vec4_visitor::visit(ir_expression *ir)
1158 {
1159 unsigned int operand;
1160 src_reg op[Elements(ir->operands)];
1161 src_reg result_src;
1162 dst_reg result_dst;
1163 vec4_instruction *inst;
1164
1165 if (try_emit_sat(ir))
1166 return;
1167
1168 if (ir->operation == ir_binop_add) {
1169 if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
1170 return;
1171 }
1172
1173 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1174 this->result.file = BAD_FILE;
1175 ir->operands[operand]->accept(this);
1176 if (this->result.file == BAD_FILE) {
1177 printf("Failed to get tree for expression operand:\n");
1178 ir->operands[operand]->print();
1179 exit(1);
1180 }
1181 op[operand] = this->result;
1182
1183 /* Matrix expression operands should have been broken down to vector
1184 * operations already.
1185 */
1186 assert(!ir->operands[operand]->type->is_matrix());
1187 }
1188
1189 int vector_elements = ir->operands[0]->type->vector_elements;
1190 if (ir->operands[1]) {
1191 vector_elements = MAX2(vector_elements,
1192 ir->operands[1]->type->vector_elements);
1193 }
1194
1195 this->result.file = BAD_FILE;
1196
1197 /* Storage for our result. Ideally for an assignment we'd be using
1198 * the actual storage for the result here, instead.
1199 */
1200 result_src = src_reg(this, ir->type);
1201 /* convenience for the emit functions below. */
1202 result_dst = dst_reg(result_src);
1203 /* If nothing special happens, this is the result. */
1204 this->result = result_src;
1205 /* Limit writes to the channels that will be used by result_src later.
1206 * This does limit this temp's use as a temporary for multi-instruction
1207 * sequences.
1208 */
1209 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1210
1211 switch (ir->operation) {
1212 case ir_unop_logic_not:
1213 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
1214 * the one's complement of the whole register, not just bit 0.
1215 */
1216 emit(XOR(result_dst, op[0], src_reg(1)));
1217 break;
1218 case ir_unop_neg:
1219 op[0].negate = !op[0].negate;
1220 emit(MOV(result_dst, op[0]));
1221 break;
1222 case ir_unop_abs:
1223 op[0].abs = true;
1224 op[0].negate = false;
1225 emit(MOV(result_dst, op[0]));
1226 break;
1227
1228 case ir_unop_sign:
1229 if (ir->type->is_float()) {
1230 /* AND(val, 0x80000000) gives the sign bit.
1231 *
1232 * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
1233 * zero.
1234 */
1235 emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1236
1237 op[0].type = BRW_REGISTER_TYPE_UD;
1238 result_dst.type = BRW_REGISTER_TYPE_UD;
1239 emit(AND(result_dst, op[0], src_reg(0x80000000u)));
1240
1241 inst = emit(OR(result_dst, src_reg(result_dst), src_reg(0x3f800000u)));
1242 inst->predicate = BRW_PREDICATE_NORMAL;
1243
1244 this->result.type = BRW_REGISTER_TYPE_F;
1245 } else {
1246 /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
1247 * -> non-negative val generates 0x00000000.
1248 * Predicated OR sets 1 if val is positive.
1249 */
1250 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
1251
1252 emit(ASR(result_dst, op[0], src_reg(31)));
1253
1254 inst = emit(OR(result_dst, src_reg(result_dst), src_reg(1)));
1255 inst->predicate = BRW_PREDICATE_NORMAL;
1256 }
1257 break;
1258
1259 case ir_unop_rcp:
1260 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1261 break;
1262
1263 case ir_unop_exp2:
1264 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1265 break;
1266 case ir_unop_log2:
1267 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1268 break;
1269 case ir_unop_exp:
1270 case ir_unop_log:
1271 assert(!"not reached: should be handled by ir_explog_to_explog2");
1272 break;
1273 case ir_unop_sin:
1274 case ir_unop_sin_reduced:
1275 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1276 break;
1277 case ir_unop_cos:
1278 case ir_unop_cos_reduced:
1279 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1280 break;
1281
1282 case ir_unop_dFdx:
1283 case ir_unop_dFdy:
1284 assert(!"derivatives not valid in vertex shader");
1285 break;
1286
1287 case ir_unop_bitfield_reverse:
1288 emit(BFREV(result_dst, op[0]));
1289 break;
1290 case ir_unop_bit_count:
1291 emit(CBIT(result_dst, op[0]));
1292 break;
1293 case ir_unop_find_msb: {
1294 src_reg temp = src_reg(this, glsl_type::uint_type);
1295
1296 inst = emit(FBH(dst_reg(temp), op[0]));
1297 inst->dst.writemask = WRITEMASK_XYZW;
1298
1299 /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
1300 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
1301 * subtract the result from 31 to convert the MSB count into an LSB count.
1302 */
1303
1304 /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
1305 temp.swizzle = BRW_SWIZZLE_NOOP;
1306 emit(MOV(result_dst, temp));
1307
1308 src_reg src_tmp = src_reg(result_dst);
1309 emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
1310
1311 src_tmp.negate = true;
1312 inst = emit(ADD(result_dst, src_tmp, src_reg(31)));
1313 inst->predicate = BRW_PREDICATE_NORMAL;
1314 break;
1315 }
1316 case ir_unop_find_lsb:
1317 emit(FBL(result_dst, op[0]));
1318 break;
1319
1320 case ir_unop_noise:
1321 assert(!"not reached: should be handled by lower_noise");
1322 break;
1323
1324 case ir_binop_add:
1325 emit(ADD(result_dst, op[0], op[1]));
1326 break;
1327 case ir_binop_sub:
1328 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1329 break;
1330
1331 case ir_binop_mul:
1332 if (brw->gen < 8 && ir->type->is_integer()) {
1333 /* For integer multiplication, the MUL uses the low 16 bits of one of
1334 * the operands (src0 through SNB, src1 on IVB and later). The MACH
1335 * then adds in the contribution of the upper 16 bits of that
1336 * operand. If we can determine that one of the args is in the low
1337 * 16 bits, though, we can just emit a single MUL.
1338 */
1339 if (is_16bit_constant(ir->operands[0])) {
1340 if (brw->gen < 7)
1341 emit(MUL(result_dst, op[0], op[1]));
1342 else
1343 emit(MUL(result_dst, op[1], op[0]));
1344 } else if (is_16bit_constant(ir->operands[1])) {
1345 if (brw->gen < 7)
1346 emit(MUL(result_dst, op[1], op[0]));
1347 else
1348 emit(MUL(result_dst, op[0], op[1]));
1349 } else {
1350 struct brw_reg acc = retype(brw_acc_reg(), result_dst.type);
1351
1352 emit(MUL(acc, op[0], op[1]));
1353 emit(MACH(dst_null_d(), op[0], op[1]));
1354 emit(MOV(result_dst, src_reg(acc)));
1355 }
1356 } else {
1357 emit(MUL(result_dst, op[0], op[1]));
1358 }
1359 break;
1360 case ir_binop_imul_high: {
1361 struct brw_reg acc = retype(brw_acc_reg(), result_dst.type);
1362
1363 emit(MUL(acc, op[0], op[1]));
1364 emit(MACH(result_dst, op[0], op[1]));
1365 break;
1366 }
1367 case ir_binop_div:
1368 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1369 assert(ir->type->is_integer());
1370 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1371 break;
1372 case ir_binop_carry: {
1373 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
1374
1375 emit(ADDC(dst_null_ud(), op[0], op[1]));
1376 emit(MOV(result_dst, src_reg(acc)));
1377 break;
1378 }
1379 case ir_binop_borrow: {
1380 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
1381
1382 emit(SUBB(dst_null_ud(), op[0], op[1]));
1383 emit(MOV(result_dst, src_reg(acc)));
1384 break;
1385 }
1386 case ir_binop_mod:
1387 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1388 assert(ir->type->is_integer());
1389 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1390 break;
1391
1392 case ir_binop_less:
1393 case ir_binop_greater:
1394 case ir_binop_lequal:
1395 case ir_binop_gequal:
1396 case ir_binop_equal:
1397 case ir_binop_nequal: {
1398 emit(CMP(result_dst, op[0], op[1],
1399 brw_conditional_for_comparison(ir->operation)));
1400 emit(AND(result_dst, result_src, src_reg(0x1)));
1401 break;
1402 }
1403
1404 case ir_binop_all_equal:
1405 /* "==" operator producing a scalar boolean. */
1406 if (ir->operands[0]->type->is_vector() ||
1407 ir->operands[1]->type->is_vector()) {
1408 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1409 emit(MOV(result_dst, src_reg(0)));
1410 inst = emit(MOV(result_dst, src_reg(1)));
1411 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1412 } else {
1413 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1414 emit(AND(result_dst, result_src, src_reg(0x1)));
1415 }
1416 break;
1417 case ir_binop_any_nequal:
1418 /* "!=" operator producing a scalar boolean. */
1419 if (ir->operands[0]->type->is_vector() ||
1420 ir->operands[1]->type->is_vector()) {
1421 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1422
1423 emit(MOV(result_dst, src_reg(0)));
1424 inst = emit(MOV(result_dst, src_reg(1)));
1425 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1426 } else {
1427 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1428 emit(AND(result_dst, result_src, src_reg(0x1)));
1429 }
1430 break;
1431
1432 case ir_unop_any:
1433 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1434 emit(MOV(result_dst, src_reg(0)));
1435
1436 inst = emit(MOV(result_dst, src_reg(1)));
1437 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1438 break;
1439
1440 case ir_binop_logic_xor:
1441 emit(XOR(result_dst, op[0], op[1]));
1442 break;
1443
1444 case ir_binop_logic_or:
1445 emit(OR(result_dst, op[0], op[1]));
1446 break;
1447
1448 case ir_binop_logic_and:
1449 emit(AND(result_dst, op[0], op[1]));
1450 break;
1451
1452 case ir_binop_dot:
1453 assert(ir->operands[0]->type->is_vector());
1454 assert(ir->operands[0]->type == ir->operands[1]->type);
1455 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1456 break;
1457
1458 case ir_unop_sqrt:
1459 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1460 break;
1461 case ir_unop_rsq:
1462 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1463 break;
1464
1465 case ir_unop_bitcast_i2f:
1466 case ir_unop_bitcast_u2f:
1467 this->result = op[0];
1468 this->result.type = BRW_REGISTER_TYPE_F;
1469 break;
1470
1471 case ir_unop_bitcast_f2i:
1472 this->result = op[0];
1473 this->result.type = BRW_REGISTER_TYPE_D;
1474 break;
1475
1476 case ir_unop_bitcast_f2u:
1477 this->result = op[0];
1478 this->result.type = BRW_REGISTER_TYPE_UD;
1479 break;
1480
1481 case ir_unop_i2f:
1482 case ir_unop_i2u:
1483 case ir_unop_u2i:
1484 case ir_unop_u2f:
1485 case ir_unop_b2f:
1486 case ir_unop_b2i:
1487 case ir_unop_f2i:
1488 case ir_unop_f2u:
1489 emit(MOV(result_dst, op[0]));
1490 break;
1491 case ir_unop_f2b:
1492 case ir_unop_i2b: {
1493 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1494 emit(AND(result_dst, result_src, src_reg(1)));
1495 break;
1496 }
1497
1498 case ir_unop_trunc:
1499 emit(RNDZ(result_dst, op[0]));
1500 break;
1501 case ir_unop_ceil:
1502 op[0].negate = !op[0].negate;
1503 inst = emit(RNDD(result_dst, op[0]));
1504 this->result.negate = true;
1505 break;
1506 case ir_unop_floor:
1507 inst = emit(RNDD(result_dst, op[0]));
1508 break;
1509 case ir_unop_fract:
1510 inst = emit(FRC(result_dst, op[0]));
1511 break;
1512 case ir_unop_round_even:
1513 emit(RNDE(result_dst, op[0]));
1514 break;
1515
1516 case ir_binop_min:
1517 emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
1518 break;
1519 case ir_binop_max:
1520 emit_minmax(BRW_CONDITIONAL_G, result_dst, op[0], op[1]);
1521 break;
1522
1523 case ir_binop_pow:
1524 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1525 break;
1526
1527 case ir_unop_bit_not:
1528 inst = emit(NOT(result_dst, op[0]));
1529 break;
1530 case ir_binop_bit_and:
1531 inst = emit(AND(result_dst, op[0], op[1]));
1532 break;
1533 case ir_binop_bit_xor:
1534 inst = emit(XOR(result_dst, op[0], op[1]));
1535 break;
1536 case ir_binop_bit_or:
1537 inst = emit(OR(result_dst, op[0], op[1]));
1538 break;
1539
1540 case ir_binop_lshift:
1541 inst = emit(SHL(result_dst, op[0], op[1]));
1542 break;
1543
1544 case ir_binop_rshift:
1545 if (ir->type->base_type == GLSL_TYPE_INT)
1546 inst = emit(ASR(result_dst, op[0], op[1]));
1547 else
1548 inst = emit(SHR(result_dst, op[0], op[1]));
1549 break;
1550
1551 case ir_binop_bfm:
1552 emit(BFI1(result_dst, op[0], op[1]));
1553 break;
1554
1555 case ir_binop_ubo_load: {
1556 ir_constant *uniform_block = ir->operands[0]->as_constant();
1557 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1558 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
1559 src_reg offset;
1560
1561 /* Now, load the vector from that offset. */
1562 assert(ir->type->is_vector() || ir->type->is_scalar());
1563
1564 src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
1565 packed_consts.type = result.type;
1566 src_reg surf_index =
1567 src_reg(prog_data->base.binding_table.ubo_start + uniform_block->value.u[0]);
1568 if (const_offset_ir) {
1569 if (brw->gen >= 8) {
1570 /* Store the offset in a GRF so we can send-from-GRF. */
1571 offset = src_reg(this, glsl_type::int_type);
1572 emit(MOV(dst_reg(offset), src_reg(const_offset / 16)));
1573 } else {
1574 /* Immediates are fine on older generations since they'll be moved
1575 * to a (potentially fake) MRF at the generator level.
1576 */
1577 offset = src_reg(const_offset / 16);
1578 }
1579 } else {
1580 offset = src_reg(this, glsl_type::uint_type);
1581 emit(SHR(dst_reg(offset), op[1], src_reg(4)));
1582 }
1583
1584 vec4_instruction *pull =
1585 emit(new(mem_ctx) vec4_instruction(this,
1586 VS_OPCODE_PULL_CONSTANT_LOAD,
1587 dst_reg(packed_consts),
1588 surf_index,
1589 offset));
1590 pull->base_mrf = 14;
1591 pull->mlen = 1;
1592
1593 packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements);
1594 packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
1595 const_offset % 16 / 4,
1596 const_offset % 16 / 4,
1597 const_offset % 16 / 4);
1598
1599 /* UBO bools are any nonzero int. We store bools as either 0 or 1. */
1600 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1601 emit(CMP(result_dst, packed_consts, src_reg(0u),
1602 BRW_CONDITIONAL_NZ));
1603 emit(AND(result_dst, result, src_reg(0x1)));
1604 } else {
1605 emit(MOV(result_dst, packed_consts));
1606 }
1607 break;
1608 }
1609
1610 case ir_binop_vector_extract:
1611 assert(!"should have been lowered by vec_index_to_cond_assign");
1612 break;
1613
1614 case ir_triop_fma:
1615 op[0] = fix_3src_operand(op[0]);
1616 op[1] = fix_3src_operand(op[1]);
1617 op[2] = fix_3src_operand(op[2]);
1618 /* Note that the instruction's argument order is reversed from GLSL
1619 * and the IR.
1620 */
1621 emit(MAD(result_dst, op[2], op[1], op[0]));
1622 break;
1623
1624 case ir_triop_lrp:
1625 op[0] = fix_3src_operand(op[0]);
1626 op[1] = fix_3src_operand(op[1]);
1627 op[2] = fix_3src_operand(op[2]);
1628 /* Note that the instruction's argument order is reversed from GLSL
1629 * and the IR.
1630 */
1631 emit(LRP(result_dst, op[2], op[1], op[0]));
1632 break;
1633
1634 case ir_triop_csel:
1635 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1636 inst = emit(BRW_OPCODE_SEL, result_dst, op[1], op[2]);
1637 inst->predicate = BRW_PREDICATE_NORMAL;
1638 break;
1639
1640 case ir_triop_bfi:
1641 op[0] = fix_3src_operand(op[0]);
1642 op[1] = fix_3src_operand(op[1]);
1643 op[2] = fix_3src_operand(op[2]);
1644 emit(BFI2(result_dst, op[0], op[1], op[2]));
1645 break;
1646
1647 case ir_triop_bitfield_extract:
1648 op[0] = fix_3src_operand(op[0]);
1649 op[1] = fix_3src_operand(op[1]);
1650 op[2] = fix_3src_operand(op[2]);
1651 /* Note that the instruction's argument order is reversed from GLSL
1652 * and the IR.
1653 */
1654 emit(BFE(result_dst, op[2], op[1], op[0]));
1655 break;
1656
1657 case ir_triop_vector_insert:
1658 assert(!"should have been lowered by lower_vector_insert");
1659 break;
1660
1661 case ir_quadop_bitfield_insert:
1662 assert(!"not reached: should be handled by "
1663 "bitfield_insert_to_bfm_bfi\n");
1664 break;
1665
1666 case ir_quadop_vector:
1667 assert(!"not reached: should be handled by lower_quadop_vector");
1668 break;
1669
1670 case ir_unop_pack_half_2x16:
1671 emit_pack_half_2x16(result_dst, op[0]);
1672 break;
1673 case ir_unop_unpack_half_2x16:
1674 emit_unpack_half_2x16(result_dst, op[0]);
1675 break;
1676 case ir_unop_pack_snorm_2x16:
1677 case ir_unop_pack_snorm_4x8:
1678 case ir_unop_pack_unorm_2x16:
1679 case ir_unop_pack_unorm_4x8:
1680 case ir_unop_unpack_snorm_2x16:
1681 case ir_unop_unpack_snorm_4x8:
1682 case ir_unop_unpack_unorm_2x16:
1683 case ir_unop_unpack_unorm_4x8:
1684 assert(!"not reached: should be handled by lower_packing_builtins");
1685 break;
1686 case ir_unop_unpack_half_2x16_split_x:
1687 case ir_unop_unpack_half_2x16_split_y:
1688 case ir_binop_pack_half_2x16_split:
1689 assert(!"not reached: should not occur in vertex shader");
1690 break;
1691 case ir_binop_ldexp:
1692 assert(!"not reached: should be handled by ldexp_to_arith()");
1693 break;
1694 }
1695 }
1696
1697
1698 void
1699 vec4_visitor::visit(ir_swizzle *ir)
1700 {
1701 src_reg src;
1702 int i = 0;
1703 int swizzle[4];
1704
1705 /* Note that this is only swizzles in expressions, not those on the left
1706 * hand side of an assignment, which do write masking. See ir_assignment
1707 * for that.
1708 */
1709
1710 ir->val->accept(this);
1711 src = this->result;
1712 assert(src.file != BAD_FILE);
1713
1714 for (i = 0; i < ir->type->vector_elements; i++) {
1715 switch (i) {
1716 case 0:
1717 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1718 break;
1719 case 1:
1720 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1721 break;
1722 case 2:
1723 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1724 break;
1725 case 3:
1726 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1727 break;
1728 }
1729 }
1730 for (; i < 4; i++) {
1731 /* Replicate the last channel out. */
1732 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1733 }
1734
1735 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1736
1737 this->result = src;
1738 }
1739
1740 void
1741 vec4_visitor::visit(ir_dereference_variable *ir)
1742 {
1743 const struct glsl_type *type = ir->type;
1744 dst_reg *reg = variable_storage(ir->var);
1745
1746 if (!reg) {
1747 fail("Failed to find variable storage for %s\n", ir->var->name);
1748 this->result = src_reg(brw_null_reg());
1749 return;
1750 }
1751
1752 this->result = src_reg(*reg);
1753
1754 /* System values get their swizzle from the dst_reg writemask */
1755 if (ir->var->data.mode == ir_var_system_value)
1756 return;
1757
1758 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1759 this->result.swizzle = swizzle_for_size(type->vector_elements);
1760 }
1761
1762
1763 int
1764 vec4_visitor::compute_array_stride(ir_dereference_array *ir)
1765 {
1766 /* Under normal circumstances array elements are stored consecutively, so
1767 * the stride is equal to the size of the array element.
1768 */
1769 return type_size(ir->type);
1770 }
1771
1772
1773 void
1774 vec4_visitor::visit(ir_dereference_array *ir)
1775 {
1776 ir_constant *constant_index;
1777 src_reg src;
1778 int array_stride = compute_array_stride(ir);
1779
1780 constant_index = ir->array_index->constant_expression_value();
1781
1782 ir->array->accept(this);
1783 src = this->result;
1784
1785 if (constant_index) {
1786 src.reg_offset += constant_index->value.i[0] * array_stride;
1787 } else {
1788 /* Variable index array dereference. It eats the "vec4" of the
1789 * base of the array and an index that offsets the Mesa register
1790 * index.
1791 */
1792 ir->array_index->accept(this);
1793
1794 src_reg index_reg;
1795
1796 if (array_stride == 1) {
1797 index_reg = this->result;
1798 } else {
1799 index_reg = src_reg(this, glsl_type::int_type);
1800
1801 emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride)));
1802 }
1803
1804 if (src.reladdr) {
1805 src_reg temp = src_reg(this, glsl_type::int_type);
1806
1807 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1808
1809 index_reg = temp;
1810 }
1811
1812 src.reladdr = ralloc(mem_ctx, src_reg);
1813 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1814 }
1815
1816 /* If the type is smaller than a vec4, replicate the last channel out. */
1817 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1818 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1819 else
1820 src.swizzle = BRW_SWIZZLE_NOOP;
1821 src.type = brw_type_for_base_type(ir->type);
1822
1823 this->result = src;
1824 }
1825
1826 void
1827 vec4_visitor::visit(ir_dereference_record *ir)
1828 {
1829 unsigned int i;
1830 const glsl_type *struct_type = ir->record->type;
1831 int offset = 0;
1832
1833 ir->record->accept(this);
1834
1835 for (i = 0; i < struct_type->length; i++) {
1836 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1837 break;
1838 offset += type_size(struct_type->fields.structure[i].type);
1839 }
1840
1841 /* If the type is smaller than a vec4, replicate the last channel out. */
1842 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1843 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1844 else
1845 this->result.swizzle = BRW_SWIZZLE_NOOP;
1846 this->result.type = brw_type_for_base_type(ir->type);
1847
1848 this->result.reg_offset += offset;
1849 }
1850
1851 /**
1852 * We want to be careful in assignment setup to hit the actual storage
1853 * instead of potentially using a temporary like we might with the
1854 * ir_dereference handler.
1855 */
1856 static dst_reg
1857 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1858 {
1859 /* The LHS must be a dereference. If the LHS is a variable-indexed array
1860 * access of a vector, it must be separated into a series of conditional moves
1861 * before reaching this point (see ir_vec_index_to_cond_assign).
1862 */
1863 assert(ir->as_dereference());
1864 ir_dereference_array *deref_array = ir->as_dereference_array();
1865 if (deref_array) {
1866 assert(!deref_array->array->type->is_vector());
1867 }
1868
1869 /* Use the rvalue deref handler for the most part. We'll ignore
1870 * swizzles in it and write swizzles using writemask, though.
1871 */
1872 ir->accept(v);
1873 return dst_reg(v->result);
1874 }
1875
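/* Copy a whole value of the given type from src to dst one vec4 at a time,
 * recursing through structs, arrays and matrices and advancing the register
 * offsets as it goes.
 */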
1876 void
1877 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1878 const struct glsl_type *type, uint32_t predicate)
1879 {
1880 if (type->base_type == GLSL_TYPE_STRUCT) {
1881 for (unsigned int i = 0; i < type->length; i++) {
1882 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1883 }
1884 return;
1885 }
1886
1887 if (type->is_array()) {
1888 for (unsigned int i = 0; i < type->length; i++) {
1889 emit_block_move(dst, src, type->fields.array, predicate);
1890 }
1891 return;
1892 }
1893
1894 if (type->is_matrix()) {
1895 const struct glsl_type *vec_type;
1896
1897 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1898 type->vector_elements, 1);
1899
1900 for (int i = 0; i < type->matrix_columns; i++) {
1901 emit_block_move(dst, src, vec_type, predicate);
1902 }
1903 return;
1904 }
1905
1906 assert(type->is_scalar() || type->is_vector());
1907
1908 dst->type = brw_type_for_base_type(type);
1909 src->type = dst->type;
1910
1911 dst->writemask = (1 << type->vector_elements) - 1;
1912
1913 src->swizzle = swizzle_for_size(type->vector_elements);
1914
1915 vec4_instruction *inst = emit(MOV(*dst, *src));
1916 inst->predicate = predicate;
1917
1918 dst->reg_offset++;
1919 src->reg_offset++;
1920 }
1921
1922
1923 /* If the RHS processing resulted in an instruction generating a
1924 * temporary value, and it would be easy to rewrite the instruction to
1925 * generate its result right into the LHS instead, do so. This ends
1926 * up reliably removing instructions where it can be tricky to do so
1927 * later without real UD chain information.
1928 */
1929 bool
1930 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
1931 dst_reg dst,
1932 src_reg src,
1933 vec4_instruction *pre_rhs_inst,
1934 vec4_instruction *last_rhs_inst)
1935 {
1936 /* This could be supported, but it would take more smarts. */
1937 if (ir->condition)
1938 return false;
1939
1940 if (pre_rhs_inst == last_rhs_inst)
1941 return false; /* No instructions generated to work with. */
1942
1943 /* Make sure the last instruction generated our source reg. */
1944 if (src.file != GRF ||
1945 src.file != last_rhs_inst->dst.file ||
1946 src.reg != last_rhs_inst->dst.reg ||
1947 src.reg_offset != last_rhs_inst->dst.reg_offset ||
1948 src.reladdr ||
1949 src.abs ||
1950 src.negate ||
1951 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
1952 return false;
1953
1954 /* Check that the last instruction fully initialized the channels
1955 * we want to use, in the order we want to use them. We could
1956 * potentially reswizzle the operands of many instructions so that
1957 * we could handle out of order channels, but don't yet.
1958 */
1959
1960 for (unsigned i = 0; i < 4; i++) {
1961 if (dst.writemask & (1 << i)) {
1962 if (!(last_rhs_inst->dst.writemask & (1 << i)))
1963 return false;
1964
1965 if (BRW_GET_SWZ(src.swizzle, i) != i)
1966 return false;
1967 }
1968 }
1969
1970 /* Success! Rewrite the instruction. */
1971 last_rhs_inst->dst.file = dst.file;
1972 last_rhs_inst->dst.reg = dst.reg;
1973 last_rhs_inst->dst.reg_offset = dst.reg_offset;
1974 last_rhs_inst->dst.reladdr = dst.reladdr;
1975 last_rhs_inst->dst.writemask &= dst.writemask;
1976
1977 return true;
1978 }
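/* Illustrative case (register names hypothetical): if the RHS produced
 * "add vgrf4.xyz, a, b" and the assignment would then emit
 * "mov dst.xyz, vgrf4.xyzz", try_rewrite_rhs_to_dst() turns the ADD into
 * "add dst.xyz, a, b" and the caller skips emitting the copy MOV.
 */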
1979
1980 void
1981 vec4_visitor::visit(ir_assignment *ir)
1982 {
1983 dst_reg dst = get_assignment_lhs(ir->lhs, this);
1984 uint32_t predicate = BRW_PREDICATE_NONE;
1985
1986 if (!ir->lhs->type->is_scalar() &&
1987 !ir->lhs->type->is_vector()) {
1988 ir->rhs->accept(this);
1989 src_reg src = this->result;
1990
1991 if (ir->condition) {
1992 emit_bool_to_cond_code(ir->condition, &predicate);
1993 }
1994
1995 /* emit_block_move doesn't account for swizzles in the source register.
1996 * This should be OK, since the source is a structure, an array, or a
1997 * matrix, none of which can be swizzled in GLSL. But double-check to be sure.
1998 */
1999 assert(src.swizzle ==
2000 (ir->rhs->type->is_matrix()
2001 ? swizzle_for_size(ir->rhs->type->vector_elements)
2002 : BRW_SWIZZLE_NOOP));
2003
2004 emit_block_move(&dst, &src, ir->rhs->type, predicate);
2005 return;
2006 }
2007
2008 /* Now we're down to just a scalar/vector with writemasks. */
2009 int i;
2010
2011 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
2012 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2013
2014 ir->rhs->accept(this);
2015
2016 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2017
2018 src_reg src = this->result;
2019
2020 int swizzles[4];
2021 int first_enabled_chan = 0;
2022 int src_chan = 0;
2023
2024 assert(ir->lhs->type->is_vector() ||
2025 ir->lhs->type->is_scalar());
2026 dst.writemask = ir->write_mask;
2027
2028 for (int i = 0; i < 4; i++) {
2029 if (dst.writemask & (1 << i)) {
2030 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
2031 break;
2032 }
2033 }
2034
2035 /* Swizzle a small RHS vector into the channels being written.
2036 *
2037 * GLSL IR treats write_mask as dictating how many channels are
2038 * present on the RHS, while in our instructions we need to make
2039 * those channels appear in the slots of the vec4 they're written to.
2040 */
2041 for (int i = 0; i < 4; i++) {
2042 if (dst.writemask & (1 << i))
2043 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
2044 else
2045 swizzles[i] = first_enabled_chan;
2046 }
2047 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2048 swizzles[2], swizzles[3]);
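/* E.g. for "v.yz = some_vec2" (illustrative GLSL): write_mask is YZ and the
 * vec2 result typically carries the .xyyy size swizzle, so the remap yields
 * .yxyy: RHS channel x lands in dst.y and channel y in dst.z, while the
 * unwritten channels are masked off anyway.
 */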
2049
2050 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
2051 return;
2052 }
2053
2054 if (ir->condition) {
2055 emit_bool_to_cond_code(ir->condition, &predicate);
2056 }
2057
2058 for (i = 0; i < type_size(ir->lhs->type); i++) {
2059 vec4_instruction *inst = emit(MOV(dst, src));
2060 inst->predicate = predicate;
2061
2062 dst.reg_offset++;
2063 src.reg_offset++;
2064 }
2065 }
2066
2067 void
2068 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
2069 {
2070 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2071 foreach_list(node, &ir->components) {
2072 ir_constant *field_value = (ir_constant *)node;
2073
2074 emit_constant_values(dst, field_value);
2075 }
2076 return;
2077 }
2078
2079 if (ir->type->is_array()) {
2080 for (unsigned int i = 0; i < ir->type->length; i++) {
2081 emit_constant_values(dst, ir->array_elements[i]);
2082 }
2083 return;
2084 }
2085
2086 if (ir->type->is_matrix()) {
2087 for (int i = 0; i < ir->type->matrix_columns; i++) {
2088 float *vec = &ir->value.f[i * ir->type->vector_elements];
2089
2090 for (int j = 0; j < ir->type->vector_elements; j++) {
2091 dst->writemask = 1 << j;
2092 dst->type = BRW_REGISTER_TYPE_F;
2093
2094 emit(MOV(*dst, src_reg(vec[j])));
2095 }
2096 dst->reg_offset++;
2097 }
2098 return;
2099 }
2100
2101 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
2102
2103 for (int i = 0; i < ir->type->vector_elements; i++) {
2104 if (!(remaining_writemask & (1 << i)))
2105 continue;
2106
2107 dst->writemask = 1 << i;
2108 dst->type = brw_type_for_base_type(ir->type);
2109
2110 /* Find other components that match the one we're about to
2111 * write. Emits fewer instructions for things like vec4(0.5,
2112 * 1.5, 1.5, 1.5).
2113 */
2114 for (int j = i + 1; j < ir->type->vector_elements; j++) {
2115 if (ir->type->base_type == GLSL_TYPE_BOOL) {
2116 if (ir->value.b[i] == ir->value.b[j])
2117 dst->writemask |= (1 << j);
2118 } else {
2119 /* u, i, and f storage all line up, so no need for a
2120 * switch case for comparing each type.
2121 */
2122 if (ir->value.u[i] == ir->value.u[j])
2123 dst->writemask |= (1 << j);
2124 }
2125 }
2126
2127 switch (ir->type->base_type) {
2128 case GLSL_TYPE_FLOAT:
2129 emit(MOV(*dst, src_reg(ir->value.f[i])));
2130 break;
2131 case GLSL_TYPE_INT:
2132 emit(MOV(*dst, src_reg(ir->value.i[i])));
2133 break;
2134 case GLSL_TYPE_UINT:
2135 emit(MOV(*dst, src_reg(ir->value.u[i])));
2136 break;
2137 case GLSL_TYPE_BOOL:
2138 emit(MOV(*dst, src_reg(ir->value.b[i])));
2139 break;
2140 default:
2141 assert(!"Non-float/uint/int/bool constant");
2142 break;
2143 }
2144
2145 remaining_writemask &= ~dst->writemask;
2146 }
2147 dst->reg_offset++;
2148 }
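/* For instance, emit_constant_values() turns vec4(0.5, 1.5, 1.5, 1.5) into
 * two MOVs (writemask .x for 0.5 and .yzw for 1.5) instead of four.
 */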
2149
2150 void
2151 vec4_visitor::visit(ir_constant *ir)
2152 {
2153 dst_reg dst = dst_reg(this, ir->type);
2154 this->result = src_reg(dst);
2155
2156 emit_constant_values(&dst, ir);
2157 }
2158
2159 void
2160 vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
2161 {
2162 ir_dereference *deref = static_cast<ir_dereference *>(
2163 ir->actual_parameters.get_head());
2164 ir_variable *location = deref->variable_referenced();
2165 unsigned surf_index = (prog_data->base.binding_table.abo_start +
2166 location->atomic.buffer_index);
2167
2168 /* Calculate the surface offset */
2169 src_reg offset(this, glsl_type::uint_type);
2170 ir_dereference_array *deref_array = deref->as_dereference_array();
2171 if (deref_array) {
2172 deref_array->array_index->accept(this);
2173
2174 src_reg tmp(this, glsl_type::uint_type);
2175 emit(MUL(dst_reg(tmp), this->result, ATOMIC_COUNTER_SIZE));
2176 emit(ADD(dst_reg(offset), tmp, location->atomic.offset));
2177 } else {
2178 offset = location->atomic.offset;
2179 }
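/* i.e. offset = array_index * ATOMIC_COUNTER_SIZE + location->atomic.offset
 * for an indexed counter, or just the counter's declared offset otherwise.
 */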
2180
2181 /* Emit the appropriate machine instruction */
2182 const char *callee = ir->callee->function_name();
2183 dst_reg dst = get_assignment_lhs(ir->return_deref, this);
2184
2185 if (!strcmp("__intrinsic_atomic_read", callee)) {
2186 emit_untyped_surface_read(surf_index, dst, offset);
2187
2188 } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
2189 emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
2190 src_reg(), src_reg());
2191
2192 } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
2193 emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
2194 src_reg(), src_reg());
2195 }
2196 }
2197
2198 void
2199 vec4_visitor::visit(ir_call *ir)
2200 {
2201 const char *callee = ir->callee->function_name();
2202
2203 if (!strcmp("__intrinsic_atomic_read", callee) ||
2204 !strcmp("__intrinsic_atomic_increment", callee) ||
2205 !strcmp("__intrinsic_atomic_predecrement", callee)) {
2206 visit_atomic_counter_intrinsic(ir);
2207 } else {
2208 assert(!"Unsupported intrinsic.");
2209 }
2210 }
2211
2212 src_reg
2213 vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, int sampler)
2214 {
2215 vec4_instruction *inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF_MCS);
2216 inst->base_mrf = 2;
2217 inst->mlen = 1;
2218 inst->sampler = sampler;
2219 inst->dst = dst_reg(this, glsl_type::uvec4_type);
2220 inst->dst.writemask = WRITEMASK_XYZW;
2221
2222 /* The parameters are: u, v, r, lod; lod will always be zero due to API restrictions. */
2223 int param_base = inst->base_mrf;
2224 int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1;
2225 int zero_mask = 0xf & ~coord_mask;
2226
2227 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
2228 coordinate));
2229
2230 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
2231 src_reg(0)));
2232
2233 emit(inst);
2234 return src_reg(inst->dst);
2235 }
2236
2237 void
2238 vec4_visitor::visit(ir_texture *ir)
2239 {
2240 int sampler =
2241 _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
2242
2243 /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
2244 * emitting anything other than setting up the constant result.
2245 */
2246 if (ir->op == ir_tg4) {
2247 ir_constant *chan = ir->lod_info.component->as_constant();
2248 int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
2249 if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
2250 dst_reg result(this, ir->type);
2251 this->result = src_reg(result);
2252 emit(MOV(result, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
2253 return;
2254 }
2255 }
2256
2257 /* Should be lowered by do_lower_texture_projection */
2258 assert(!ir->projector);
2259
2260 /* Offset arrays (from textureGatherOffsets) should already be lowered. */
2261 assert(!ir->offset || !ir->offset->type->is_array());
2262
2263 /* Generate code to compute all the subexpression trees. This has to be
2264 * done before loading any values into MRFs for the sampler message since
2265 * generating these values may involve SEND messages that need the MRFs.
2266 */
2267 src_reg coordinate;
2268 if (ir->coordinate) {
2269 ir->coordinate->accept(this);
2270 coordinate = this->result;
2271 }
2272
2273 src_reg shadow_comparitor;
2274 if (ir->shadow_comparitor) {
2275 ir->shadow_comparitor->accept(this);
2276 shadow_comparitor = this->result;
2277 }
2278
2279 bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant();
2280 src_reg offset_value;
2281 if (has_nonconstant_offset) {
2282 ir->offset->accept(this);
2283 offset_value = src_reg(this->result);
2284 }
2285
2286 const glsl_type *lod_type = NULL, *sample_index_type = NULL;
2287 src_reg lod, dPdx, dPdy, sample_index, mcs;
2288 switch (ir->op) {
2289 case ir_tex:
2290 lod = src_reg(0.0f);
2291 lod_type = glsl_type::float_type;
2292 break;
2293 case ir_txf:
2294 case ir_txl:
2295 case ir_txs:
2296 ir->lod_info.lod->accept(this);
2297 lod = this->result;
2298 lod_type = ir->lod_info.lod->type;
2299 break;
2300 case ir_query_levels:
2301 lod = src_reg(0);
2302 lod_type = glsl_type::int_type;
2303 break;
2304 case ir_txf_ms:
2305 ir->lod_info.sample_index->accept(this);
2306 sample_index = this->result;
2307 sample_index_type = ir->lod_info.sample_index->type;
2308
2309 if (brw->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
2310 mcs = emit_mcs_fetch(ir, coordinate, sampler);
2311 else
2312 mcs = src_reg(0u);
2313 break;
2314 case ir_txd:
2315 ir->lod_info.grad.dPdx->accept(this);
2316 dPdx = this->result;
2317
2318 ir->lod_info.grad.dPdy->accept(this);
2319 dPdy = this->result;
2320
2321 lod_type = ir->lod_info.grad.dPdx->type;
2322 break;
2323 case ir_txb:
2324 case ir_lod:
2325 case ir_tg4:
2326 break;
2327 }
2328
2329 vec4_instruction *inst = NULL;
2330 switch (ir->op) {
2331 case ir_tex:
2332 case ir_txl:
2333 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
2334 break;
2335 case ir_txd:
2336 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
2337 break;
2338 case ir_txf:
2339 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
2340 break;
2341 case ir_txf_ms:
2342 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF_MS);
2343 break;
2344 case ir_txs:
2345 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
2346 break;
2347 case ir_tg4:
2348 if (has_nonconstant_offset)
2349 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TG4_OFFSET);
2350 else
2351 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TG4);
2352 break;
2353 case ir_query_levels:
2354 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
2355 break;
2356 case ir_txb:
2357 assert(!"TXB is not valid for vertex shaders.");
2358 break;
2359 case ir_lod:
2360 assert(!"LOD is not valid for vertex shaders.");
2361 break;
2362 default:
2363 assert(!"Unrecognized tex op");
2364 }
2365
2366 bool use_texture_offset = ir->offset != NULL && ir->op != ir_txf;
2367
2368 /* Texel offsets and the tg4 channel select go in the message header; Gen4 always needs a header. */
2369 inst->header_present = use_texture_offset || brw->gen < 5 || ir->op == ir_tg4;
2370 inst->base_mrf = 2;
2371 inst->mlen = inst->header_present + 1; /* always at least one */
2372 inst->sampler = sampler;
2373 inst->dst = dst_reg(this, ir->type);
2374 inst->dst.writemask = WRITEMASK_XYZW;
2375 inst->shadow_compare = ir->shadow_comparitor != NULL;
2376
2377 if (use_texture_offset)
2378 inst->texture_offset = brw_texture_offset(ctx, ir->offset->as_constant());
2379
2380 /* Stuff the channel select bits in the top of the texture offset */
2381 if (ir->op == ir_tg4)
2382 inst->texture_offset |= gather_channel(ir, sampler)<<16;
2383
2384 /* MRF for the first parameter */
2385 int param_base = inst->base_mrf + inst->header_present;
2386
2387 if (ir->op == ir_txs || ir->op == ir_query_levels) {
2388 int writemask = brw->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
2389 emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod));
2390 } else {
2391 /* Load the coordinate */
2392 /* FINISHME: gl_clamp_mask and saturate */
2393 int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1;
2394 int zero_mask = 0xf & ~coord_mask;
2395
2396 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
2397 coordinate));
2398
2399 if (zero_mask != 0) {
2400 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
2401 src_reg(0)));
2402 }
2403 /* Load the shadow comparitor */
2404 if (ir->shadow_comparitor && ir->op != ir_txd && (ir->op != ir_tg4 || !has_nonconstant_offset)) {
2405 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
2406 WRITEMASK_X),
2407 shadow_comparitor));
2408 inst->mlen++;
2409 }
2410
2411 /* Load the LOD info */
2412 if (ir->op == ir_tex || ir->op == ir_txl) {
2413 int mrf, writemask;
2414 if (brw->gen >= 5) {
2415 mrf = param_base + 1;
2416 if (ir->shadow_comparitor) {
2417 writemask = WRITEMASK_Y;
2418 /* mlen already incremented */
2419 } else {
2420 writemask = WRITEMASK_X;
2421 inst->mlen++;
2422 }
2423 } else /* brw->gen == 4 */ {
2424 mrf = param_base;
2425 writemask = WRITEMASK_W;
2426 }
2427 emit(MOV(dst_reg(MRF, mrf, lod_type, writemask), lod));
2428 } else if (ir->op == ir_txf) {
2429 emit(MOV(dst_reg(MRF, param_base, lod_type, WRITEMASK_W), lod));
2430 } else if (ir->op == ir_txf_ms) {
2431 emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X),
2432 sample_index));
2433 if (brw->gen >= 7)
2434 /* MCS data is in the first channel of `mcs`, but we need to get it into
2435 * the .y channel of the second vec4 of params, so replicate .x across
2436 * the whole vec4 and then mask off everything except .y
2437 */
2438 mcs.swizzle = BRW_SWIZZLE_XXXX;
2439 emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
2440 mcs));
2441 inst->mlen++;
2442 } else if (ir->op == ir_txd) {
2443 const glsl_type *type = lod_type;
2444
2445 if (brw->gen >= 5) {
2446 dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2447 dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
2448 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
2449 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
2450 inst->mlen++;
2451
2452 if (ir->type->vector_elements == 3 || ir->shadow_comparitor) {
2453 dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
2454 dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
2455 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
2456 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
2457 inst->mlen++;
2458
2459 if (ir->shadow_comparitor) {
2460 emit(MOV(dst_reg(MRF, param_base + 2,
2461 ir->shadow_comparitor->type, WRITEMASK_Z),
2462 shadow_comparitor));
2463 }
2464 }
2465 } else /* brw->gen == 4 */ {
2466 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
2467 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
2468 inst->mlen += 2;
2469 }
2470 } else if (ir->op == ir_tg4 && has_nonconstant_offset) {
2471 if (ir->shadow_comparitor) {
2472 emit(MOV(dst_reg(MRF, param_base, ir->shadow_comparitor->type, WRITEMASK_W),
2473 shadow_comparitor));
2474 }
2475
2476 emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY),
2477 offset_value));
2478 inst->mlen++;
2479 }
2480 }
2481
2482 emit(inst);
2483
2484 /* Fix up the number of layers (.z) for cube map arrays: the hardware
2485 * returns faces * layers; the spec requires just the layer count.
2486 */
2487 if (ir->op == ir_txs) {
2488 glsl_type const *type = ir->sampler->type;
2489 if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
2490 type->sampler_array) {
2491 emit_math(SHADER_OPCODE_INT_QUOTIENT,
2492 with_writemask(inst->dst, WRITEMASK_Z),
2493 src_reg(inst->dst), src_reg(6));
2494 }
2495 }
2496
2497 swizzle_result(ir, src_reg(inst->dst), sampler);
2498 }
2499
2500 /**
2501 * Set up the gather channel based on the swizzle, for gather4.
2502 */
2503 uint32_t
2504 vec4_visitor::gather_channel(ir_texture *ir, int sampler)
2505 {
2506 ir_constant *chan = ir->lod_info.component->as_constant();
2507 int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
2508 switch (swiz) {
2509 case SWIZZLE_X: return 0;
2510 case SWIZZLE_Y:
2511 /* gather4 sampler is broken for green channel on RG32F --
2512 * we must ask for blue instead.
2513 */
2514 if (key->tex.gather_channel_quirk_mask & (1<<sampler))
2515 return 2;
2516 return 1;
2517 case SWIZZLE_Z: return 2;
2518 case SWIZZLE_W: return 3;
2519 default:
2520 assert(!"Not reached"); /* zero, one swizzles handled already */
2521 return 0;
2522 }
2523 }
2524
2525 void
2526 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
2527 {
2528 int s = key->tex.swizzles[sampler];
2529
2530 this->result = src_reg(this, ir->type);
2531 dst_reg swizzled_result(this->result);
2532
2533 if (ir->op == ir_query_levels) {
2534 /* # levels is in .w */
2535 orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2536 emit(MOV(swizzled_result, orig_val));
2537 return;
2538 }
2539
2540 if (ir->op == ir_txs || ir->type == glsl_type::float_type
2541 || s == SWIZZLE_NOOP || ir->op == ir_tg4) {
2542 emit(MOV(swizzled_result, orig_val));
2543 return;
2544 }
2545
2546
2547 int zero_mask = 0, one_mask = 0, copy_mask = 0;
2548 int swizzle[4] = {0};
2549
2550 for (int i = 0; i < 4; i++) {
2551 switch (GET_SWZ(s, i)) {
2552 case SWIZZLE_ZERO:
2553 zero_mask |= (1 << i);
2554 break;
2555 case SWIZZLE_ONE:
2556 one_mask |= (1 << i);
2557 break;
2558 default:
2559 copy_mask |= (1 << i);
2560 swizzle[i] = GET_SWZ(s, i);
2561 break;
2562 }
2563 }
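/* For example, a texture swizzle of (R, G, ZERO, ONE) produces
 * copy_mask = .xy (with an .xyxx source swizzle), zero_mask = .z and
 * one_mask = .w, so the code below emits three MOVs.
 */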
2564
2565 if (copy_mask) {
2566 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2567 swizzled_result.writemask = copy_mask;
2568 emit(MOV(swizzled_result, orig_val));
2569 }
2570
2571 if (zero_mask) {
2572 swizzled_result.writemask = zero_mask;
2573 emit(MOV(swizzled_result, src_reg(0.0f)));
2574 }
2575
2576 if (one_mask) {
2577 swizzled_result.writemask = one_mask;
2578 emit(MOV(swizzled_result, src_reg(1.0f)));
2579 }
2580 }
2581
2582 void
2583 vec4_visitor::visit(ir_return *ir)
2584 {
2585 assert(!"not reached");
2586 }
2587
2588 void
2589 vec4_visitor::visit(ir_discard *ir)
2590 {
2591 assert(!"not reached");
2592 }
2593
2594 void
2595 vec4_visitor::visit(ir_if *ir)
2596 {
2597 /* Don't point the annotation at the if statement itself, because then the
2598 * whole statement, including the then and else blocks, would get printed.
2599 */
2600 this->base_ir = ir->condition;
2601
2602 if (brw->gen == 6) {
2603 emit_if_gen6(ir);
2604 } else {
2605 uint32_t predicate;
2606 emit_bool_to_cond_code(ir->condition, &predicate);
2607 emit(IF(predicate));
2608 }
2609
2610 visit_instructions(&ir->then_instructions);
2611
2612 if (!ir->else_instructions.is_empty()) {
2613 this->base_ir = ir->condition;
2614 emit(BRW_OPCODE_ELSE);
2615
2616 visit_instructions(&ir->else_instructions);
2617 }
2618
2619 this->base_ir = ir->condition;
2620 emit(BRW_OPCODE_ENDIF);
2621 }
2622
2623 void
2624 vec4_visitor::visit(ir_emit_vertex *)
2625 {
2626 assert(!"not reached");
2627 }
2628
2629 void
2630 vec4_visitor::visit(ir_end_primitive *)
2631 {
2632 assert(!"not reached");
2633 }
2634
2635 void
2636 vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
2637 dst_reg dst, src_reg offset,
2638 src_reg src0, src_reg src1)
2639 {
2640 unsigned mlen = 0;
2641
2642 /* Set the atomic operation offset. */
2643 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
2644 mlen++;
2645
2646 /* Set the atomic operation arguments. */
2647 if (src0.file != BAD_FILE) {
2648 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
2649 mlen++;
2650 }
2651
2652 if (src1.file != BAD_FILE) {
2653 emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
2654 mlen++;
2655 }
2656
2657 /* Emit the instruction. Note that this maps to the normal SIMD8
2658 * untyped atomic message on Ivy Bridge, but that's OK because
2659 * unused channels will be masked out.
2660 */
2661 vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
2662 src_reg(atomic_op), src_reg(surf_index));
2663 inst->base_mrf = 0;
2664 inst->mlen = mlen;
2665 }
2666
2667 void
2668 vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
2669 src_reg offset)
2670 {
2671 /* Set the surface read offset. */
2672 emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
2673
2674 /* Emit the instruction. Note that this maps to the normal SIMD8
2675 * untyped surface read message, but that's OK because unused
2676 * channels will be masked out.
2677 */
2678 vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ,
2679 dst, src_reg(surf_index));
2680 inst->base_mrf = 0;
2681 inst->mlen = 1;
2682 }
2683
2684 void
2685 vec4_visitor::emit_ndc_computation()
2686 {
2687 /* Get the position */
2688 src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
2689
2690 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
2691 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2692 output_reg[BRW_VARYING_SLOT_NDC] = ndc;
2693
2694 current_annotation = "NDC";
2695 dst_reg ndc_w = ndc;
2696 ndc_w.writemask = WRITEMASK_W;
2697 src_reg pos_w = pos;
2698 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2699 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2700
2701 dst_reg ndc_xyz = ndc;
2702 ndc_xyz.writemask = WRITEMASK_XYZ;
2703
2704 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
2705 }
2706
2707 void
2708 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2709 {
2710 if (brw->gen < 6 &&
2711 ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
2712 key->userclip_active || brw->has_negative_rhw_bug)) {
2713 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2714 dst_reg header1_w = header1;
2715 header1_w.writemask = WRITEMASK_W;
2716
2717 emit(MOV(header1, 0u));
2718
2719 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
2720 src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
2721
2722 current_annotation = "Point size";
2723 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2724 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
2725 }
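/* The MUL/AND above packs the point size into an 11-bit field at bits
 * 8..18 of header1.w: scaling by 1 << 11 and keeping 0x7ff << 8 stores the
 * size in eighths (roughly a U8.3 fixed-point value).
 */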
2726
2727 if (key->userclip_active) {
2728 current_annotation = "Clipping flags";
2729 dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
2730 dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
2731
2732 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), src_reg(0.0f), BRW_CONDITIONAL_L));
2733 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, src_reg(0));
2734 emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
2735
2736 emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), src_reg(0.0f), BRW_CONDITIONAL_L));
2737 emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, src_reg(0));
2738 emit(SHL(flags1, src_reg(flags1), src_reg(4)));
2739 emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
2740 }
2741
2742 /* i965 clipping workaround:
2743 * 1) Test for a negative RHW (1/W).
2744 * 2) If it is negative,
2745 * set ndc = (0,0,0,0)
2746 * set ucp[6] = 1
2747 *
2748 * Later, clipping will detect ucp[6] and ensure the primitive is
2749 * clipped against all fixed planes.
2750 */
2751 if (brw->has_negative_rhw_bug) {
2752 src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
2753 ndc_w.swizzle = BRW_SWIZZLE_WWWW;
2754 emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
2755 vec4_instruction *inst;
2756 inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
2757 inst->predicate = BRW_PREDICATE_NORMAL;
2758 inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
2759 inst->predicate = BRW_PREDICATE_NORMAL;
2760 }
2761
2762 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2763 } else if (brw->gen < 6) {
2764 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2765 } else {
2766 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2767 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
2768 emit(MOV(brw_writemask(reg, WRITEMASK_W),
2769 src_reg(output_reg[VARYING_SLOT_PSIZ])));
2770 }
2771 if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
2772 emit(MOV(retype(brw_writemask(reg, WRITEMASK_Y), BRW_REGISTER_TYPE_D),
2773 src_reg(output_reg[VARYING_SLOT_LAYER])));
2774 }
2775 }
2776 }
2777
2778 void
2779 vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
2780 {
2781 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2782 *
2783 * "If a linked set of shaders forming the vertex stage contains no
2784 * static write to gl_ClipVertex or gl_ClipDistance, but the
2785 * application has requested clipping against user clip planes through
2786 * the API, then the coordinate written to gl_Position is used for
2787 * comparison against the user clip planes."
2788 *
2789 * This function is only called if the shader didn't write to
2790 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
2791 * if the user wrote to it; otherwise we use gl_Position.
2792 */
2793 gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
2794 if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
2795 clip_vertex = VARYING_SLOT_POS;
2796 }
2797
2798 for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
2799 ++i) {
2800 reg.writemask = 1 << i;
2801 emit(DP4(reg,
2802 src_reg(output_reg[clip_vertex]),
2803 src_reg(this->userplane[i + offset])));
2804 }
2805 }
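/* emit_clip_distances() thus computes each enabled clip distance as the
 * 4-component dot product of the clip vertex (or gl_Position) with the
 * matching user plane, i.e. roughly
 * gl_ClipDistance[offset + i] = dot(clip_vertex, userplane[offset + i]).
 */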
2806
2807 void
2808 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
2809 {
2810 assert (varying < VARYING_SLOT_MAX);
2811 reg.type = output_reg[varying].type;
2812 current_annotation = output_reg_annotation[varying];
2813 /* Copy the register, saturating if necessary */
2814 vec4_instruction *inst = emit(MOV(reg,
2815 src_reg(output_reg[varying])));
2816 if ((varying == VARYING_SLOT_COL0 ||
2817 varying == VARYING_SLOT_COL1 ||
2818 varying == VARYING_SLOT_BFC0 ||
2819 varying == VARYING_SLOT_BFC1) &&
2820 key->clamp_vertex_color) {
2821 inst->saturate = true;
2822 }
2823 }
2824
2825 void
2826 vec4_visitor::emit_urb_slot(int mrf, int varying)
2827 {
2828 struct brw_reg hw_reg = brw_message_reg(mrf);
2829 dst_reg reg = dst_reg(MRF, mrf);
2830 reg.type = BRW_REGISTER_TYPE_F;
2831
2832 switch (varying) {
2833 case VARYING_SLOT_PSIZ:
2834 /* PSIZ is always in slot 0, and is coupled with other flags. */
2835 current_annotation = "indices, point width, clip flags";
2836 emit_psiz_and_flags(hw_reg);
2837 break;
2838 case BRW_VARYING_SLOT_NDC:
2839 current_annotation = "NDC";
2840 emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
2841 break;
2842 case VARYING_SLOT_POS:
2843 current_annotation = "gl_Position";
2844 emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
2845 break;
2846 case VARYING_SLOT_EDGE:
2847 /* This slot is present when drawing unfilled polygons. We're supposed to
2848 * copy the edge flag from the user-provided vertex array
2849 * (glEdgeFlagPointer) if one is set, and otherwise from the current value
2850 * of that attribute (which starts as 1.0f). Clipping then uses this to
2851 * determine which edges should be drawn as wireframe.
2852 */
2853 current_annotation = "edge flag";
2854 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
2855 glsl_type::float_type, WRITEMASK_XYZW))));
2856 break;
2857 case BRW_VARYING_SLOT_PAD:
2858 /* No need to write to this slot */
2859 break;
2860 default:
2861 emit_generic_urb_slot(reg, varying);
2862 break;
2863 }
2864 }
2865
2866 static int
2867 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2868 {
2869 if (brw->gen >= 6) {
2870 /* URB data written (does not include the message header reg) must
2871 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
2872 * section 5.4.3.2.2: URB_INTERLEAVED.
2873 *
2874 * URB entries are allocated on a multiple of 1024 bits, so an
2875 * extra 128 bits written here to make the end align to 256 is
2876 * no problem.
2877 */
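/* mlen includes the message header register, so the data length is
 * mlen - 1; bumping an even mlen to odd keeps that data length even.
 */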
2878 if ((mlen % 2) != 1)
2879 mlen++;
2880 }
2881
2882 return mlen;
2883 }
2884
2885
2886 /**
2887 * Generates the VUE payload plus the necessary URB write instructions to
2888 * output it.
2889 *
2890 * The VUE layout is documented in Volume 2a.
2891 */
2892 void
2893 vec4_visitor::emit_vertex()
2894 {
2895 /* MRF 0 is reserved for the debugger, so start with message header
2896 * in MRF 1.
2897 */
2898 int base_mrf = 1;
2899 int mrf = base_mrf;
2900 /* In the process of generating our URB write message contents, we
2901 * may need to unspill a register or load from an array. Those
2902 * reads would use MRFs 14-15.
2903 */
2904 int max_usable_mrf = 13;
2905
2906 /* The following assertion verifies that max_usable_mrf causes an
2907 * even-numbered amount of URB write data, which will meet gen6's
2908 * requirements for length alignment.
2909 */
2910 assert ((max_usable_mrf - base_mrf) % 2 == 0);
2911
2912 /* First mrf is the g0-based message header containing URB handles and
2913 * such.
2914 */
2915 emit_urb_write_header(mrf++);
2916
2917 if (brw->gen < 6) {
2918 emit_ndc_computation();
2919 }
2920
2921 /* Lower legacy ff and ClipVertex clipping to clip distances */
2922 if (key->userclip_active && !prog->UsesClipDistanceOut) {
2923 current_annotation = "user clip distances";
2924
2925 output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
2926 output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
2927
2928 emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
2929 emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
2930 }
2931
2932 /* We may need to split this up into several URB writes, so do them in a
2933 * loop.
2934 */
2935 int slot = 0;
2936 bool complete = false;
2937 do {
2938 /* URB offset is in URB row increments, and each of our MRFs is half of
2939 * one of those, since we're doing interleaved writes.
2940 */
2941 int offset = slot / 2;
2942
2943 mrf = base_mrf + 1;
2944 for (; slot < prog_data->vue_map.num_slots; ++slot) {
2945 emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
2946
2947 /* If this was max_usable_mrf, we can't fit anything more into this
2948 * URB WRITE.
2949 */
2950 if (mrf > max_usable_mrf) {
2951 slot++;
2952 break;
2953 }
2954 }
2955
2956 complete = slot >= prog_data->vue_map.num_slots;
2957 current_annotation = "URB write";
2958 vec4_instruction *inst = emit_urb_write_opcode(complete);
2959 inst->base_mrf = base_mrf;
2960 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2961 inst->offset += offset;
2962 } while(!complete);
2963 }
2964
2965
2966 src_reg
2967 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2968 src_reg *reladdr, int reg_offset)
2969 {
2970 /* Because we store the values to scratch interleaved like our
2971 * vertex data, we need to scale the vec4 index by 2.
2972 */
2973 int message_header_scale = 2;
2974
2975 /* Pre-gen6, the message header uses byte offsets instead of vec4
2976 * (16-byte) offset units.
2977 */
2978 if (brw->gen < 6)
2979 message_header_scale *= 16;
2980
2981 if (reladdr) {
2982 src_reg index = src_reg(this, glsl_type::int_type);
2983
2984 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2985 emit_before(inst, MUL(dst_reg(index),
2986 index, src_reg(message_header_scale)));
2987
2988 return index;
2989 } else {
2990 return src_reg(reg_offset * message_header_scale);
2991 }
2992 }
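/* get_scratch_offset() thus yields index = (reladdr + reg_offset) * 2 vec4
 * rows (times a further 16 to reach byte units before Gen6) when a reladdr
 * is present, and the equivalent scaled constant otherwise.
 */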
2993
2994 src_reg
2995 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2996 src_reg *reladdr, int reg_offset)
2997 {
2998 if (reladdr) {
2999 src_reg index = src_reg(this, glsl_type::int_type);
3000
3001 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
3002
3003 /* Pre-gen6, the message header uses byte offsets instead of vec4
3004 * (16-byte) offset units.
3005 */
3006 if (brw->gen < 6) {
3007 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
3008 }
3009
3010 return index;
3011 } else if (brw->gen >= 8) {
3012 /* Store the offset in a GRF so we can send-from-GRF. */
3013 src_reg offset = src_reg(this, glsl_type::int_type);
3014 emit_before(inst, MOV(dst_reg(offset), src_reg(reg_offset)));
3015 return offset;
3016 } else {
3017 int message_header_scale = brw->gen < 6 ? 16 : 1;
3018 return src_reg(reg_offset * message_header_scale);
3019 }
3020 }
3021
3022 /**
3023 * Emits an instruction before @inst to load the value named by @orig_src
3024 * from scratch space at @base_offset to @temp.
3025 *
3026 * @base_offset is measured in 32-byte units (the size of a register).
3027 */
3028 void
3029 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
3030 dst_reg temp, src_reg orig_src,
3031 int base_offset)
3032 {
3033 int reg_offset = base_offset + orig_src.reg_offset;
3034 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
3035
3036 emit_before(inst, SCRATCH_READ(temp, index));
3037 }
3038
3039 /**
3040 * Emits an instruction after @inst to store the value to be written
3041 * to @orig_dst to scratch space at @base_offset, from @temp.
3042 *
3043 * @base_offset is measured in 32-byte units (the size of a register).
3044 */
3045 void
3046 vec4_visitor::emit_scratch_write(vec4_instruction *inst, int base_offset)
3047 {
3048 int reg_offset = base_offset + inst->dst.reg_offset;
3049 src_reg index = get_scratch_offset(inst, inst->dst.reladdr, reg_offset);
3050
3051 /* Create a temporary register to store *inst's result in.
3052 *
3053 * We have to be careful in MOVing from our temporary result register in
3054 * the scratch write. If we swizzle from channels of the temporary that
3055 * weren't initialized, it will confuse live interval analysis, which will
3056 * make spilling fail to make progress.
3057 */
3058 src_reg temp = src_reg(this, glsl_type::vec4_type);
3059 temp.type = inst->dst.type;
3060 int first_writemask_chan = ffs(inst->dst.writemask) - 1;
3061 int swizzles[4];
3062 for (int i = 0; i < 4; i++)
3063 if (inst->dst.writemask & (1 << i))
3064 swizzles[i] = i;
3065 else
3066 swizzles[i] = first_writemask_chan;
3067 temp.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
3068 swizzles[2], swizzles[3]);
3069
3070 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
3071 inst->dst.writemask));
3072 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
3073 write->predicate = inst->predicate;
3074 write->ir = inst->ir;
3075 write->annotation = inst->annotation;
3076 inst->insert_after(write);
3077
3078 inst->dst.file = temp.file;
3079 inst->dst.reg = temp.reg;
3080 inst->dst.reg_offset = temp.reg_offset;
3081 inst->dst.reladdr = NULL;
3082 }
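/* For instance, when emit_scratch_write() rewrites an instruction that only
 * wrote .y, the temporary picks up a .yyyy swizzle, so the scratch write
 * never reads its uninitialized .xzw channels.
 */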
3083
3084 /**
3085 * We can't generally support array access in GRF space, because a
3086 * single instruction's destination can only span 2 contiguous
3087 * registers. So, we send all GRF arrays that get variable index
3088 * access to scratch space.
3089 */
3090 void
3091 vec4_visitor::move_grf_array_access_to_scratch()
3092 {
3093 int scratch_loc[this->virtual_grf_count];
3094
3095 for (int i = 0; i < this->virtual_grf_count; i++) {
3096 scratch_loc[i] = -1;
3097 }
3098
3099 /* First, calculate the set of virtual GRFs that need to be punted
3100 * to scratch due to having any array access on them, and where in
3101 * scratch.
3102 */
3103 foreach_list(node, &this->instructions) {
3104 vec4_instruction *inst = (vec4_instruction *)node;
3105
3106 if (inst->dst.file == GRF && inst->dst.reladdr &&
3107 scratch_loc[inst->dst.reg] == -1) {
3108 scratch_loc[inst->dst.reg] = c->last_scratch;
3109 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
3110 }
3111
3112 for (int i = 0 ; i < 3; i++) {
3113 src_reg *src = &inst->src[i];
3114
3115 if (src->file == GRF && src->reladdr &&
3116 scratch_loc[src->reg] == -1) {
3117 scratch_loc[src->reg] = c->last_scratch;
3118 c->last_scratch += this->virtual_grf_sizes[src->reg];
3119 }
3120 }
3121 }
3122
3123 /* Now, for anything that will be accessed through scratch, rewrite
3124 * it to load/store. Note that this is a _safe list walk, because
3125 * we may generate a new scratch_write instruction after the one
3126 * we're processing.
3127 */
3128 foreach_list_safe(node, &this->instructions) {
3129 vec4_instruction *inst = (vec4_instruction *)node;
3130
3131 /* Set up the annotation tracking for new generated instructions. */
3132 base_ir = inst->ir;
3133 current_annotation = inst->annotation;
3134
3135 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
3136 emit_scratch_write(inst, scratch_loc[inst->dst.reg]);
3137 }
3138
3139 for (int i = 0 ; i < 3; i++) {
3140 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
3141 continue;
3142
3143 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3144
3145 emit_scratch_read(inst, temp, inst->src[i],
3146 scratch_loc[inst->src[i].reg]);
3147
3148 inst->src[i].file = temp.file;
3149 inst->src[i].reg = temp.reg;
3150 inst->src[i].reg_offset = temp.reg_offset;
3151 inst->src[i].reladdr = NULL;
3152 }
3153 }
3154 }
3155
3156 /**
3157 * Emits an instruction before @inst to load the value named by @orig_src
3158 * from the pull constant buffer (surface) at @base_offset to @temp.
3159 */
3160 void
3161 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
3162 dst_reg temp, src_reg orig_src,
3163 int base_offset)
3164 {
3165 int reg_offset = base_offset + orig_src.reg_offset;
3166 src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start);
3167 src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
3168 vec4_instruction *load;
3169
3170 if (brw->gen >= 7) {
3171 dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
3172 grf_offset.type = offset.type;
3173 emit_before(inst, MOV(grf_offset, offset));
3174
3175 load = new(mem_ctx) vec4_instruction(this,
3176 VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
3177 temp, index, src_reg(grf_offset));
3178 } else {
3179 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
3180 temp, index, offset);
3181 load->base_mrf = 14;
3182 load->mlen = 1;
3183 }
3184 emit_before(inst, load);
3185 }
3186
3187 /**
3188 * Implements array access of uniforms by inserting a
3189 * PULL_CONSTANT_LOAD instruction.
3190 *
3191 * Unlike temporary GRF array access (where we don't support it due to
3192 * the difficulty of doing relative addressing on instruction
3193 * destinations), we could potentially do array access of uniforms
3194 * that were loaded in GRF space as push constants. In real-world
3195 * usage we've seen, though, the arrays being accessed are always larger
3196 * than we could load as push constants, so we simply move all uniform
3197 * array access out to the pull constant buffer.
3198 */
3199 void
3200 vec4_visitor::move_uniform_array_access_to_pull_constants()
3201 {
3202 int pull_constant_loc[this->uniforms];
3203
3204 for (int i = 0; i < this->uniforms; i++) {
3205 pull_constant_loc[i] = -1;
3206 }
3207
3208 /* Walk through and find array access of uniforms. Put a copy of that
3209 * uniform in the pull constant buffer.
3210 *
3211 * Note that we don't move constant-indexed accesses to arrays. No
3212 * testing has been done of the performance impact of this choice.
3213 */
3214 foreach_list_safe(node, &this->instructions) {
3215 vec4_instruction *inst = (vec4_instruction *)node;
3216
3217 for (int i = 0 ; i < 3; i++) {
3218 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
3219 continue;
3220
3221 int uniform = inst->src[i].reg;
3222
3223 /* If this array isn't already present in the pull constant buffer,
3224 * add it.
3225 */
3226 if (pull_constant_loc[uniform] == -1) {
3227 const float **values = &prog_data->param[uniform * 4];
3228
3229 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
3230
3231 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
3232 prog_data->pull_param[prog_data->nr_pull_params++]
3233 = values[j];
3234 }
3235 }
3236
3237 /* Set up the annotation tracking for new generated instructions. */
3238 base_ir = inst->ir;
3239 current_annotation = inst->annotation;
3240
3241 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3242
3243 emit_pull_constant_load(inst, temp, inst->src[i],
3244 pull_constant_loc[uniform]);
3245
3246 inst->src[i].file = temp.file;
3247 inst->src[i].reg = temp.reg;
3248 inst->src[i].reg_offset = temp.reg_offset;
3249 inst->src[i].reladdr = NULL;
3250 }
3251 }
3252
3253 /* Now there are no accesses of the UNIFORM file with a reladdr, so
3254 * no need to track them as larger-than-vec4 objects. This will be
3255 * relied on in cutting out unused uniform vectors from push
3256 * constants.
3257 */
3258 split_uniform_registers();
3259 }
3260
3261 void
3262 vec4_visitor::resolve_ud_negate(src_reg *reg)
3263 {
3264 if (reg->type != BRW_REGISTER_TYPE_UD ||
3265 !reg->negate)
3266 return;
3267
3268 src_reg temp = src_reg(this, glsl_type::uvec4_type);
3269 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
3270 *reg = temp;
3271 }
3272
3273 vec4_visitor::vec4_visitor(struct brw_context *brw,
3274 struct brw_vec4_compile *c,
3275 struct gl_program *prog,
3276 const struct brw_vec4_prog_key *key,
3277 struct brw_vec4_prog_data *prog_data,
3278 struct gl_shader_program *shader_prog,
3279 struct brw_shader *shader,
3280 void *mem_ctx,
3281 bool debug_flag,
3282 bool no_spills)
3283 : sanity_param_count(0),
3284 fail_msg(NULL),
3285 first_non_payload_grf(0),
3286 need_all_constants_in_pull_buffer(false),
3287 debug_flag(debug_flag),
3288 no_spills(no_spills)
3289 {
3290 this->brw = brw;
3291 this->ctx = &brw->ctx;
3292 this->shader_prog = shader_prog;
3293 this->shader = shader;
3294
3295 this->mem_ctx = mem_ctx;
3296 this->failed = false;
3297
3298 this->base_ir = NULL;
3299 this->current_annotation = NULL;
3300 memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
3301
3302 this->c = c;
3303 this->prog = prog;
3304 this->key = key;
3305 this->prog_data = prog_data;
3306 this->stage_prog_data = &prog_data->base;
3307
3308 this->variable_ht = hash_table_ctor(0,
3309 hash_table_pointer_hash,
3310 hash_table_pointer_compare);
3311
3312 this->virtual_grf_start = NULL;
3313 this->virtual_grf_end = NULL;
3314 this->virtual_grf_sizes = NULL;
3315 this->virtual_grf_count = 0;
3316 this->virtual_grf_reg_map = NULL;
3317 this->virtual_grf_reg_count = 0;
3318 this->virtual_grf_array_size = 0;
3319 this->live_intervals_valid = false;
3320
3321 this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
3322
3323 this->uniforms = 0;
3324 }
3325
3326 vec4_visitor::~vec4_visitor()
3327 {
3328 hash_table_dtor(this->variable_ht);
3329 }
3330
3331
3332 void
3333 vec4_visitor::fail(const char *format, ...)
3334 {
3335 va_list va;
3336 char *msg;
3337
3338 if (failed)
3339 return;
3340
3341 failed = true;
3342
3343 va_start(va, format);
3344 msg = ralloc_vasprintf(mem_ctx, format, va);
3345 va_end(va);
3346 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
3347
3348 this->fail_msg = msg;
3349
3350 if (debug_flag) {
3351 fprintf(stderr, "%s", msg);
3352 }
3353 }
3354
3355 } /* namespace brw */