i965: Shrink Gen5 VUE map layout to be the same as Gen4.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 #include "glsl/ir_uniform.h"
26 extern "C" {
27 #include "main/context.h"
28 #include "main/macros.h"
29 #include "program/prog_parameter.h"
30 #include "program/sampler.h"
31 }
32
33 namespace brw {
34
35 vec4_instruction::vec4_instruction(vec4_visitor *v,
36 enum opcode opcode, dst_reg dst,
37 src_reg src0, src_reg src1, src_reg src2)
38 {
39 this->opcode = opcode;
40 this->dst = dst;
41 this->src[0] = src0;
42 this->src[1] = src1;
43 this->src[2] = src2;
44 this->ir = v->base_ir;
45 this->annotation = v->current_annotation;
46 }
47
48 vec4_instruction *
49 vec4_visitor::emit(vec4_instruction *inst)
50 {
51 this->instructions.push_tail(inst);
52
53 return inst;
54 }
55
56 vec4_instruction *
57 vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
58 {
59 new_inst->ir = inst->ir;
60 new_inst->annotation = inst->annotation;
61
62 inst->insert_before(new_inst);
63
64 return inst;
65 }
66
67 vec4_instruction *
68 vec4_visitor::emit(enum opcode opcode, dst_reg dst,
69 src_reg src0, src_reg src1, src_reg src2)
70 {
71 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
72 src0, src1, src2));
73 }
74
75
76 vec4_instruction *
77 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
78 {
79 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
80 }
81
82 vec4_instruction *
83 vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
84 {
85 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
86 }
87
88 vec4_instruction *
89 vec4_visitor::emit(enum opcode opcode)
90 {
91 return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
92 }
93
94 #define ALU1(op) \
95 vec4_instruction * \
96 vec4_visitor::op(dst_reg dst, src_reg src0) \
97 { \
98 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
99 src0); \
100 }
101
102 #define ALU2(op) \
103 vec4_instruction * \
104 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
105 { \
106 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
107 src0, src1); \
108 }
109
110 #define ALU3(op) \
111 vec4_instruction * \
112 vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\
113 { \
114 return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
115 src0, src1, src2); \
116 }
117
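/* Each ALU[123](op) invocation below defines a vec4_visitor helper named
 * after the opcode (MOV(), ADD(), SHL(), ...) that only constructs a
 * BRW_OPCODE_<op> vec4_instruction; callers still hand the result to
 * emit() to append it to the instruction stream.
 */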
118 ALU1(NOT)
119 ALU1(MOV)
120 ALU1(FRC)
121 ALU1(RNDD)
122 ALU1(RNDE)
123 ALU1(RNDZ)
124 ALU1(F32TO16)
125 ALU1(F16TO32)
126 ALU2(ADD)
127 ALU2(MUL)
128 ALU2(MACH)
129 ALU2(AND)
130 ALU2(OR)
131 ALU2(XOR)
132 ALU2(DP3)
133 ALU2(DP4)
134 ALU2(DPH)
135 ALU2(SHL)
136 ALU2(SHR)
137 ALU2(ASR)
138 ALU3(LRP)
139 ALU1(BFREV)
140 ALU3(BFE)
141 ALU2(BFI1)
142 ALU3(BFI2)
143 ALU1(FBH)
144 ALU1(FBL)
145 ALU1(CBIT)
146
147 /** Gen4 predicated IF. */
148 vec4_instruction *
149 vec4_visitor::IF(uint32_t predicate)
150 {
151 vec4_instruction *inst;
152
153 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
154 inst->predicate = predicate;
155
156 return inst;
157 }
158
159 /** Gen6+ IF with embedded comparison. */
160 vec4_instruction *
161 vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
162 {
163 assert(intel->gen >= 6);
164
165 vec4_instruction *inst;
166
167 resolve_ud_negate(&src0);
168 resolve_ud_negate(&src1);
169
170 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
171 src0, src1);
172 inst->conditional_mod = condition;
173
174 return inst;
175 }
176
177 /**
178 * CMP: Sets the low bit of the destination channels with the result
179 * of the comparison, while the upper bits are undefined, and updates
180 * the flag register with the packed 16 bits of the result.
181 */
182 vec4_instruction *
183 vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
184 {
185 vec4_instruction *inst;
186
187 /* original gen4 does type conversion to the destination type
188 * before comparison, producing garbage results for floating
189 * point comparisons.
190 */
191 if (intel->gen == 4) {
192 dst.type = src0.type;
193 if (dst.file == HW_REG)
194 dst.fixed_hw_reg.type = dst.type;
195 }
196
197 resolve_ud_negate(&src0);
198 resolve_ud_negate(&src1);
199
200 inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
201 inst->conditional_mod = condition;
202
203 return inst;
204 }
205
206 vec4_instruction *
207 vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
208 {
209 vec4_instruction *inst;
210
211 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
212 dst, index);
213 inst->base_mrf = 14;
214 inst->mlen = 2;
215
216 return inst;
217 }
218
219 vec4_instruction *
220 vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
221 {
222 vec4_instruction *inst;
223
224 inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
225 dst, src, index);
226 inst->base_mrf = 13;
227 inst->mlen = 3;
228
229 return inst;
230 }
231
232 void
233 vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
234 {
235 static enum opcode dot_opcodes[] = {
236 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
237 };
238
239 emit(dot_opcodes[elements - 2], dst, src0, src1);
240 }
241
242 src_reg
243 vec4_visitor::fix_3src_operand(src_reg src)
244 {
245 /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
246 * able to use vertical stride of zero to replicate the vec4 uniform, like
247 *
248 * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
249 *
250 * But you can't, since vertical stride is always four in three-source
251 * instructions. Instead, insert a MOV instruction to do the replication so
252 * that the three-source instruction can consume it.
253 */
254
255 /* The MOV is only needed if the source is a uniform or immediate. */
256 if (src.file != UNIFORM && src.file != IMM)
257 return src;
258
259 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
260 expanded.type = src.type;
261 emit(MOV(expanded, src));
262 return src_reg(expanded);
263 }
264
265 src_reg
266 vec4_visitor::fix_math_operand(src_reg src)
267 {
268 /* The gen6 math instruction ignores the source modifiers --
269 * swizzle, abs, negate, and at least some parts of the register
270 * region description.
271 *
272 * Rather than trying to enumerate all these cases, *always* expand the
273 * operand to a temp GRF for gen6.
274 *
275 * For gen7, keep the operand as-is, except if immediate, which gen7 still
276 * can't use.
277 */
278
279 if (intel->gen == 7 && src.file != IMM)
280 return src;
281
282 dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
283 expanded.type = src.type;
284 emit(MOV(expanded, src));
285 return src_reg(expanded);
286 }
287
288 void
289 vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
290 {
291 src = fix_math_operand(src);
292
293 if (dst.writemask != WRITEMASK_XYZW) {
294 /* The gen6 math instruction must be align1, so we can't do
295 * writemasks.
296 */
297 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
298
299 emit(opcode, temp_dst, src);
300
301 emit(MOV(dst, src_reg(temp_dst)));
302 } else {
303 emit(opcode, dst, src);
304 }
305 }
306
307 void
308 vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
309 {
310 vec4_instruction *inst = emit(opcode, dst, src);
311 inst->base_mrf = 1;
312 inst->mlen = 1;
313 }
314
315 void
316 vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
317 {
318 switch (opcode) {
319 case SHADER_OPCODE_RCP:
320 case SHADER_OPCODE_RSQ:
321 case SHADER_OPCODE_SQRT:
322 case SHADER_OPCODE_EXP2:
323 case SHADER_OPCODE_LOG2:
324 case SHADER_OPCODE_SIN:
325 case SHADER_OPCODE_COS:
326 break;
327 default:
328 assert(!"not reached: bad math opcode");
329 return;
330 }
331
332 if (intel->gen >= 6) {
333 return emit_math1_gen6(opcode, dst, src);
334 } else {
335 return emit_math1_gen4(opcode, dst, src);
336 }
337 }
338
339 void
340 vec4_visitor::emit_math2_gen6(enum opcode opcode,
341 dst_reg dst, src_reg src0, src_reg src1)
342 {
343 src0 = fix_math_operand(src0);
344 src1 = fix_math_operand(src1);
345
346 if (dst.writemask != WRITEMASK_XYZW) {
347 /* The gen6 math instruction must be align1, so we can't do
348 * writemasks.
349 */
350 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
351 temp_dst.type = dst.type;
352
353 emit(opcode, temp_dst, src0, src1);
354
355 emit(MOV(dst, src_reg(temp_dst)));
356 } else {
357 emit(opcode, dst, src0, src1);
358 }
359 }
360
361 void
362 vec4_visitor::emit_math2_gen4(enum opcode opcode,
363 dst_reg dst, src_reg src0, src_reg src1)
364 {
365 vec4_instruction *inst = emit(opcode, dst, src0, src1);
366 inst->base_mrf = 1;
367 inst->mlen = 2;
368 }
369
370 void
371 vec4_visitor::emit_math(enum opcode opcode,
372 dst_reg dst, src_reg src0, src_reg src1)
373 {
374 switch (opcode) {
375 case SHADER_OPCODE_POW:
376 case SHADER_OPCODE_INT_QUOTIENT:
377 case SHADER_OPCODE_INT_REMAINDER:
378 break;
379 default:
380 assert(!"not reached: unsupported binary math opcode");
381 return;
382 }
383
384 if (intel->gen >= 6) {
385 return emit_math2_gen6(opcode, dst, src0, src1);
386 } else {
387 return emit_math2_gen4(opcode, dst, src0, src1);
388 }
389 }
390
391 void
392 vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
393 {
394 if (intel->gen < 7)
395 assert(!"ir_unop_pack_half_2x16 should be lowered");
396
397 assert(dst.type == BRW_REGISTER_TYPE_UD);
398 assert(src0.type == BRW_REGISTER_TYPE_F);
399
400 /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
401 *
402 * Because this instruction does not have a 16-bit floating-point type,
403 * the destination data type must be Word (W).
404 *
405 * The destination must be DWord-aligned and specify a horizontal stride
406 * (HorzStride) of 2. The 16-bit result is stored in the lower word of
407 * each destination channel and the upper word is not modified.
408 *
409 * The above restriction implies that the f32to16 instruction must use
410 * align1 mode, because only in align1 mode is it possible to specify
411 * horizontal stride. We choose here to defy the hardware docs and emit
412 * align16 instructions.
413 *
414 * (I [chadv] did attempt to emit align1 instructions for VS f32to16
415 * instructions. I was partially successful in that the code passed all
416 * tests. However, the code was dubiously correct and fragile, and the
417 * tests were not harsh enough to probe that frailty. Not trusting the
418 * code, I chose instead to remain in align16 mode in defiance of the hw
419 * docs).
420 *
421 * I've [chadv] experimentally confirmed that, on gen7 hardware and the
422 * simulator, emitting a f32to16 in align16 mode with UD as destination
423 * data type is safe. The behavior differs from that specified in the PRM
424 * in that the upper word of each destination channel is cleared to 0.
425 */
426
427 dst_reg tmp_dst(this, glsl_type::uvec2_type);
428 src_reg tmp_src(tmp_dst);
429
430 #if 0
431 /* Verify the undocumented behavior on which the following instructions
432 * rely. If f32to16 fails to clear the upper word of the X and Y channels,
433 * then the result of the bit-or instruction below will be incorrect.
434 *
435 * You should inspect the disasm output in order to verify that the MOV is
436 * not optimized away.
437 */
438 emit(MOV(tmp_dst, src_reg(0x12345678u)));
439 #endif
440
441 /* Give tmp the form below, where "." means untouched.
442 *
443 * w z y x w z y x
444 * |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
445 *
446 * That the upper word of each write-channel be 0 is required for the
447 * following bit-shift and bit-or instructions to work. Note that this
448 * relies on the undocumented hardware behavior mentioned above.
449 */
450 tmp_dst.writemask = WRITEMASK_XY;
451 emit(F32TO16(tmp_dst, src0));
452
453 /* Give the write-channels of dst the form:
454 * 0xhhhh0000
455 */
456 tmp_src.swizzle = SWIZZLE_Y;
457 emit(SHL(dst, tmp_src, src_reg(16u)));
458
459 /* Finally, give the write-channels of dst the form of packHalf2x16's
460 * output:
461 * 0xhhhhllll
462 */
463 tmp_src.swizzle = SWIZZLE_X;
464 emit(OR(dst, src_reg(dst), tmp_src));
465 }
466
467 void
468 vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
469 {
470 if (intel->gen < 7)
471 assert(!"ir_unop_unpack_half_2x16 should be lowered");
472
473 assert(dst.type == BRW_REGISTER_TYPE_F);
474 assert(src0.type == BRW_REGISTER_TYPE_UD);
475
476 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
477 *
478 * Because this instruction does not have a 16-bit floating-point type,
479 * the source data type must be Word (W). The destination type must be
480 * F (Float).
481 *
482 * To use W as the source data type, we must adjust horizontal strides,
483 * which is only possible in align1 mode. All my [chadv] attempts at
484 * emitting align1 instructions for unpackHalf2x16 failed to pass the
485 * Piglit tests, so I gave up.
486 *
487 * I've verified that, on gen7 hardware and the simulator, it is safe to
488 * emit f16to32 in align16 mode with UD as source data type.
489 */
490
491 dst_reg tmp_dst(this, glsl_type::uvec2_type);
492 src_reg tmp_src(tmp_dst);
493
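/* Split the packed dword: the low 16 bits land in tmp.x and the high 16
 * bits in tmp.y, so a single F16TO32 can convert both halves at once.
 */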
494 tmp_dst.writemask = WRITEMASK_X;
495 emit(AND(tmp_dst, src0, src_reg(0xffffu)));
496
497 tmp_dst.writemask = WRITEMASK_Y;
498 emit(SHR(tmp_dst, src0, src_reg(16u)));
499
500 dst.writemask = WRITEMASK_XY;
501 emit(F16TO32(dst, tmp_src));
502 }
503
504 void
505 vec4_visitor::visit_instructions(const exec_list *list)
506 {
507 foreach_list(node, list) {
508 ir_instruction *ir = (ir_instruction *)node;
509
510 base_ir = ir;
511 ir->accept(this);
512 }
513 }
514
515
516 static int
517 type_size(const struct glsl_type *type)
518 {
519 unsigned int i;
520 int size;
521
522 switch (type->base_type) {
523 case GLSL_TYPE_UINT:
524 case GLSL_TYPE_INT:
525 case GLSL_TYPE_FLOAT:
526 case GLSL_TYPE_BOOL:
527 if (type->is_matrix()) {
528 return type->matrix_columns;
529 } else {
530 /* Regardless of size of vector, it gets a vec4. This is bad
531 * packing for things like floats, but otherwise arrays become a
532 * mess. Hopefully a later pass over the code can pack scalars
533 * down if appropriate.
534 */
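/* E.g. a lone float or a vec2 still occupies a full vec4 slot, so a
 * float[8] array ends up taking eight slots.
 */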
535 return 1;
536 }
537 case GLSL_TYPE_ARRAY:
538 assert(type->length > 0);
539 return type_size(type->fields.array) * type->length;
540 case GLSL_TYPE_STRUCT:
541 size = 0;
542 for (i = 0; i < type->length; i++) {
543 size += type_size(type->fields.structure[i].type);
544 }
545 return size;
546 case GLSL_TYPE_SAMPLER:
547 /* Samplers take up one slot in UNIFORMS[], but they're baked in
548 * at link time.
549 */
550 return 1;
551 case GLSL_TYPE_VOID:
552 case GLSL_TYPE_ERROR:
553 case GLSL_TYPE_INTERFACE:
554 assert(0);
555 break;
556 }
557
558 return 0;
559 }
560
561 int
562 vec4_visitor::virtual_grf_alloc(int size)
563 {
564 if (virtual_grf_array_size <= virtual_grf_count) {
565 if (virtual_grf_array_size == 0)
566 virtual_grf_array_size = 16;
567 else
568 virtual_grf_array_size *= 2;
569 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
570 virtual_grf_array_size);
571 virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
572 virtual_grf_array_size);
573 }
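/* Record where this virtual GRF starts in the flattened register
 * numbering and how many consecutive registers it spans.
 */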
574 virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
575 virtual_grf_reg_count += size;
576 virtual_grf_sizes[virtual_grf_count] = size;
577 return virtual_grf_count++;
578 }
579
580 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
581 {
582 init();
583
584 this->file = GRF;
585 this->reg = v->virtual_grf_alloc(type_size(type));
586
587 if (type->is_array() || type->is_record()) {
588 this->swizzle = BRW_SWIZZLE_NOOP;
589 } else {
590 this->swizzle = swizzle_for_size(type->vector_elements);
591 }
592
593 this->type = brw_type_for_base_type(type);
594 }
595
596 dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
597 {
598 init();
599
600 this->file = GRF;
601 this->reg = v->virtual_grf_alloc(type_size(type));
602
603 if (type->is_array() || type->is_record()) {
604 this->writemask = WRITEMASK_XYZW;
605 } else {
606 this->writemask = (1 << type->vector_elements) - 1;
607 }
608
609 this->type = brw_type_for_base_type(type);
610 }
611
612 /* Our support for uniforms is piggy-backed on the struct
613 * gl_fragment_program, because that's where the values actually
614 * get stored, rather than in some global gl_shader_program uniform
615 * store.
616 */
617 void
618 vec4_visitor::setup_uniform_values(ir_variable *ir)
619 {
620 int namelen = strlen(ir->name);
621
622 /* The data for our (non-builtin) uniforms is stored in a series of
623 * gl_uniform_driver_storage structs for each subcomponent that
624 * glGetUniformLocation() could name. We know it's been set up in the same
625 * order we'd walk the type, so walk the list of storage and find anything
626 * with our name, or the prefix of a component that starts with our name.
627 */
628 for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
629 struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
630
631 if (strncmp(ir->name, storage->name, namelen) != 0 ||
632 (storage->name[namelen] != 0 &&
633 storage->name[namelen] != '.' &&
634 storage->name[namelen] != '[')) {
635 continue;
636 }
637
638 gl_constant_value *components = storage->storage;
639 unsigned vector_count = (MAX2(storage->array_elements, 1) *
640 storage->type->matrix_columns);
641
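/* Each vector making up the uniform (one per array element and matrix
 * column) gets its own vec4 slot; unused trailing components are pointed
 * at a shared zero constant.
 */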
642 for (unsigned s = 0; s < vector_count; s++) {
643 uniform_vector_size[uniforms] = storage->type->vector_elements;
644
645 int i;
646 for (i = 0; i < uniform_vector_size[uniforms]; i++) {
647 prog_data->param[uniforms * 4 + i] = &components->f;
648 components++;
649 }
650 for (; i < 4; i++) {
651 static float zero = 0;
652 prog_data->param[uniforms * 4 + i] = &zero;
653 }
654
655 uniforms++;
656 }
657 }
658 }
659
660 void
661 vec4_visitor::setup_uniform_clipplane_values()
662 {
663 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
664
665 if (intel->gen < 6) {
666 /* Pre-Gen6, we compact clip planes. For example, if the user
667 * enables just clip planes 0, 1, and 3, we will enable clip planes
668 * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
669 * plane 2. This simplifies the implementation of the Gen6 clip
670 * thread.
671 */
672 int compacted_clipplane_index = 0;
673 for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
674 if (!(key->userclip_planes_enabled_gen_4_5 & (1 << i)))
675 continue;
676
677 this->uniform_vector_size[this->uniforms] = 4;
678 this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
679 this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
680 for (int j = 0; j < 4; ++j) {
681 prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j];
682 }
683 ++compacted_clipplane_index;
684 ++this->uniforms;
685 }
686 } else {
687 /* In Gen6 and later, we don't compact clip planes, because this
688 * simplifies the implementation of gl_ClipDistance.
689 */
690 for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
691 this->uniform_vector_size[this->uniforms] = 4;
692 this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
693 this->userplane[i].type = BRW_REGISTER_TYPE_F;
694 for (int j = 0; j < 4; ++j) {
695 prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j];
696 }
697 ++this->uniforms;
698 }
699 }
700 }
701
702 /* Our support for builtin uniforms is even scarier than non-builtin.
703 * It sits on top of the PROG_STATE_VAR parameters that are
704 * automatically updated from GL context state.
705 */
706 void
707 vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
708 {
709 const ir_state_slot *const slots = ir->state_slots;
710 assert(ir->state_slots != NULL);
711
712 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
713 /* This state reference has already been setup by ir_to_mesa,
714 * but we'll get the same index back here. We can reference
715 * ParameterValues directly, since unlike brw_fs.cpp, we never
716 * add new state references during compile.
717 */
718 int index = _mesa_add_state_reference(this->prog->Parameters,
719 (gl_state_index *)slots[i].tokens);
720 float *values = &this->prog->Parameters->ParameterValues[index][0].f;
721
722 this->uniform_vector_size[this->uniforms] = 0;
723 /* Add each of the unique swizzled channels of the element.
724 * This will end up matching the size of the glsl_type of this field.
725 */
726 int last_swiz = -1;
727 for (unsigned int j = 0; j < 4; j++) {
728 int swiz = GET_SWZ(slots[i].swizzle, j);
729 last_swiz = swiz;
730
731 prog_data->param[this->uniforms * 4 + j] = &values[swiz];
732 if (swiz <= last_swiz)
733 this->uniform_vector_size[this->uniforms]++;
734 }
735 this->uniforms++;
736 }
737 }
738
739 dst_reg *
740 vec4_visitor::variable_storage(ir_variable *var)
741 {
742 return (dst_reg *)hash_table_find(this->variable_ht, var);
743 }
744
745 void
746 vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
747 {
748 ir_expression *expr = ir->as_expression();
749
750 *predicate = BRW_PREDICATE_NORMAL;
751
752 if (expr) {
753 src_reg op[2];
754 vec4_instruction *inst;
755
756 assert(expr->get_num_operands() <= 2);
757 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
758 expr->operands[i]->accept(this);
759 op[i] = this->result;
760
761 resolve_ud_negate(&op[i]);
762 }
763
764 switch (expr->operation) {
765 case ir_unop_logic_not:
766 inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
767 inst->conditional_mod = BRW_CONDITIONAL_Z;
768 break;
769
770 case ir_binop_logic_xor:
771 inst = emit(XOR(dst_null_d(), op[0], op[1]));
772 inst->conditional_mod = BRW_CONDITIONAL_NZ;
773 break;
774
775 case ir_binop_logic_or:
776 inst = emit(OR(dst_null_d(), op[0], op[1]));
777 inst->conditional_mod = BRW_CONDITIONAL_NZ;
778 break;
779
780 case ir_binop_logic_and:
781 inst = emit(AND(dst_null_d(), op[0], op[1]));
782 inst->conditional_mod = BRW_CONDITIONAL_NZ;
783 break;
784
785 case ir_unop_f2b:
786 if (intel->gen >= 6) {
787 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
788 } else {
789 inst = emit(MOV(dst_null_f(), op[0]));
790 inst->conditional_mod = BRW_CONDITIONAL_NZ;
791 }
792 break;
793
794 case ir_unop_i2b:
795 if (intel->gen >= 6) {
796 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
797 } else {
798 inst = emit(MOV(dst_null_d(), op[0]));
799 inst->conditional_mod = BRW_CONDITIONAL_NZ;
800 }
801 break;
802
803 case ir_binop_all_equal:
804 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
805 *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
806 break;
807
808 case ir_binop_any_nequal:
809 inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
810 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
811 break;
812
813 case ir_unop_any:
814 inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
815 *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
816 break;
817
818 case ir_binop_greater:
819 case ir_binop_gequal:
820 case ir_binop_less:
821 case ir_binop_lequal:
822 case ir_binop_equal:
823 case ir_binop_nequal:
824 emit(CMP(dst_null_d(), op[0], op[1],
825 brw_conditional_for_comparison(expr->operation)));
826 break;
827
828 default:
829 assert(!"not reached");
830 break;
831 }
832 return;
833 }
834
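/* Not a recognized boolean expression: evaluate it and set the flag from
 * the result (AND with 1 on gen6+, a flag-writing MOV on gen4/5).
 */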
835 ir->accept(this);
836
837 resolve_ud_negate(&this->result);
838
839 if (intel->gen >= 6) {
840 vec4_instruction *inst = emit(AND(dst_null_d(),
841 this->result, src_reg(1)));
842 inst->conditional_mod = BRW_CONDITIONAL_NZ;
843 } else {
844 vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
845 inst->conditional_mod = BRW_CONDITIONAL_NZ;
846 }
847 }
848
849 /**
850 * Emit a gen6 IF statement with the comparison folded into the IF
851 * instruction.
852 */
853 void
854 vec4_visitor::emit_if_gen6(ir_if *ir)
855 {
856 ir_expression *expr = ir->condition->as_expression();
857
858 if (expr) {
859 src_reg op[2];
860 dst_reg temp;
861
862 assert(expr->get_num_operands() <= 2);
863 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
864 expr->operands[i]->accept(this);
865 op[i] = this->result;
866 }
867
868 switch (expr->operation) {
869 case ir_unop_logic_not:
870 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
871 return;
872
873 case ir_binop_logic_xor:
874 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
875 return;
876
877 case ir_binop_logic_or:
878 temp = dst_reg(this, glsl_type::bool_type);
879 emit(OR(temp, op[0], op[1]));
880 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
881 return;
882
883 case ir_binop_logic_and:
884 temp = dst_reg(this, glsl_type::bool_type);
885 emit(AND(temp, op[0], op[1]));
886 emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
887 return;
888
889 case ir_unop_f2b:
890 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
891 return;
892
893 case ir_unop_i2b:
894 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
895 return;
896
897 case ir_binop_greater:
898 case ir_binop_gequal:
899 case ir_binop_less:
900 case ir_binop_lequal:
901 case ir_binop_equal:
902 case ir_binop_nequal:
903 emit(IF(op[0], op[1],
904 brw_conditional_for_comparison(expr->operation)));
905 return;
906
907 case ir_binop_all_equal:
908 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
909 emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
910 return;
911
912 case ir_binop_any_nequal:
913 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
914 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
915 return;
916
917 case ir_unop_any:
918 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
919 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
920 return;
921
922 default:
923 assert(!"not reached");
924 emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
925 return;
926 }
927 return;
928 }
929
930 ir->condition->accept(this);
931
932 emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
933 }
934
935 static dst_reg
936 with_writemask(dst_reg const & r, int mask)
937 {
938 dst_reg result = r;
939 result.writemask = mask;
940 return result;
941 }
942
943 void
944 vec4_vs_visitor::emit_prolog()
945 {
946 dst_reg sign_recovery_shift;
947 dst_reg normalize_factor;
948 dst_reg es3_normalize_factor;
949
950 for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
951 if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) {
952 uint8_t wa_flags = vs_compile->key.gl_attrib_wa_flags[i];
953 dst_reg reg(ATTR, i);
954 dst_reg reg_d = reg;
955 reg_d.type = BRW_REGISTER_TYPE_D;
956 dst_reg reg_ud = reg;
957 reg_ud.type = BRW_REGISTER_TYPE_UD;
958
959 /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
960 * come in as floating point conversions of the integer values.
961 */
962 if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) {
963 dst_reg dst = reg;
964 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
965 dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
966 emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
967 }
968
969 /* Do sign recovery for 2101010 formats if required. */
970 if (wa_flags & BRW_ATTRIB_WA_SIGN) {
971 if (sign_recovery_shift.file == BAD_FILE) {
972 /* shift constant: <22,22,22,30> */
973 sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
974 emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
975 emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
976 }
977
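/* Shift each component up so its sign bit reaches bit 31, then shift it
 * back arithmetically: this sign-extends the 10-bit XYZ (and 2-bit W)
 * values in place.
 */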
978 emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
979 emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift)));
980 }
981
982 /* Apply BGRA swizzle if required. */
983 if (wa_flags & BRW_ATTRIB_WA_BGRA) {
984 src_reg temp = src_reg(reg);
985 temp.swizzle = BRW_SWIZZLE4(2,1,0,3);
986 emit(MOV(reg, temp));
987 }
988
989 if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
990 /* ES 3.0 has different rules for converting signed normalized
991 * fixed-point numbers than desktop GL.
992 */
993 if (_mesa_is_gles3(ctx) && (wa_flags & BRW_ATTRIB_WA_SIGN)) {
994 /* According to equation 2.2 of the ES 3.0 specification,
995 * signed normalization conversion is done by:
996 *
997 * f = c / (2^(b-1)-1)
998 */
999 if (es3_normalize_factor.file == BAD_FILE) {
1000 /* mul constant: 1 / (2^(b-1) - 1) */
1001 es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
1002 emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_XYZ),
1003 src_reg(1.0f / ((1<<9) - 1))));
1004 emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_W),
1005 src_reg(1.0f / ((1<<1) - 1))));
1006 }
1007
1008 dst_reg dst = reg;
1009 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
1010 emit(MOV(dst, src_reg(reg_d)));
1011 emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
1012 emit_minmax(BRW_CONDITIONAL_G, dst, src_reg(dst), src_reg(-1.0f));
1013 } else {
1014 /* The following equations are from the OpenGL 3.2 specification:
1015 *
1016 * 2.1 unsigned normalization
1017 * f = c/(2^n-1)
1018 *
1019 * 2.2 signed normalization
1020 * f = (2c+1)/(2^n-1)
1021 *
1022 * Both of these share a common divisor, which is represented by
1023 * "normalize_factor" in the code below.
1024 */
1025 if (normalize_factor.file == BAD_FILE) {
1026 /* 1 / (2^b - 1) for b=<10,10,10,2> */
1027 normalize_factor = dst_reg(this, glsl_type::vec4_type);
1028 emit(MOV(with_writemask(normalize_factor, WRITEMASK_XYZ),
1029 src_reg(1.0f / ((1<<10) - 1))));
1030 emit(MOV(with_writemask(normalize_factor, WRITEMASK_W),
1031 src_reg(1.0f / ((1<<2) - 1))));
1032 }
1033
1034 dst_reg dst = reg;
1035 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
1036 emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
1037
1038 /* For signed normalization, we want the numerator to be 2c+1. */
1039 if (wa_flags & BRW_ATTRIB_WA_SIGN) {
1040 emit(MUL(dst, src_reg(dst), src_reg(2.0f)));
1041 emit(ADD(dst, src_reg(dst), src_reg(1.0f)));
1042 }
1043
1044 emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
1045 }
1046 }
1047
1048 if (wa_flags & BRW_ATTRIB_WA_SCALE) {
1049 dst_reg dst = reg;
1050 dst.type = brw_type_for_base_type(glsl_type::vec4_type);
1051 emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
1052 }
1053 }
1054 }
1055 }
1056
1057
1058 dst_reg *
1059 vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir)
1060 {
1061 /* VertexID is stored by the VF as the last vertex element, but
1062 * we don't represent it with a flag in inputs_read, so we call
1063 * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
1064 */
1065 dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
1066 vs_prog_data->uses_vertexid = true;
1067
1068 switch (ir->location) {
1069 case SYSTEM_VALUE_VERTEX_ID:
1070 reg->writemask = WRITEMASK_X;
1071 break;
1072 case SYSTEM_VALUE_INSTANCE_ID:
1073 reg->writemask = WRITEMASK_Y;
1074 break;
1075 default:
1076 assert(!"not reached");
1077 break;
1078 }
1079
1080 return reg;
1081 }
1082
1083
1084 void
1085 vec4_visitor::visit(ir_variable *ir)
1086 {
1087 dst_reg *reg = NULL;
1088
1089 if (variable_storage(ir))
1090 return;
1091
1092 switch (ir->mode) {
1093 case ir_var_shader_in:
1094 reg = new(mem_ctx) dst_reg(ATTR, ir->location);
1095 break;
1096
1097 case ir_var_shader_out:
1098 reg = new(mem_ctx) dst_reg(this, ir->type);
1099
1100 for (int i = 0; i < type_size(ir->type); i++) {
1101 output_reg[ir->location + i] = *reg;
1102 output_reg[ir->location + i].reg_offset = i;
1103 output_reg[ir->location + i].type =
1104 brw_type_for_base_type(ir->type->get_scalar_type());
1105 output_reg_annotation[ir->location + i] = ir->name;
1106 }
1107 break;
1108
1109 case ir_var_auto:
1110 case ir_var_temporary:
1111 reg = new(mem_ctx) dst_reg(this, ir->type);
1112 break;
1113
1114 case ir_var_uniform:
1115 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
1116
1117 /* Thanks to the lower_ubo_reference pass, we will see only
1118 * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
1119 * variables, so no need for them to be in variable_ht.
1120 */
1121 if (ir->is_in_uniform_block())
1122 return;
1123
1124 /* Track how big the whole uniform variable is, in case we need to put a
1125 * copy of its data into pull constants for array access.
1126 */
1127 this->uniform_size[this->uniforms] = type_size(ir->type);
1128
1129 if (!strncmp(ir->name, "gl_", 3)) {
1130 setup_builtin_uniform_values(ir);
1131 } else {
1132 setup_uniform_values(ir);
1133 }
1134 break;
1135
1136 case ir_var_system_value:
1137 reg = make_reg_for_system_value(ir);
1138 break;
1139
1140 default:
1141 assert(!"not reached");
1142 }
1143
1144 reg->type = brw_type_for_base_type(ir->type);
1145 hash_table_insert(this->variable_ht, reg, ir);
1146 }
1147
1148 void
1149 vec4_visitor::visit(ir_loop *ir)
1150 {
1151 dst_reg counter;
1152
1153 /* We don't want debugging output to print the whole body of the
1154 * loop as the annotation.
1155 */
1156 this->base_ir = NULL;
1157
1158 if (ir->counter != NULL) {
1159 this->base_ir = ir->counter;
1160 ir->counter->accept(this);
1161 counter = *(variable_storage(ir->counter));
1162
1163 if (ir->from != NULL) {
1164 this->base_ir = ir->from;
1165 ir->from->accept(this);
1166
1167 emit(MOV(counter, this->result));
1168 }
1169 }
1170
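/* Lower the loop to DO ... WHILE, with the exit test (CMP plus a
 * predicated BREAK) evaluated at the top of the body and the counter
 * increment at the bottom.
 */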
1171 emit(BRW_OPCODE_DO);
1172
1173 if (ir->to) {
1174 this->base_ir = ir->to;
1175 ir->to->accept(this);
1176
1177 emit(CMP(dst_null_d(), src_reg(counter), this->result,
1178 brw_conditional_for_comparison(ir->cmp)));
1179
1180 vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
1181 inst->predicate = BRW_PREDICATE_NORMAL;
1182 }
1183
1184 visit_instructions(&ir->body_instructions);
1185
1186
1187 if (ir->increment) {
1188 this->base_ir = ir->increment;
1189 ir->increment->accept(this);
1190 emit(ADD(counter, src_reg(counter), this->result));
1191 }
1192
1193 emit(BRW_OPCODE_WHILE);
1194 }
1195
1196 void
1197 vec4_visitor::visit(ir_loop_jump *ir)
1198 {
1199 switch (ir->mode) {
1200 case ir_loop_jump::jump_break:
1201 emit(BRW_OPCODE_BREAK);
1202 break;
1203 case ir_loop_jump::jump_continue:
1204 emit(BRW_OPCODE_CONTINUE);
1205 break;
1206 }
1207 }
1208
1209
1210 void
1211 vec4_visitor::visit(ir_function_signature *ir)
1212 {
1213 assert(0);
1214 (void)ir;
1215 }
1216
1217 void
1218 vec4_visitor::visit(ir_function *ir)
1219 {
1220 /* Ignore function bodies other than main() -- we shouldn't see calls to
1221 * them since they should all be inlined.
1222 */
1223 if (strcmp(ir->name, "main") == 0) {
1224 const ir_function_signature *sig;
1225 exec_list empty;
1226
1227 sig = ir->matching_signature(&empty);
1228
1229 assert(sig);
1230
1231 visit_instructions(&sig->body);
1232 }
1233 }
1234
1235 bool
1236 vec4_visitor::try_emit_sat(ir_expression *ir)
1237 {
1238 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1239 if (!sat_src)
1240 return false;
1241
1242 sat_src->accept(this);
1243 src_reg src = this->result;
1244
1245 this->result = src_reg(this, ir->type);
1246 vec4_instruction *inst;
1247 inst = emit(MOV(dst_reg(this->result), src));
1248 inst->saturate = true;
1249
1250 return true;
1251 }
1252
1253 bool
1254 vec4_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
1255 {
1256 /* 3-src instructions were introduced in gen6. */
1257 if (intel->gen < 6)
1258 return false;
1259
1260 /* MAD can only handle floating-point data. */
1261 if (ir->type->base_type != GLSL_TYPE_FLOAT)
1262 return false;
1263
1264 ir_rvalue *nonmul = ir->operands[1 - mul_arg];
1265 ir_expression *mul = ir->operands[mul_arg]->as_expression();
1266
1267 if (!mul || mul->operation != ir_binop_mul)
1268 return false;
1269
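/* Fuse (a * b) + c into a single MAD, running each operand through
 * fix_3src_operand() since three-source instructions can't consume
 * uniforms or immediates directly.
 */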
1270 nonmul->accept(this);
1271 src_reg src0 = fix_3src_operand(this->result);
1272
1273 mul->operands[0]->accept(this);
1274 src_reg src1 = fix_3src_operand(this->result);
1275
1276 mul->operands[1]->accept(this);
1277 src_reg src2 = fix_3src_operand(this->result);
1278
1279 this->result = src_reg(this, ir->type);
1280 emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2);
1281
1282 return true;
1283 }
1284
1285 void
1286 vec4_visitor::emit_bool_comparison(unsigned int op,
1287 dst_reg dst, src_reg src0, src_reg src1)
1288 {
1289 /* original gen4 does destination conversion before comparison. */
1290 if (intel->gen < 5)
1291 dst.type = src0.type;
1292
1293 emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
1294
1295 dst.type = BRW_REGISTER_TYPE_D;
1296 emit(AND(dst, src_reg(dst), src_reg(0x1)));
1297 }
1298
1299 void
1300 vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
1301 src_reg src0, src_reg src1)
1302 {
1303 vec4_instruction *inst;
1304
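/* Gen6+ SEL can apply the conditional modifier directly; older parts
 * need an explicit CMP to set the flag followed by a predicated SEL.
 */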
1305 if (intel->gen >= 6) {
1306 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1307 inst->conditional_mod = conditionalmod;
1308 } else {
1309 emit(CMP(dst, src0, src1, conditionalmod));
1310
1311 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
1312 inst->predicate = BRW_PREDICATE_NORMAL;
1313 }
1314 }
1315
1316 static bool
1317 is_16bit_constant(ir_rvalue *rvalue)
1318 {
1319 ir_constant *constant = rvalue->as_constant();
1320 if (!constant)
1321 return false;
1322
1323 if (constant->type != glsl_type::int_type &&
1324 constant->type != glsl_type::uint_type)
1325 return false;
1326
1327 return constant->value.u[0] < (1 << 16);
1328 }
1329
1330 void
1331 vec4_visitor::visit(ir_expression *ir)
1332 {
1333 unsigned int operand;
1334 src_reg op[Elements(ir->operands)];
1335 src_reg result_src;
1336 dst_reg result_dst;
1337 vec4_instruction *inst;
1338
1339 if (try_emit_sat(ir))
1340 return;
1341
1342 if (ir->operation == ir_binop_add) {
1343 if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
1344 return;
1345 }
1346
1347 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1348 this->result.file = BAD_FILE;
1349 ir->operands[operand]->accept(this);
1350 if (this->result.file == BAD_FILE) {
1351 printf("Failed to get tree for expression operand:\n");
1352 ir->operands[operand]->print();
1353 exit(1);
1354 }
1355 op[operand] = this->result;
1356
1357 /* Matrix expression operands should have been broken down to vector
1358 * operations already.
1359 */
1360 assert(!ir->operands[operand]->type->is_matrix());
1361 }
1362
1363 int vector_elements = ir->operands[0]->type->vector_elements;
1364 if (ir->operands[1]) {
1365 vector_elements = MAX2(vector_elements,
1366 ir->operands[1]->type->vector_elements);
1367 }
1368
1369 this->result.file = BAD_FILE;
1370
1371 /* Storage for our result. Ideally for an assignment we'd be using
1372 * the actual storage for the result here, instead.
1373 */
1374 result_src = src_reg(this, ir->type);
1375 /* convenience for the emit functions below. */
1376 result_dst = dst_reg(result_src);
1377 /* If nothing special happens, this is the result. */
1378 this->result = result_src;
1379 /* Limit writes to the channels that will be used by result_src later.
1380 * This does limit this temp's use as a temporary for multi-instruction
1381 * sequences.
1382 */
1383 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1384
1385 switch (ir->operation) {
1386 case ir_unop_logic_not:
1387 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
1388 * the ones complement of the whole register, not just bit 0.
1389 */
1390 emit(XOR(result_dst, op[0], src_reg(1)));
1391 break;
1392 case ir_unop_neg:
1393 op[0].negate = !op[0].negate;
1394 this->result = op[0];
1395 break;
1396 case ir_unop_abs:
1397 op[0].abs = true;
1398 op[0].negate = false;
1399 this->result = op[0];
1400 break;
1401
1402 case ir_unop_sign:
1403 emit(MOV(result_dst, src_reg(0.0f)));
1404
1405 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
1406 inst = emit(MOV(result_dst, src_reg(1.0f)));
1407 inst->predicate = BRW_PREDICATE_NORMAL;
1408
1409 emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
1410 inst = emit(MOV(result_dst, src_reg(-1.0f)));
1411 inst->predicate = BRW_PREDICATE_NORMAL;
1412
1413 break;
1414
1415 case ir_unop_rcp:
1416 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
1417 break;
1418
1419 case ir_unop_exp2:
1420 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
1421 break;
1422 case ir_unop_log2:
1423 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
1424 break;
1425 case ir_unop_exp:
1426 case ir_unop_log:
1427 assert(!"not reached: should be handled by ir_explog_to_explog2");
1428 break;
1429 case ir_unop_sin:
1430 case ir_unop_sin_reduced:
1431 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
1432 break;
1433 case ir_unop_cos:
1434 case ir_unop_cos_reduced:
1435 emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
1436 break;
1437
1438 case ir_unop_dFdx:
1439 case ir_unop_dFdy:
1440 assert(!"derivatives not valid in vertex shader");
1441 break;
1442
1443 case ir_unop_bitfield_reverse:
1444 emit(BFREV(result_dst, op[0]));
1445 break;
1446 case ir_unop_bit_count:
1447 emit(CBIT(result_dst, op[0]));
1448 break;
1449 case ir_unop_find_msb: {
1450 src_reg temp = src_reg(this, glsl_type::uint_type);
1451
1452 inst = emit(FBH(dst_reg(temp), op[0]));
1453 inst->dst.writemask = WRITEMASK_XYZW;
1454
1455 /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
1456 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
1457 * subtract the result from 31 to convert the MSB count into an LSB count.
1458 */
1459
1460 /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
1461 temp.swizzle = BRW_SWIZZLE_NOOP;
1462 emit(MOV(result_dst, temp));
1463
1464 src_reg src_tmp = src_reg(result_dst);
1465 emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
1466
1467 src_tmp.negate = true;
1468 inst = emit(ADD(result_dst, src_tmp, src_reg(31)));
1469 inst->predicate = BRW_PREDICATE_NORMAL;
1470 break;
1471 }
1472 case ir_unop_find_lsb:
1473 emit(FBL(result_dst, op[0]));
1474 break;
1475
1476 case ir_unop_noise:
1477 assert(!"not reached: should be handled by lower_noise");
1478 break;
1479
1480 case ir_binop_add:
1481 emit(ADD(result_dst, op[0], op[1]));
1482 break;
1483 case ir_binop_sub:
1484 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1485 break;
1486
1487 case ir_binop_mul:
1488 if (ir->type->is_integer()) {
1489 /* For integer multiplication, the MUL uses the low 16 bits of one of
1490 * the operands (src0 through SNB, src1 on IVB and later). The MACH
1491 * accumulates in the contribution of the upper 16 bits of that
1492 * operand. If we can determine that one of the args is in the low
1493 * 16 bits, though, we can just emit a single MUL.
1494 */
1495 if (is_16bit_constant(ir->operands[0])) {
1496 if (intel->gen < 7)
1497 emit(MUL(result_dst, op[0], op[1]));
1498 else
1499 emit(MUL(result_dst, op[1], op[0]));
1500 } else if (is_16bit_constant(ir->operands[1])) {
1501 if (intel->gen < 7)
1502 emit(MUL(result_dst, op[1], op[0]));
1503 else
1504 emit(MUL(result_dst, op[0], op[1]));
1505 } else {
1506 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
1507
1508 emit(MUL(acc, op[0], op[1]));
1509 emit(MACH(dst_null_d(), op[0], op[1]));
1510 emit(MOV(result_dst, src_reg(acc)));
1511 }
1512 } else {
1513 emit(MUL(result_dst, op[0], op[1]));
1514 }
1515 break;
1516 case ir_binop_div:
1517 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
1518 assert(ir->type->is_integer());
1519 emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
1520 break;
1521 case ir_binop_mod:
1522 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
1523 assert(ir->type->is_integer());
1524 emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
1525 break;
1526
1527 case ir_binop_less:
1528 case ir_binop_greater:
1529 case ir_binop_lequal:
1530 case ir_binop_gequal:
1531 case ir_binop_equal:
1532 case ir_binop_nequal: {
1533 emit(CMP(result_dst, op[0], op[1],
1534 brw_conditional_for_comparison(ir->operation)));
1535 emit(AND(result_dst, result_src, src_reg(0x1)));
1536 break;
1537 }
1538
1539 case ir_binop_all_equal:
1540 /* "==" operator producing a scalar boolean. */
1541 if (ir->operands[0]->type->is_vector() ||
1542 ir->operands[1]->type->is_vector()) {
1543 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
1544 emit(MOV(result_dst, src_reg(0)));
1545 inst = emit(MOV(result_dst, src_reg(1)));
1546 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
1547 } else {
1548 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
1549 emit(AND(result_dst, result_src, src_reg(0x1)));
1550 }
1551 break;
1552 case ir_binop_any_nequal:
1553 /* "!=" operator producing a scalar boolean. */
1554 if (ir->operands[0]->type->is_vector() ||
1555 ir->operands[1]->type->is_vector()) {
1556 emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
1557
1558 emit(MOV(result_dst, src_reg(0)));
1559 inst = emit(MOV(result_dst, src_reg(1)));
1560 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1561 } else {
1562 emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
1563 emit(AND(result_dst, result_src, src_reg(0x1)));
1564 }
1565 break;
1566
1567 case ir_unop_any:
1568 emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1569 emit(MOV(result_dst, src_reg(0)));
1570
1571 inst = emit(MOV(result_dst, src_reg(1)));
1572 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1573 break;
1574
1575 case ir_binop_logic_xor:
1576 emit(XOR(result_dst, op[0], op[1]));
1577 break;
1578
1579 case ir_binop_logic_or:
1580 emit(OR(result_dst, op[0], op[1]));
1581 break;
1582
1583 case ir_binop_logic_and:
1584 emit(AND(result_dst, op[0], op[1]));
1585 break;
1586
1587 case ir_binop_dot:
1588 assert(ir->operands[0]->type->is_vector());
1589 assert(ir->operands[0]->type == ir->operands[1]->type);
1590 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1591 break;
1592
1593 case ir_unop_sqrt:
1594 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1595 break;
1596 case ir_unop_rsq:
1597 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1598 break;
1599
1600 case ir_unop_bitcast_i2f:
1601 case ir_unop_bitcast_u2f:
1602 this->result = op[0];
1603 this->result.type = BRW_REGISTER_TYPE_F;
1604 break;
1605
1606 case ir_unop_bitcast_f2i:
1607 this->result = op[0];
1608 this->result.type = BRW_REGISTER_TYPE_D;
1609 break;
1610
1611 case ir_unop_bitcast_f2u:
1612 this->result = op[0];
1613 this->result.type = BRW_REGISTER_TYPE_UD;
1614 break;
1615
1616 case ir_unop_i2f:
1617 case ir_unop_i2u:
1618 case ir_unop_u2i:
1619 case ir_unop_u2f:
1620 case ir_unop_b2f:
1621 case ir_unop_b2i:
1622 case ir_unop_f2i:
1623 case ir_unop_f2u:
1624 emit(MOV(result_dst, op[0]));
1625 break;
1626 case ir_unop_f2b:
1627 case ir_unop_i2b: {
1628 emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1629 emit(AND(result_dst, result_src, src_reg(1)));
1630 break;
1631 }
1632
1633 case ir_unop_trunc:
1634 emit(RNDZ(result_dst, op[0]));
1635 break;
1636 case ir_unop_ceil:
1637 op[0].negate = !op[0].negate;
1638 inst = emit(RNDD(result_dst, op[0]));
1639 this->result.negate = true;
1640 break;
1641 case ir_unop_floor:
1642 inst = emit(RNDD(result_dst, op[0]));
1643 break;
1644 case ir_unop_fract:
1645 inst = emit(FRC(result_dst, op[0]));
1646 break;
1647 case ir_unop_round_even:
1648 emit(RNDE(result_dst, op[0]));
1649 break;
1650
1651 case ir_binop_min:
1652 emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
1653 break;
1654 case ir_binop_max:
1655 emit_minmax(BRW_CONDITIONAL_G, result_dst, op[0], op[1]);
1656 break;
1657
1658 case ir_binop_pow:
1659 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1660 break;
1661
1662 case ir_unop_bit_not:
1663 inst = emit(NOT(result_dst, op[0]));
1664 break;
1665 case ir_binop_bit_and:
1666 inst = emit(AND(result_dst, op[0], op[1]));
1667 break;
1668 case ir_binop_bit_xor:
1669 inst = emit(XOR(result_dst, op[0], op[1]));
1670 break;
1671 case ir_binop_bit_or:
1672 inst = emit(OR(result_dst, op[0], op[1]));
1673 break;
1674
1675 case ir_binop_lshift:
1676 inst = emit(SHL(result_dst, op[0], op[1]));
1677 break;
1678
1679 case ir_binop_rshift:
1680 if (ir->type->base_type == GLSL_TYPE_INT)
1681 inst = emit(ASR(result_dst, op[0], op[1]));
1682 else
1683 inst = emit(SHR(result_dst, op[0], op[1]));
1684 break;
1685
1686 case ir_binop_bfm:
1687 emit(BFI1(result_dst, op[0], op[1]));
1688 break;
1689
1690 case ir_binop_ubo_load: {
1691 ir_constant *uniform_block = ir->operands[0]->as_constant();
1692 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1693 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
1694 src_reg offset = op[1];
1695
1696 /* Now, load the vector from that offset. */
1697 assert(ir->type->is_vector() || ir->type->is_scalar());
1698
1699 src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
1700 packed_consts.type = result.type;
1701 src_reg surf_index =
1702 src_reg(SURF_INDEX_VS_UBO(uniform_block->value.u[0]));
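/* The load below fetches one vec4 at a time, so convert the byte offset
 * into 16-byte units first.
 */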
1703 if (const_offset_ir) {
1704 offset = src_reg(const_offset / 16);
1705 } else {
1706 emit(SHR(dst_reg(offset), offset, src_reg(4)));
1707 }
1708
1709 vec4_instruction *pull =
1710 emit(new(mem_ctx) vec4_instruction(this,
1711 VS_OPCODE_PULL_CONSTANT_LOAD,
1712 dst_reg(packed_consts),
1713 surf_index,
1714 offset));
1715 pull->base_mrf = 14;
1716 pull->mlen = 1;
1717
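/* Pick out the dwords we want from the fetched 16-byte block by folding
 * the sub-vec4 part of the offset into the swizzle.
 */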
1718 packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements);
1719 packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
1720 const_offset % 16 / 4,
1721 const_offset % 16 / 4,
1722 const_offset % 16 / 4);
1723
1724 /* UBO bools are any nonzero int. We store bools as either 0 or 1. */
1725 if (ir->type->base_type == GLSL_TYPE_BOOL) {
1726 emit(CMP(result_dst, packed_consts, src_reg(0u),
1727 BRW_CONDITIONAL_NZ));
1728 emit(AND(result_dst, result, src_reg(0x1)));
1729 } else {
1730 emit(MOV(result_dst, packed_consts));
1731 }
1732 break;
1733 }
1734
1735 case ir_binop_vector_extract:
1736 assert(!"should have been lowered by vec_index_to_cond_assign");
1737 break;
1738
1739 case ir_triop_lrp:
1740 op[0] = fix_3src_operand(op[0]);
1741 op[1] = fix_3src_operand(op[1]);
1742 op[2] = fix_3src_operand(op[2]);
1743 /* Note that the instruction's argument order is reversed from GLSL
1744 * and the IR.
1745 */
1746 emit(LRP(result_dst, op[2], op[1], op[0]));
1747 break;
1748
1749 case ir_triop_bfi:
1750 op[0] = fix_3src_operand(op[0]);
1751 op[1] = fix_3src_operand(op[1]);
1752 op[2] = fix_3src_operand(op[2]);
1753 emit(BFI2(result_dst, op[0], op[1], op[2]));
1754 break;
1755
1756 case ir_triop_bitfield_extract:
1757 op[0] = fix_3src_operand(op[0]);
1758 op[1] = fix_3src_operand(op[1]);
1759 op[2] = fix_3src_operand(op[2]);
1760 /* Note that the instruction's argument order is reversed from GLSL
1761 * and the IR.
1762 */
1763 emit(BFE(result_dst, op[2], op[1], op[0]));
1764 break;
1765
1766 case ir_triop_vector_insert:
1767 assert(!"should have been lowered by lower_vector_insert");
1768 break;
1769
1770 case ir_quadop_bitfield_insert:
1771 assert(!"not reached: should be handled by "
1772 "bitfield_insert_to_bfm_bfi\n");
1773 break;
1774
1775 case ir_quadop_vector:
1776 assert(!"not reached: should be handled by lower_quadop_vector");
1777 break;
1778
1779 case ir_unop_pack_half_2x16:
1780 emit_pack_half_2x16(result_dst, op[0]);
1781 break;
1782 case ir_unop_unpack_half_2x16:
1783 emit_unpack_half_2x16(result_dst, op[0]);
1784 break;
1785 case ir_unop_pack_snorm_2x16:
1786 case ir_unop_pack_snorm_4x8:
1787 case ir_unop_pack_unorm_2x16:
1788 case ir_unop_pack_unorm_4x8:
1789 case ir_unop_unpack_snorm_2x16:
1790 case ir_unop_unpack_snorm_4x8:
1791 case ir_unop_unpack_unorm_2x16:
1792 case ir_unop_unpack_unorm_4x8:
1793 assert(!"not reached: should be handled by lower_packing_builtins");
1794 break;
1795 case ir_unop_unpack_half_2x16_split_x:
1796 case ir_unop_unpack_half_2x16_split_y:
1797 case ir_binop_pack_half_2x16_split:
1798 assert(!"not reached: should not occur in vertex shader");
1799 break;
1800 }
1801 }
1802
1803
1804 void
1805 vec4_visitor::visit(ir_swizzle *ir)
1806 {
1807 src_reg src;
1808 int i = 0;
1809 int swizzle[4];
1810
1811 /* Note that this is only swizzles in expressions, not those on the left
1812 * hand side of an assignment, which do write masking. See ir_assignment
1813 * for that.
1814 */
1815
1816 ir->val->accept(this);
1817 src = this->result;
1818 assert(src.file != BAD_FILE);
1819
1820 for (i = 0; i < ir->type->vector_elements; i++) {
1821 switch (i) {
1822 case 0:
1823 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1824 break;
1825 case 1:
1826 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1827 break;
1828 case 2:
1829 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1830 break;
1831 case 3:
1832 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1833 break;
1834 }
1835 }
1836 for (; i < 4; i++) {
1837 /* Replicate the last channel out. */
1838 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1839 }
1840
1841 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1842
1843 this->result = src;
1844 }
1845
1846 void
1847 vec4_visitor::visit(ir_dereference_variable *ir)
1848 {
1849 const struct glsl_type *type = ir->type;
1850 dst_reg *reg = variable_storage(ir->var);
1851
1852 if (!reg) {
1853 fail("Failed to find variable storage for %s\n", ir->var->name);
1854 this->result = src_reg(brw_null_reg());
1855 return;
1856 }
1857
1858 this->result = src_reg(*reg);
1859
1860 /* System values get their swizzle from the dst_reg writemask */
1861 if (ir->var->mode == ir_var_system_value)
1862 return;
1863
1864 if (type->is_scalar() || type->is_vector() || type->is_matrix())
1865 this->result.swizzle = swizzle_for_size(type->vector_elements);
1866 }
1867
1868
1869 int
1870 vec4_visitor::compute_array_stride(ir_dereference_array *ir)
1871 {
1872 /* Under normal circumstances array elements are stored consecutively, so
1873 * the stride is equal to the size of the array element.
1874 */
1875 return type_size(ir->type);
1876 }
1877
1878
1879 void
1880 vec4_visitor::visit(ir_dereference_array *ir)
1881 {
1882 ir_constant *constant_index;
1883 src_reg src;
1884 int array_stride = compute_array_stride(ir);
1885
1886 constant_index = ir->array_index->constant_expression_value();
1887
1888 ir->array->accept(this);
1889 src = this->result;
1890
1891 if (constant_index) {
1892 src.reg_offset += constant_index->value.i[0] * array_stride;
1893 } else {
1894 /* Variable index array dereference. It eats the "vec4" of the
1895 * base of the array and an index that offsets the Mesa register
1896 * index.
1897 */
1898 ir->array_index->accept(this);
1899
1900 src_reg index_reg;
1901
1902 if (array_stride == 1) {
1903 index_reg = this->result;
1904 } else {
1905 index_reg = src_reg(this, glsl_type::int_type);
1906
1907 emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride)));
1908 }
1909
1910 if (src.reladdr) {
1911 src_reg temp = src_reg(this, glsl_type::int_type);
1912
1913 emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
1914
1915 index_reg = temp;
1916 }
1917
1918 src.reladdr = ralloc(mem_ctx, src_reg);
1919 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1920 }
1921
1922 /* If the type is smaller than a vec4, replicate the last channel out. */
1923 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1924 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1925 else
1926 src.swizzle = BRW_SWIZZLE_NOOP;
1927 src.type = brw_type_for_base_type(ir->type);
1928
1929 this->result = src;
1930 }
1931
1932 void
1933 vec4_visitor::visit(ir_dereference_record *ir)
1934 {
1935 unsigned int i;
1936 const glsl_type *struct_type = ir->record->type;
1937 int offset = 0;
1938
1939 ir->record->accept(this);
1940
1941 for (i = 0; i < struct_type->length; i++) {
1942 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1943 break;
1944 offset += type_size(struct_type->fields.structure[i].type);
1945 }
1946
1947 /* If the type is smaller than a vec4, replicate the last channel out. */
1948 if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
1949 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1950 else
1951 this->result.swizzle = BRW_SWIZZLE_NOOP;
1952 this->result.type = brw_type_for_base_type(ir->type);
1953
1954 this->result.reg_offset += offset;
1955 }
1956
1957 /**
1958 * We want to be careful in assignment setup to hit the actual storage
1959 * instead of potentially using a temporary like we might with the
1960 * ir_dereference handler.
1961 */
1962 static dst_reg
1963 get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1964 {
1965 /* The LHS must be a dereference. If the LHS is a variable indexed array
1966 * access of a vector, it must be separated into a series of conditional moves
1967 * before reaching this point (see ir_vec_index_to_cond_assign).
1968 */
1969 assert(ir->as_dereference());
1970 ir_dereference_array *deref_array = ir->as_dereference_array();
1971 if (deref_array) {
1972 assert(!deref_array->array->type->is_vector());
1973 }
1974
1975 /* Use the rvalue deref handler for the most part. We'll ignore
1976 * swizzles in it and write swizzles using writemask, though.
1977 */
1978 ir->accept(v);
1979 return dst_reg(v->result);
1980 }
1981
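/**
 * Copy a (possibly aggregate) value one vec4-sized register at a time,
 * recursing through structs, arrays and matrix columns.  dst and src are
 * advanced by reg_offset as the copy proceeds, and each MOV inherits the
 * given predicate.
 */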
1982 void
1983 vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1984 const struct glsl_type *type, uint32_t predicate)
1985 {
1986 if (type->base_type == GLSL_TYPE_STRUCT) {
1987 for (unsigned int i = 0; i < type->length; i++) {
1988 emit_block_move(dst, src, type->fields.structure[i].type, predicate);
1989 }
1990 return;
1991 }
1992
1993 if (type->is_array()) {
1994 for (unsigned int i = 0; i < type->length; i++) {
1995 emit_block_move(dst, src, type->fields.array, predicate);
1996 }
1997 return;
1998 }
1999
2000 if (type->is_matrix()) {
2001 const struct glsl_type *vec_type;
2002
2003 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
2004 type->vector_elements, 1);
2005
2006 for (int i = 0; i < type->matrix_columns; i++) {
2007 emit_block_move(dst, src, vec_type, predicate);
2008 }
2009 return;
2010 }
2011
2012 assert(type->is_scalar() || type->is_vector());
2013
2014 dst->type = brw_type_for_base_type(type);
2015 src->type = dst->type;
2016
2017 dst->writemask = (1 << type->vector_elements) - 1;
2018
2019 src->swizzle = swizzle_for_size(type->vector_elements);
2020
2021 vec4_instruction *inst = emit(MOV(*dst, *src));
2022 inst->predicate = predicate;
2023
2024 dst->reg_offset++;
2025 src->reg_offset++;
2026 }
2027
2028
2029 /* If the RHS processing resulted in an instruction generating a
2030 * temporary value, and it would be easy to rewrite the instruction to
2031 * generate its result right into the LHS instead, do so. This ends
2032 * up reliably removing instructions where it can be tricky to do so
2033 * later without real UD chain information.
2034 */
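/* As a rough example: for `v = a + b;` the ADD that produced the temporary
 * RHS value is retargeted to write v directly, so no copy MOV needs to be
 * emitted for the assignment at all.
 */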
2035 bool
2036 vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
2037 dst_reg dst,
2038 src_reg src,
2039 vec4_instruction *pre_rhs_inst,
2040 vec4_instruction *last_rhs_inst)
2041 {
2042 /* This could be supported, but it would take more smarts. */
2043 if (ir->condition)
2044 return false;
2045
2046 if (pre_rhs_inst == last_rhs_inst)
2047 return false; /* No instructions generated to work with. */
2048
2049 /* Make sure the last instruction generated our source reg. */
2050 if (src.file != GRF ||
2051 src.file != last_rhs_inst->dst.file ||
2052 src.reg != last_rhs_inst->dst.reg ||
2053 src.reg_offset != last_rhs_inst->dst.reg_offset ||
2054 src.reladdr ||
2055 src.abs ||
2056 src.negate ||
2057 last_rhs_inst->predicate != BRW_PREDICATE_NONE)
2058 return false;
2059
2060 /* Check that the last instruction fully initialized the channels
2061 * we want to use, in the order we want to use them. We could
2062 * potentially reswizzle the operands of many instructions so that
2063 * we could handle out of order channels, but don't yet.
2064 */
2065
2066 for (unsigned i = 0; i < 4; i++) {
2067 if (dst.writemask & (1 << i)) {
2068 if (!(last_rhs_inst->dst.writemask & (1 << i)))
2069 return false;
2070
2071 if (BRW_GET_SWZ(src.swizzle, i) != i)
2072 return false;
2073 }
2074 }
2075
2076 /* Success! Rewrite the instruction. */
2077 last_rhs_inst->dst.file = dst.file;
2078 last_rhs_inst->dst.reg = dst.reg;
2079 last_rhs_inst->dst.reg_offset = dst.reg_offset;
2080 last_rhs_inst->dst.reladdr = dst.reladdr;
2081 last_rhs_inst->dst.writemask &= dst.writemask;
2082
2083 return true;
2084 }
2085
2086 void
2087 vec4_visitor::visit(ir_assignment *ir)
2088 {
2089 dst_reg dst = get_assignment_lhs(ir->lhs, this);
2090 uint32_t predicate = BRW_PREDICATE_NONE;
2091
2092 if (!ir->lhs->type->is_scalar() &&
2093 !ir->lhs->type->is_vector()) {
2094 ir->rhs->accept(this);
2095 src_reg src = this->result;
2096
2097 if (ir->condition) {
2098 emit_bool_to_cond_code(ir->condition, &predicate);
2099 }
2100
2101 /* emit_block_move doesn't account for swizzles in the source register.
2102 * This should be ok, since the source register is a structure or an
2103 * array, and those can't be swizzled. But double-check to be sure.
2104 */
2105 assert(src.swizzle ==
2106 (ir->rhs->type->is_matrix()
2107 ? swizzle_for_size(ir->rhs->type->vector_elements)
2108 : BRW_SWIZZLE_NOOP));
2109
2110 emit_block_move(&dst, &src, ir->rhs->type, predicate);
2111 return;
2112 }
2113
2114 /* Now we're down to just a scalar/vector with writemasks. */
2115 int i;
2116
2117 vec4_instruction *pre_rhs_inst, *last_rhs_inst;
2118 pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2119
2120 ir->rhs->accept(this);
2121
2122 last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
2123
2124 src_reg src = this->result;
2125
2126 int swizzles[4];
2127 int first_enabled_chan = 0;
2128 int src_chan = 0;
2129
2130 assert(ir->lhs->type->is_vector() ||
2131 ir->lhs->type->is_scalar());
2132 dst.writemask = ir->write_mask;
2133
2134 for (int i = 0; i < 4; i++) {
2135 if (dst.writemask & (1 << i)) {
2136 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
2137 break;
2138 }
2139 }
2140
2141 /* Swizzle a small RHS vector into the channels being written.
2142 *
2143 * GLSL IR treats write_mask as dictating how many channels are
2144 * present on the RHS, while in our instructions we need to make
2145 * those channels appear in the slots of the vec4 they're written to.
2146 */
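/* For example (informally): a two-channel RHS with the identity swizzle
 * written to dst.xy ends up with swizzle xyxx; the z/w slots just repeat
 * the first enabled channel since they are masked off anyway.
 */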
2147 for (int i = 0; i < 4; i++) {
2148 if (dst.writemask & (1 << i))
2149 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
2150 else
2151 swizzles[i] = first_enabled_chan;
2152 }
2153 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
2154 swizzles[2], swizzles[3]);
2155
2156 if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
2157 return;
2158 }
2159
2160 if (ir->condition) {
2161 emit_bool_to_cond_code(ir->condition, &predicate);
2162 }
2163
2164 for (i = 0; i < type_size(ir->lhs->type); i++) {
2165 vec4_instruction *inst = emit(MOV(dst, src));
2166 inst->predicate = predicate;
2167
2168 dst.reg_offset++;
2169 src.reg_offset++;
2170 }
2171 }
2172
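/**
 * Emit MOVs of immediate values for a constant, recursing through aggregate
 * types and coalescing identical components into a single writemasked MOV
 * where possible (e.g. vec4(0.5, 1.5, 1.5, 1.5) takes two MOVs, not four).
 */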
2173 void
2174 vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
2175 {
2176 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2177 foreach_list(node, &ir->components) {
2178 ir_constant *field_value = (ir_constant *)node;
2179
2180 emit_constant_values(dst, field_value);
2181 }
2182 return;
2183 }
2184
2185 if (ir->type->is_array()) {
2186 for (unsigned int i = 0; i < ir->type->length; i++) {
2187 emit_constant_values(dst, ir->array_elements[i]);
2188 }
2189 return;
2190 }
2191
2192 if (ir->type->is_matrix()) {
2193 for (int i = 0; i < ir->type->matrix_columns; i++) {
2194 float *vec = &ir->value.f[i * ir->type->vector_elements];
2195
2196 for (int j = 0; j < ir->type->vector_elements; j++) {
2197 dst->writemask = 1 << j;
2198 dst->type = BRW_REGISTER_TYPE_F;
2199
2200 emit(MOV(*dst, src_reg(vec[j])));
2201 }
2202 dst->reg_offset++;
2203 }
2204 return;
2205 }
2206
2207 int remaining_writemask = (1 << ir->type->vector_elements) - 1;
2208
2209 for (int i = 0; i < ir->type->vector_elements; i++) {
2210 if (!(remaining_writemask & (1 << i)))
2211 continue;
2212
2213 dst->writemask = 1 << i;
2214 dst->type = brw_type_for_base_type(ir->type);
2215
2216 /* Find other components that match the one we're about to
2217 * write. Emits fewer instructions for things like vec4(0.5,
2218 * 1.5, 1.5, 1.5).
2219 */
2220 for (int j = i + 1; j < ir->type->vector_elements; j++) {
2221 if (ir->type->base_type == GLSL_TYPE_BOOL) {
2222 if (ir->value.b[i] == ir->value.b[j])
2223 dst->writemask |= (1 << j);
2224 } else {
2225 /* u, i, and f storage all line up, so no need for a
2226 * switch case for comparing each type.
2227 */
2228 if (ir->value.u[i] == ir->value.u[j])
2229 dst->writemask |= (1 << j);
2230 }
2231 }
2232
2233 switch (ir->type->base_type) {
2234 case GLSL_TYPE_FLOAT:
2235 emit(MOV(*dst, src_reg(ir->value.f[i])));
2236 break;
2237 case GLSL_TYPE_INT:
2238 emit(MOV(*dst, src_reg(ir->value.i[i])));
2239 break;
2240 case GLSL_TYPE_UINT:
2241 emit(MOV(*dst, src_reg(ir->value.u[i])));
2242 break;
2243 case GLSL_TYPE_BOOL:
2244 emit(MOV(*dst, src_reg(ir->value.b[i])));
2245 break;
2246 default:
2247 assert(!"Non-float/uint/int/bool constant");
2248 break;
2249 }
2250
2251 remaining_writemask &= ~dst->writemask;
2252 }
2253 dst->reg_offset++;
2254 }
2255
2256 void
2257 vec4_visitor::visit(ir_constant *ir)
2258 {
2259 dst_reg dst = dst_reg(this, ir->type);
2260 this->result = src_reg(dst);
2261
2262 emit_constant_values(&dst, ir);
2263 }
2264
2265 void
2266 vec4_visitor::visit(ir_call *ir)
2267 {
2268 assert(!"not reached");
2269 }
2270
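/**
 * Build the MRF payload for a vec4 sampler message and emit the
 * corresponding SEND.  The payload layout varies with the texture opcode
 * and hardware generation; see the per-opcode cases below.
 */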
2271 void
2272 vec4_visitor::visit(ir_texture *ir)
2273 {
2274 int sampler =
2275 _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
2276
2277 /* Should be lowered by do_lower_texture_projection */
2278 assert(!ir->projector);
2279
2280 /* Generate code to compute all the subexpression trees. This has to be
2281 * done before loading any values into MRFs for the sampler message since
2282 * generating these values may involve SEND messages that need the MRFs.
2283 */
2284 src_reg coordinate;
2285 if (ir->coordinate) {
2286 ir->coordinate->accept(this);
2287 coordinate = this->result;
2288 }
2289
2290 src_reg shadow_comparitor;
2291 if (ir->shadow_comparitor) {
2292 ir->shadow_comparitor->accept(this);
2293 shadow_comparitor = this->result;
2294 }
2295
2296 const glsl_type *lod_type = NULL, *sample_index_type = NULL;
2297 src_reg lod, dPdx, dPdy, sample_index;
2298 switch (ir->op) {
2299 case ir_tex:
2300 lod = src_reg(0.0f);
2301 lod_type = glsl_type::float_type;
2302 break;
2303 case ir_txf:
2304 case ir_txl:
2305 case ir_txs:
2306 ir->lod_info.lod->accept(this);
2307 lod = this->result;
2308 lod_type = ir->lod_info.lod->type;
2309 break;
2310 case ir_txf_ms:
2311 ir->lod_info.sample_index->accept(this);
2312 sample_index = this->result;
2313 sample_index_type = ir->lod_info.sample_index->type;
2314 break;
2315 case ir_txd:
2316 ir->lod_info.grad.dPdx->accept(this);
2317 dPdx = this->result;
2318
2319 ir->lod_info.grad.dPdy->accept(this);
2320 dPdy = this->result;
2321
2322 lod_type = ir->lod_info.grad.dPdx->type;
2323 break;
2324 case ir_txb:
2325 case ir_lod:
2326 break;
2327 }
2328
2329 vec4_instruction *inst = NULL;
2330 switch (ir->op) {
2331 case ir_tex:
2332 case ir_txl:
2333 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
2334 break;
2335 case ir_txd:
2336 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
2337 break;
2338 case ir_txf:
2339 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
2340 break;
2341 case ir_txf_ms:
2342 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF_MS);
2343 break;
2344 case ir_txs:
2345 inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
2346 break;
2347 case ir_txb:
2348 assert(!"TXB is not valid for vertex shaders.");
2349 break;
2350 case ir_lod:
2351 assert(!"LOD is not valid for vertex shaders.");
2352 break;
2353 }
2354
2355 bool use_texture_offset = ir->offset != NULL && ir->op != ir_txf;
2356
2357 /* Texel offsets go in the message header; Gen4 also requires headers. */
2358 inst->header_present = use_texture_offset || intel->gen < 5;
2359 inst->base_mrf = 2;
2360 inst->mlen = inst->header_present + 1; /* always at least one */
2361 inst->sampler = sampler;
2362 inst->dst = dst_reg(this, ir->type);
2363 inst->dst.writemask = WRITEMASK_XYZW;
2364 inst->shadow_compare = ir->shadow_comparitor != NULL;
2365
2366 if (use_texture_offset)
2367 inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
2368
2369 /* MRF for the first parameter */
2370 int param_base = inst->base_mrf + inst->header_present;
2371
2372 if (ir->op == ir_txs) {
2373 int writemask = intel->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
2374 emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod));
2375 } else {
2376 int i, coord_mask = 0, zero_mask = 0;
2377 /* Load the coordinate */
2378 /* FINISHME: gl_clamp_mask and saturate */
2379 for (i = 0; i < ir->coordinate->type->vector_elements; i++)
2380 coord_mask |= (1 << i);
2381 for (; i < 4; i++)
2382 zero_mask |= (1 << i);
2383
2384 if (ir->offset && ir->op == ir_txf) {
2385 /* It appears that the ld instruction used for txf does its
2386 * address bounds check before adding in the offset. To work
2387 * around this, just add the integer offset to the integer
2388 * texel coordinate, and don't put the offset in the header.
2389 */
2390 ir_constant *offset = ir->offset->as_constant();
2391 assert(offset);
2392
2393 for (int j = 0; j < ir->coordinate->type->vector_elements; j++) {
2394 src_reg src = coordinate;
2395 src.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(src.swizzle, j),
2396 BRW_GET_SWZ(src.swizzle, j),
2397 BRW_GET_SWZ(src.swizzle, j),
2398 BRW_GET_SWZ(src.swizzle, j));
2399 emit(ADD(dst_reg(MRF, param_base, ir->coordinate->type, 1 << j),
2400 src, offset->value.i[j]));
2401 }
2402 } else {
2403 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
2404 coordinate));
2405 }
2406 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
2407 src_reg(0)));
2408 /* Load the shadow comparitor */
2409 if (ir->shadow_comparitor && ir->op != ir_txd) {
2410 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
2411 WRITEMASK_X),
2412 shadow_comparitor));
2413 inst->mlen++;
2414 }
2415
2416 /* Load the LOD info */
2417 if (ir->op == ir_tex || ir->op == ir_txl) {
2418 int mrf, writemask;
2419 if (intel->gen >= 5) {
2420 mrf = param_base + 1;
2421 if (ir->shadow_comparitor) {
2422 writemask = WRITEMASK_Y;
2423 /* mlen already incremented */
2424 } else {
2425 writemask = WRITEMASK_X;
2426 inst->mlen++;
2427 }
2428 } else /* intel->gen == 4 */ {
2429 mrf = param_base;
2430 writemask = WRITEMASK_Z;
2431 }
2432 emit(MOV(dst_reg(MRF, mrf, lod_type, writemask), lod));
2433 } else if (ir->op == ir_txf) {
2434 emit(MOV(dst_reg(MRF, param_base, lod_type, WRITEMASK_W), lod));
2435 } else if (ir->op == ir_txf_ms) {
2436 emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X),
2437 sample_index));
2438 inst->mlen++;
2439
2440 /* On Gen7 there is an additional MCS parameter here after the
2441 * sample index, but we don't bother to emit it since it's always
2442 * zero.  If we start supporting texturing from CMS surfaces, this
2443 * will have to change.
2444 */
2445 } else if (ir->op == ir_txd) {
2446 const glsl_type *type = lod_type;
2447
2448 if (intel->gen >= 5) {
2449 dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y);
2450 dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y);
2451 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
2452 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
2453 inst->mlen++;
2454
2455 if (ir->type->vector_elements == 3 || ir->shadow_comparitor) {
2456 dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
2457 dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
2458 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
2459 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
2460 inst->mlen++;
2461
2462 if (ir->shadow_comparitor) {
2463 emit(MOV(dst_reg(MRF, param_base + 2,
2464 ir->shadow_comparitor->type, WRITEMASK_Z),
2465 shadow_comparitor));
2466 }
2467 }
2468 } else /* intel->gen == 4 */ {
2469 emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
2470 emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
2471 inst->mlen += 2;
2472 }
2473 }
2474 }
2475
2476 emit(inst);
2477
2478 /* Fix up the number of layers (the Z component) for cube arrays: the
2479 * hardware returns faces * layers, but the spec requires just layers.
2480 */
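/* E.g. a cube map array allocated with 4 layers reports 24 here
 * (6 faces * 4 layers); the INT_QUOTIENT by 6 below turns that back
 * into the 4 layers the spec expects.
 */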
2481 if (ir->op == ir_txs) {
2482 glsl_type const *type = ir->sampler->type;
2483 if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
2484 type->sampler_array) {
2485 emit_math(SHADER_OPCODE_INT_QUOTIENT,
2486 with_writemask(inst->dst, WRITEMASK_Z),
2487 src_reg(inst->dst), src_reg(6));
2488 }
2489 }
2490
2491 swizzle_result(ir, src_reg(inst->dst), sampler);
2492 }
2493
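/**
 * Apply the texture swizzle from the program key to a sampler result,
 * using immediate 0.0f/1.0f MOVs for SWIZZLE_ZERO/SWIZZLE_ONE channels and
 * a single swizzled MOV for the remaining channels.
 */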
2494 void
2495 vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
2496 {
2497 int s = key->tex.swizzles[sampler];
2498
2499 this->result = src_reg(this, ir->type);
2500 dst_reg swizzled_result(this->result);
2501
2502 if (ir->op == ir_txs || ir->type == glsl_type::float_type
2503 || s == SWIZZLE_NOOP) {
2504 emit(MOV(swizzled_result, orig_val));
2505 return;
2506 }
2507
2508 int zero_mask = 0, one_mask = 0, copy_mask = 0;
2509 int swizzle[4];
2510
2511 for (int i = 0; i < 4; i++) {
2512 switch (GET_SWZ(s, i)) {
2513 case SWIZZLE_ZERO:
2514 zero_mask |= (1 << i);
2515 break;
2516 case SWIZZLE_ONE:
2517 one_mask |= (1 << i);
2518 break;
2519 default:
2520 copy_mask |= (1 << i);
2521 swizzle[i] = GET_SWZ(s, i);
2522 break;
2523 }
2524 }
2525
2526 if (copy_mask) {
2527 orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2528 swizzled_result.writemask = copy_mask;
2529 emit(MOV(swizzled_result, orig_val));
2530 }
2531
2532 if (zero_mask) {
2533 swizzled_result.writemask = zero_mask;
2534 emit(MOV(swizzled_result, src_reg(0.0f)));
2535 }
2536
2537 if (one_mask) {
2538 swizzled_result.writemask = one_mask;
2539 emit(MOV(swizzled_result, src_reg(1.0f)));
2540 }
2541 }
2542
2543 void
2544 vec4_visitor::visit(ir_return *ir)
2545 {
2546 assert(!"not reached");
2547 }
2548
2549 void
2550 vec4_visitor::visit(ir_discard *ir)
2551 {
2552 assert(!"not reached");
2553 }
2554
2555 void
2556 vec4_visitor::visit(ir_if *ir)
2557 {
2558 /* Don't point the annotation at the if statement, because then it plus
2559 * the then and else blocks get printed.
2560 */
2561 this->base_ir = ir->condition;
2562
2563 if (intel->gen == 6) {
2564 emit_if_gen6(ir);
2565 } else {
2566 uint32_t predicate;
2567 emit_bool_to_cond_code(ir->condition, &predicate);
2568 emit(IF(predicate));
2569 }
2570
2571 visit_instructions(&ir->then_instructions);
2572
2573 if (!ir->else_instructions.is_empty()) {
2574 this->base_ir = ir->condition;
2575 emit(BRW_OPCODE_ELSE);
2576
2577 visit_instructions(&ir->else_instructions);
2578 }
2579
2580 this->base_ir = ir->condition;
2581 emit(BRW_OPCODE_ENDIF);
2582 }
2583
2584 void
2585 vec4_visitor::emit_ndc_computation()
2586 {
2587 /* Get the position */
2588 src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
2589
2590 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
2591 dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
2592 output_reg[BRW_VARYING_SLOT_NDC] = ndc;
2593
2594 current_annotation = "NDC";
2595 dst_reg ndc_w = ndc;
2596 ndc_w.writemask = WRITEMASK_W;
2597 src_reg pos_w = pos;
2598 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
2599 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
2600
2601 dst_reg ndc_xyz = ndc;
2602 ndc_xyz.writemask = WRITEMASK_XYZ;
2603
2604 emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
2605 }
2606
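/**
 * Write the VUE header register that carries the point size and flags.
 * On pre-Gen6 hardware this also packs the per-vertex user clip flags and
 * the negative-RHW workaround bit; on Gen6+ only the point size (.w) and,
 * when present, the layer (.y) are filled in.
 */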
2607 void
2608 vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
2609 {
2610 if (intel->gen < 6 &&
2611 ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
2612 key->userclip_active || brw->has_negative_rhw_bug)) {
2613 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
2614 dst_reg header1_w = header1;
2615 header1_w.writemask = WRITEMASK_W;
2616 GLuint i;
2617
2618 emit(MOV(header1, 0u));
2619
2620 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
2621 src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
2622
2623 current_annotation = "Point size";
2624 emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
2625 emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
2626 }
2627
2628 current_annotation = "Clipping flags";
2629 for (i = 0; i < key->nr_userclip_plane_consts; i++) {
2630 vec4_instruction *inst;
2631 gl_varying_slot slot = (prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)
2632 ? VARYING_SLOT_CLIP_VERTEX : VARYING_SLOT_POS;
2633
2634 inst = emit(DP4(dst_null_f(), src_reg(output_reg[slot]),
2635 src_reg(this->userplane[i])));
2636 inst->conditional_mod = BRW_CONDITIONAL_L;
2637
2638 inst = emit(OR(header1_w, src_reg(header1_w), 1u << i));
2639 inst->predicate = BRW_PREDICATE_NORMAL;
2640 }
2641
2642 /* i965 clipping workaround:
2643 * 1) Test for -ve rhw
2644 * 2) If set,
2645 * set ndc = (0,0,0,0)
2646 * set ucp[6] = 1
2647 *
2648 * Later, clipping will detect ucp[6] and ensure the primitive is
2649 * clipped against all fixed planes.
2650 */
2651 if (brw->has_negative_rhw_bug) {
2652 src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
2653 ndc_w.swizzle = BRW_SWIZZLE_WWWW;
2654 emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
2655 vec4_instruction *inst;
2656 inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
2657 inst->predicate = BRW_PREDICATE_NORMAL;
2658 inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
2659 inst->predicate = BRW_PREDICATE_NORMAL;
2660 }
2661
2662 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
2663 } else if (intel->gen < 6) {
2664 emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
2665 } else {
2666 emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
2667 if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
2668 emit(MOV(brw_writemask(reg, WRITEMASK_W),
2669 src_reg(output_reg[VARYING_SLOT_PSIZ])));
2670 }
2671 if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
2672 emit(MOV(retype(brw_writemask(reg, WRITEMASK_Y), BRW_REGISTER_TYPE_D),
2673 src_reg(output_reg[VARYING_SLOT_LAYER])));
2674 }
2675 }
2676 }
2677
2678 void
2679 vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
2680 {
2681 if (intel->gen < 6) {
2682 /* Clip distance slots are set aside in gen5, but they are not used. It
2683 * is not clear whether we actually need to set aside space for them,
2684 * but the performance cost is negligible.
2685 */
2686 return;
2687 }
2688
2689 /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
2690 *
2691 * "If a linked set of shaders forming the vertex stage contains no
2692 * static write to gl_ClipVertex or gl_ClipDistance, but the
2693 * application has requested clipping against user clip planes through
2694 * the API, then the coordinate written to gl_Position is used for
2695 * comparison against the user clip planes."
2696 *
2697 * This function is only called if the shader didn't write to
2698 * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
2699 * if the user wrote to it; otherwise we use gl_Position.
2700 */
2701 gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
2702 if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
2703 clip_vertex = VARYING_SLOT_POS;
2704 }
2705
2706 for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
2707 ++i) {
2708 emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
2709 src_reg(output_reg[clip_vertex]),
2710 src_reg(this->userplane[i + offset])));
2711 }
2712 }
2713
2714 void
2715 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
2716 {
2717 assert (varying < VARYING_SLOT_MAX);
2718 reg.type = output_reg[varying].type;
2719 current_annotation = output_reg_annotation[varying];
2720 /* Copy the register, saturating if necessary */
2721 vec4_instruction *inst = emit(MOV(reg,
2722 src_reg(output_reg[varying])));
2723 if ((varying == VARYING_SLOT_COL0 ||
2724 varying == VARYING_SLOT_COL1 ||
2725 varying == VARYING_SLOT_BFC0 ||
2726 varying == VARYING_SLOT_BFC1) &&
2727 key->clamp_vertex_color) {
2728 inst->saturate = true;
2729 }
2730 }
2731
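/**
 * Write a single VUE slot into the given MRF, dispatching to the special
 * handling needed for header slots (point size/flags, NDC, position, clip
 * distances, edge flag) and falling back to a plain copy otherwise.
 */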
2732 void
2733 vec4_visitor::emit_urb_slot(int mrf, int varying)
2734 {
2735 struct brw_reg hw_reg = brw_message_reg(mrf);
2736 dst_reg reg = dst_reg(MRF, mrf);
2737 reg.type = BRW_REGISTER_TYPE_F;
2738
2739 switch (varying) {
2740 case VARYING_SLOT_PSIZ:
2741 /* PSIZ is always in slot 0, and is coupled with other flags. */
2742 current_annotation = "indices, point width, clip flags";
2743 emit_psiz_and_flags(hw_reg);
2744 break;
2745 case BRW_VARYING_SLOT_NDC:
2746 current_annotation = "NDC";
2747 emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
2748 break;
2749 case VARYING_SLOT_POS:
2750 current_annotation = "gl_Position";
2751 emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
2752 break;
2753 case VARYING_SLOT_CLIP_DIST0:
2754 case VARYING_SLOT_CLIP_DIST1:
2755 if (this->key->uses_clip_distance) {
2756 emit_generic_urb_slot(reg, varying);
2757 } else {
2758 current_annotation = "user clip distances";
2759 emit_clip_distances(hw_reg, (varying - VARYING_SLOT_CLIP_DIST0) * 4);
2760 }
2761 break;
2762 case VARYING_SLOT_EDGE:
2763 /* This is present when doing unfilled polygons. We're supposed to copy
2764 * the edge flag from the user-provided vertex array
2765 * (glEdgeFlagPointer), or otherwise we'll copy from the current value
2766 * of that attribute (starts as 1.0f). This is then used in clipping to
2767 * determine which edges should be drawn as wireframe.
2768 */
2769 current_annotation = "edge flag";
2770 emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
2771 glsl_type::float_type, WRITEMASK_XYZW))));
2772 break;
2773 case BRW_VARYING_SLOT_PAD:
2774 /* No need to write to this slot */
2775 break;
2776 default:
2777 emit_generic_urb_slot(reg, varying);
2778 break;
2779 }
2780 }
2781
2782 static int
2783 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
2784 {
2785 struct intel_context *intel = &brw->intel;
2786
2787 if (intel->gen >= 6) {
2788 /* URB data written (does not include the message header reg) must
2789 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
2790 * section 5.4.3.2.2: URB_INTERLEAVED.
2791 *
2792 * URB entries are allocated on a multiple of 1024 bits, so an
2793 * extra 128 bits written here to make the end align to 256 is
2794 * no problem.
2795 */
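/* E.g. mlen == 6 (one header reg + 5 data regs) is bumped to 7 so the
 * data payload becomes 6 regs, a multiple of two.
 */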
2796 if ((mlen % 2) != 1)
2797 mlen++;
2798 }
2799
2800 return mlen;
2801 }
2802
2803 void
2804 vec4_vs_visitor::emit_urb_write_header(int mrf)
2805 {
2806 /* No need to do anything for VS; an implied write to this MRF will be
2807 * performed by VS_OPCODE_URB_WRITE.
2808 */
2809 (void) mrf;
2810 }
2811
2812 vec4_instruction *
2813 vec4_vs_visitor::emit_urb_write_opcode(bool complete)
2814 {
2815 /* For VS, the URB writes end the thread. */
2816 if (complete) {
2817 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
2818 emit_shader_time_end();
2819 }
2820
2821 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
2822 inst->eot = complete;
2823
2824 return inst;
2825 }
2826
2827 /**
2828 * Generates the VUE payload plus the necessary URB write instructions to
2829 * output it.
2830 *
2831 * The VUE layout is documented in Volume 2a.
2832 */
2833 void
2834 vec4_visitor::emit_vertex()
2835 {
2836 /* MRF 0 is reserved for the debugger, so start with message header
2837 * in MRF 1.
2838 */
2839 int base_mrf = 1;
2840 int mrf = base_mrf;
2841 /* In the process of generating our URB write message contents, we
2842 * may need to unspill a register or load from an array. Those
2843 * reads would use MRFs 14-15.
2844 */
2845 int max_usable_mrf = 13;
2846
2847 /* The following assertion verifies that max_usable_mrf causes an
2848 * even-numbered amount of URB write data, which will meet gen6's
2849 * requirements for length alignment.
2850 */
2851 assert ((max_usable_mrf - base_mrf) % 2 == 0);
2852
2853 /* First mrf is the g0-based message header containing URB handles and
2854 * such.
2855 */
2856 emit_urb_write_header(mrf++);
2857
2858 if (intel->gen < 6) {
2859 emit_ndc_computation();
2860 }
2861
2862 /* Set up the VUE data for the first URB write */
2863 int slot;
2864 for (slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
2865 emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
2866
2867 /* If this was max_usable_mrf, we can't fit anything more into this URB
2868 * WRITE.
2869 */
2870 if (mrf > max_usable_mrf) {
2871 slot++;
2872 break;
2873 }
2874 }
2875
2876 bool complete = slot >= prog_data->vue_map.num_slots;
2877 current_annotation = "URB write";
2878 vec4_instruction *inst = emit_urb_write_opcode(complete);
2879 inst->base_mrf = base_mrf;
2880 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2881
2882 /* Optional second URB write */
2883 if (!complete) {
2884 mrf = base_mrf + 1;
2885
2886 for (; slot < prog_data->vue_map.num_slots; ++slot) {
2887 assert(mrf < max_usable_mrf);
2888
2889 emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
2890 }
2891
2892 current_annotation = "URB write";
2893 inst = emit_urb_write_opcode(true /* complete */);
2894 inst->base_mrf = base_mrf;
2895 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
2896 /* URB destination offset.  In the previous write we filled MRFs 2-13
2897 * with data (12 regs, not counting the header MRF).  URB offset is in
2898 * URB row increments, and each of our MRFs is half of one of
2899 * those, since we're doing interleaved writes.
2900 */
2901 inst->offset = (max_usable_mrf - base_mrf) / 2;
2902 }
2903 }
2904
2905 void
2906 vec4_vs_visitor::emit_thread_end()
2907 {
2908 /* For VS, we always end the thread by emitting a single vertex.
2909 * emit_urb_write_opcode() will take care of setting the eot flag on the
2910 * SEND instruction.
2911 */
2912 emit_vertex();
2913 }
2914
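/**
 * Compute the scratch-space offset for @reg_offset (plus @reladdr, if any),
 * scaled into the units the scratch read/write messages expect: two per
 * vec4 because of the interleaved layout, and byte units (rather than
 * 16-byte units) pre-Gen6.
 */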
2915 src_reg
2916 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
2917 src_reg *reladdr, int reg_offset)
2918 {
2919 /* Because we store the values to scratch interleaved like our
2920 * vertex data, we need to scale the vec4 index by 2.
2921 */
2922 int message_header_scale = 2;
2923
2924 /* Pre-gen6, the message header uses byte offsets instead of vec4
2925 * (16-byte) offset units.
2926 */
2927 if (intel->gen < 6)
2928 message_header_scale *= 16;
2929
2930 if (reladdr) {
2931 src_reg index = src_reg(this, glsl_type::int_type);
2932
2933 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2934 emit_before(inst, MUL(dst_reg(index),
2935 index, src_reg(message_header_scale)));
2936
2937 return index;
2938 } else {
2939 return src_reg(reg_offset * message_header_scale);
2940 }
2941 }
2942
2943 src_reg
2944 vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
2945 src_reg *reladdr, int reg_offset)
2946 {
2947 if (reladdr) {
2948 src_reg index = src_reg(this, glsl_type::int_type);
2949
2950 emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
2951
2952 /* Pre-gen6, the message header uses byte offsets instead of vec4
2953 * (16-byte) offset units.
2954 */
2955 if (intel->gen < 6) {
2956 emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
2957 }
2958
2959 return index;
2960 } else {
2961 int message_header_scale = intel->gen < 6 ? 16 : 1;
2962 return src_reg(reg_offset * message_header_scale);
2963 }
2964 }
2965
2966 /**
2967 * Emits an instruction before @inst to load the value named by @orig_src
2968 * from scratch space at @base_offset to @temp.
2969 *
2970 * @base_offset is measured in 32-byte units (the size of a register).
2971 */
2972 void
2973 vec4_visitor::emit_scratch_read(vec4_instruction *inst,
2974 dst_reg temp, src_reg orig_src,
2975 int base_offset)
2976 {
2977 int reg_offset = base_offset + orig_src.reg_offset;
2978 src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
2979
2980 emit_before(inst, SCRATCH_READ(temp, index));
2981 }
2982
2983 /**
2984 * Emits an instruction after @inst to store the value to be written
2985 * to @orig_dst to scratch space at @base_offset, from @temp.
2986 *
2987 * @base_offset is measured in 32-byte units (the size of a register).
2988 */
2989 void
2990 vec4_visitor::emit_scratch_write(vec4_instruction *inst, int base_offset)
2991 {
2992 int reg_offset = base_offset + inst->dst.reg_offset;
2993 src_reg index = get_scratch_offset(inst, inst->dst.reladdr, reg_offset);
2994
2995 /* Create a temporary register to store *inst's result in.
2996 *
2997 * We have to be careful in MOVing from our temporary result register in
2998 * the scratch write. If we swizzle from channels of the temporary that
2999 * weren't initialized, it will confuse live interval analysis, which will
3000 * make spilling fail to make progress.
3001 */
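/* E.g. for an instruction that writes only .y, the temporary is read back
 * with swizzle yyyy (computed below), so no uninitialized channel is ever
 * referenced.
 */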
3002 src_reg temp = src_reg(this, glsl_type::vec4_type);
3003 temp.type = inst->dst.type;
3004 int first_writemask_chan = ffs(inst->dst.writemask) - 1;
3005 int swizzles[4];
3006 for (int i = 0; i < 4; i++)
3007 if (inst->dst.writemask & (1 << i))
3008 swizzles[i] = i;
3009 else
3010 swizzles[i] = first_writemask_chan;
3011 temp.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
3012 swizzles[2], swizzles[3]);
3013
3014 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
3015 inst->dst.writemask));
3016 vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
3017 write->predicate = inst->predicate;
3018 write->ir = inst->ir;
3019 write->annotation = inst->annotation;
3020 inst->insert_after(write);
3021
3022 inst->dst.file = temp.file;
3023 inst->dst.reg = temp.reg;
3024 inst->dst.reg_offset = temp.reg_offset;
3025 inst->dst.reladdr = NULL;
3026 }
3027
3028 /**
3029 * We can't generally support array access in GRF space, because a
3030 * single instruction's destination can only span 2 contiguous
3031 * registers. So, we send all GRF arrays that get variable index
3032 * access to scratch space.
3033 */
3034 void
3035 vec4_visitor::move_grf_array_access_to_scratch()
3036 {
3037 int scratch_loc[this->virtual_grf_count];
3038
3039 for (int i = 0; i < this->virtual_grf_count; i++) {
3040 scratch_loc[i] = -1;
3041 }
3042
3043 /* First, calculate the set of virtual GRFs that need to be punted
3044 * to scratch due to having any array access on them, and where in
3045 * scratch.
3046 */
3047 foreach_list(node, &this->instructions) {
3048 vec4_instruction *inst = (vec4_instruction *)node;
3049
3050 if (inst->dst.file == GRF && inst->dst.reladdr &&
3051 scratch_loc[inst->dst.reg] == -1) {
3052 scratch_loc[inst->dst.reg] = c->last_scratch;
3053 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
3054 }
3055
3056 for (int i = 0 ; i < 3; i++) {
3057 src_reg *src = &inst->src[i];
3058
3059 if (src->file == GRF && src->reladdr &&
3060 scratch_loc[src->reg] == -1) {
3061 scratch_loc[src->reg] = c->last_scratch;
3062 c->last_scratch += this->virtual_grf_sizes[src->reg];
3063 }
3064 }
3065 }
3066
3067 /* Now, for anything that will be accessed through scratch, rewrite
3068 * it to load/store. Note that this is a _safe list walk, because
3069 * we may generate a new scratch_write instruction after the one
3070 * we're processing.
3071 */
3072 foreach_list_safe(node, &this->instructions) {
3073 vec4_instruction *inst = (vec4_instruction *)node;
3074
3075 /* Set up the annotation tracking for new generated instructions. */
3076 base_ir = inst->ir;
3077 current_annotation = inst->annotation;
3078
3079 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
3080 emit_scratch_write(inst, scratch_loc[inst->dst.reg]);
3081 }
3082
3083 for (int i = 0 ; i < 3; i++) {
3084 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
3085 continue;
3086
3087 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3088
3089 emit_scratch_read(inst, temp, inst->src[i],
3090 scratch_loc[inst->src[i].reg]);
3091
3092 inst->src[i].file = temp.file;
3093 inst->src[i].reg = temp.reg;
3094 inst->src[i].reg_offset = temp.reg_offset;
3095 inst->src[i].reladdr = NULL;
3096 }
3097 }
3098 }
3099
3100 /**
3101 * Emits an instruction before @inst to load the value named by @orig_src
3102 * from the pull constant buffer (surface) at @base_offset to @temp.
3103 */
3104 void
3105 vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
3106 dst_reg temp, src_reg orig_src,
3107 int base_offset)
3108 {
3109 int reg_offset = base_offset + orig_src.reg_offset;
3110 src_reg index = src_reg((unsigned)SURF_INDEX_VERT_CONST_BUFFER);
3111 src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
3112 vec4_instruction *load;
3113
3114 if (intel->gen >= 7) {
3115 dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
3116 grf_offset.type = offset.type;
3117 emit_before(inst, MOV(grf_offset, offset));
3118
3119 load = new(mem_ctx) vec4_instruction(this,
3120 VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
3121 temp, index, src_reg(grf_offset));
3122 } else {
3123 load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
3124 temp, index, offset);
3125 load->base_mrf = 14;
3126 load->mlen = 1;
3127 }
3128 emit_before(inst, load);
3129 }
3130
3131 /**
3132 * Implements array access of uniforms by inserting a
3133 * PULL_CONSTANT_LOAD instruction.
3134 *
3135 * Unlike temporary GRF array access (where we don't support it due to
3136 * the difficulty of doing relative addressing on instruction
3137 * destinations), we could potentially do array access of uniforms
3138 * that were loaded in GRF space as push constants. In real-world
3139 * usage we've seen, though, the arrays being used are always larger
3140 * than we could load as push constants, so just always move all
3141 * uniform array access out to a pull constant buffer.
3142 */
3143 void
3144 vec4_visitor::move_uniform_array_access_to_pull_constants()
3145 {
3146 int pull_constant_loc[this->uniforms];
3147
3148 for (int i = 0; i < this->uniforms; i++) {
3149 pull_constant_loc[i] = -1;
3150 }
3151
3152 /* Walk through and find array access of uniforms. Put a copy of that
3153 * uniform in the pull constant buffer.
3154 *
3155 * Note that we don't move constant-indexed accesses to arrays. No
3156 * testing has been done of the performance impact of this choice.
3157 */
3158 foreach_list_safe(node, &this->instructions) {
3159 vec4_instruction *inst = (vec4_instruction *)node;
3160
3161 for (int i = 0 ; i < 3; i++) {
3162 if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
3163 continue;
3164
3165 int uniform = inst->src[i].reg;
3166
3167 /* If this array isn't already present in the pull constant buffer,
3168 * add it.
3169 */
3170 if (pull_constant_loc[uniform] == -1) {
3171 const float **values = &prog_data->param[uniform * 4];
3172
3173 pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
3174
3175 for (int j = 0; j < uniform_size[uniform] * 4; j++) {
3176 prog_data->pull_param[prog_data->nr_pull_params++]
3177 = values[j];
3178 }
3179 }
3180
3181 /* Set up the annotation tracking for new generated instructions. */
3182 base_ir = inst->ir;
3183 current_annotation = inst->annotation;
3184
3185 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
3186
3187 emit_pull_constant_load(inst, temp, inst->src[i],
3188 pull_constant_loc[uniform]);
3189
3190 inst->src[i].file = temp.file;
3191 inst->src[i].reg = temp.reg;
3192 inst->src[i].reg_offset = temp.reg_offset;
3193 inst->src[i].reladdr = NULL;
3194 }
3195 }
3196
3197 /* Now there are no accesses of the UNIFORM file with a reladdr, so
3198 * no need to track them as larger-than-vec4 objects. This will be
3199 * relied on in cutting out unused uniform vectors from push
3200 * constants.
3201 */
3202 split_uniform_registers();
3203 }
3204
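/**
 * Fold a negate on a UD-typed source into a standalone MOV and use the
 * resulting temporary instead, presumably because not every consumer
 * handles a negation source modifier on an unsigned operand.
 */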
3205 void
3206 vec4_visitor::resolve_ud_negate(src_reg *reg)
3207 {
3208 if (reg->type != BRW_REGISTER_TYPE_UD ||
3209 !reg->negate)
3210 return;
3211
3212 src_reg temp = src_reg(this, glsl_type::uvec4_type);
3213 emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
3214 *reg = temp;
3215 }
3216
3217 vec4_visitor::vec4_visitor(struct brw_context *brw,
3218 struct brw_vec4_compile *c,
3219 struct gl_program *prog,
3220 const struct brw_vec4_prog_key *key,
3221 struct brw_vec4_prog_data *prog_data,
3222 struct gl_shader_program *shader_prog,
3223 struct brw_shader *shader,
3224 void *mem_ctx,
3225 bool debug_flag)
3226 : debug_flag(debug_flag)
3227 {
3228 this->brw = brw;
3229 this->intel = &brw->intel;
3230 this->ctx = &intel->ctx;
3231 this->shader_prog = shader_prog;
3232 this->shader = shader;
3233
3234 this->mem_ctx = mem_ctx;
3235 this->failed = false;
3236
3237 this->base_ir = NULL;
3238 this->current_annotation = NULL;
3239 memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
3240
3241 this->c = c;
3242 this->prog = prog;
3243 this->key = key;
3244 this->prog_data = prog_data;
3245
3246 this->variable_ht = hash_table_ctor(0,
3247 hash_table_pointer_hash,
3248 hash_table_pointer_compare);
3249
3250 this->virtual_grf_start = NULL;
3251 this->virtual_grf_end = NULL;
3252 this->virtual_grf_sizes = NULL;
3253 this->virtual_grf_count = 0;
3254 this->virtual_grf_reg_map = NULL;
3255 this->virtual_grf_reg_count = 0;
3256 this->virtual_grf_array_size = 0;
3257 this->live_intervals_valid = false;
3258
3259 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
3260
3261 this->uniforms = 0;
3262 }
3263
3264 vec4_visitor::~vec4_visitor()
3265 {
3266 hash_table_dtor(this->variable_ht);
3267 }
3268
3269
3270 vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw,
3271 struct brw_vs_compile *vs_compile,
3272 struct brw_vs_prog_data *vs_prog_data,
3273 struct gl_shader_program *prog,
3274 struct brw_shader *shader,
3275 void *mem_ctx)
3276 : vec4_visitor(brw, &vs_compile->base, &vs_compile->vp->program.Base,
3277 &vs_compile->key.base, &vs_prog_data->base, prog, shader,
3278 mem_ctx, INTEL_DEBUG & DEBUG_VS),
3279 vs_compile(vs_compile),
3280 vs_prog_data(vs_prog_data)
3281 {
3282 }
3283
3284
3285 void
3286 vec4_visitor::fail(const char *format, ...)
3287 {
3288 va_list va;
3289 char *msg;
3290
3291 if (failed)
3292 return;
3293
3294 failed = true;
3295
3296 va_start(va, format);
3297 msg = ralloc_vasprintf(mem_ctx, format, va);
3298 va_end(va);
3299 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
3300
3301 this->fail_msg = msg;
3302
3303 if (debug_flag) {
3304 fprintf(stderr, "%s", msg);
3305 }
3306 }
3307
3308 } /* namespace brw */